from numpy import *
import matplotlib.pyplot as plt
def loadDataSet(fileName):
    """Load a whitespace-separated two-feature data set from a text file.

    Each non-blank line must hold at least three fields: two float features
    followed by an integer class label. A constant 1.0 is prepended to every
    feature row so the first weight acts as the intercept term.

    Args:
        fileName: Path of the text file to read.

    Returns:
        A tuple ``(dataMat, labelMat)`` of NumPy matrices: ``dataMat`` has one
        ``[1.0, x1, x2]`` row per sample and ``labelMat`` is a single row
        holding the integer labels.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If a field cannot be parsed as a number.
        IndexError: If a non-blank line has fewer than three fields.
    """
    dataMat = []
    labelMat = []
    # The context manager guarantees the handle is closed even on a parse error.
    with open(fileName) as fr:
        for line in fr:
            lineArr = line.strip().split()
            if not lineArr:
                # Tolerate blank lines (e.g. a trailing newline at end of file).
                continue
            dataMat.append([1.0, float(lineArr[0]), float(lineArr[1])])
            labelMat.append(int(lineArr[2]))
    # asmatrix is the spelling that survives NumPy 2.0, where `mat` was removed.
    return asmatrix(dataMat), asmatrix(labelMat)
def sigmoid(inX):
    """Return the logistic sigmoid ``1 / (1 + exp(-inX))`` of ``inX``.

    The computation goes through NumPy's ``exp``, so scalars and arrays are
    both accepted and arrays are processed element-wise.
    """
    denominator = 1 + exp(-inX)
    return 1.0 / denominator
def gradAscent(dataMatIn, classLabels, alpha=0.001, maxCycles=1000):
    """Fit logistic-regression weights with batch gradient ascent.

    Args:
        dataMatIn: Sample matrix (or any 2-D array-like) with one row per
            sample and one column per feature.
        classLabels: Class labels, one per sample, as a row matrix, a column
            matrix, or a flat sequence.
        alpha: Learning rate (step size) applied to every update.
        maxCycles: Number of full-batch update iterations to run.

    Returns:
        An ``n x 1`` NumPy matrix of weights, where ``n`` is the number of
        columns of ``dataMatIn``.
    """
    # Coerce to matrix so `*` below means matrix multiplication regardless of
    # whether the caller passed a matrix, an ndarray, or nested lists.
    dataMatrix = asmatrix(dataMatIn)
    # Labels must be a column vector to line up with the predictions `h`.
    labelMat = asmatrix(classLabels).reshape(-1, 1)
    n = shape(dataMatrix)[1]
    weights = asmatrix(ones((n, 1)))
    for _ in range(maxCycles):
        h = sigmoid(dataMatrix * weights)
        # Difference between the true labels and the predicted probabilities.
        error = labelMat - h
        # Gradient-ascent step on the weights.
        weights = weights + alpha * dataMatrix.transpose() * error
    return weights
def plotBestFit(dataMat, labelMat, weights):
    """Scatter-plot the two classes and draw the fitted decision boundary.

    Args:
        dataMat: Sample matrix whose columns 1 and 2 hold the two plotted
            features (column 0 is not plotted).
        labelMat: One class label per sample, in any shape that flattens to a
            1-D sequence; samples whose label truncates to 1 are drawn as red
            squares, all others as green dots.
        weights: Three coefficients ``w0, w1, w2`` in any shape that flattens
            to length three; the plotted line is ``w0 + w1*x1 + w2*x2 = 0``.
    """
    dataArr = asarray(dataMat)
    # Flatten to plain 1-D arrays so no size-1 matrix has to be converted to a
    # Python scalar (deprecated in recent NumPy) and so lists/ndarrays work too.
    labels = asarray(labelMat).ravel().astype(int)
    coef = asarray(weights, dtype=float).ravel()

    isPositive = labels == 1
    fig = plt.figure()
    ax = fig.add_subplot(111)
    ax.scatter(dataArr[isPositive, 1], dataArr[isPositive, 2],
               s=30, c='red', marker='s')
    ax.scatter(dataArr[~isPositive, 1], dataArr[~isPositive, 2],
               s=30, c='green')

    # The boundary is drawn over a fixed x1 range of [-3.0, 3.0).
    x = arange(-3.0, 3.0, 0.1)
    # Solve w0 + w1*x1 + w2*x2 = 0 for x2.
    y = (-coef[0] - coef[1] * x) / coef[2]
    ax.plot(x, y, linewidth=3)
    plt.xlabel('X1')
    plt.ylabel('X2')
    plt.show()
def main():
    """Load ``testSet.txt``, fit the weights, and plot the result."""
    features, labels = loadDataSet("testSet.txt")
    fitted = gradAscent(features, labels)
    # loadDataSet returns the labels as a single row; plotBestFit is handed
    # the transposed (column) form.
    plotBestFit(features, labels.T, fitted)


if __name__ == "__main__":
    main()