# ========== Practice with the NumPy library ==========
import numpy as np
import matplotlib.pyplot as plt
# Load the data set.
def loadDataSet():
    """Read 'testSet.txt' and return (dataMat, labelMat).

    Each line holds 'x1 x2 label'. A data row is [1.0, x1, x2] — the
    leading 1.0 serves as the bias/intercept feature — and each label
    is parsed as an int.
    """
    dataMat = []
    labelMat = []
    # 'with' guarantees the file handle is closed (the original opened
    # the file and never closed it).
    with open('testSet.txt') as fr:
        for line in fr:
            lineArr = line.strip().split()  # strip() drops leading/trailing whitespace and the newline
            dataMat.append([1.0, float(lineArr[0]), float(lineArr[1])])  # feature row
            labelMat.append(int(lineArr[2]))                             # label
    return dataMat, labelMat
def sigmoid(inx):
    """Map a scalar or numpy array through the logistic function 1/(1+e^-x)."""
    denominator = 1 + np.exp(-inx)
    return 1.0 / denominator
# Optimize the weights with full-batch gradient descent.
def gradAscent(dataMatIn, classLabels):
    """Fit logistic-regression weights by batch gradient descent.

    dataMatIn: m x n samples (first column is the bias 1.0).
    classLabels: sequence of m labels in {0, 1}.
    Returns the fitted weights as an (n, 1) np.matrix (callers convert
    with .getA()).
    """
    X = np.mat(dataMatIn)                 # m x n design matrix
    y = np.mat(classLabels).transpose()   # m x 1 label column
    _, n_features = np.shape(X)
    alpha = 0.001        # learning-rate / step size
    n_iters = 600        # fixed number of full-batch sweeps
    w = np.ones((n_features, 1))          # start from all-ones weights
    for _ in range(n_iters):
        h = sigmoid(X * w)                # m x 1 predicted probabilities
        err = h - y                       # residuals (prediction - truth)
        w = w - alpha * X.transpose() * err   # gradient-descent update
    return w
# Driver: load the data, fit weights by batch gradient descent, print them.
dataMat,labelMat=loadDataSet()
weights=gradAscent(dataMat,labelMat)
print(weights)
# Plot the decision boundary.
def plotBestFit(dataMat, labelMat, weights):
    """Scatter the two classes and draw the fitted decision line.

    dataMat: m x 3 rows [1.0, x1, x2]; labelMat: m labels in {0, 1};
    weights: length-3 array-like [w0, w1, w2].
    Red = class 1, green = class 0.
    """
    dataArr = np.array(dataMat)
    n = np.shape(dataArr)[0]  # number of samples
    xcode1 = []; ycode1 = []  # class-1 points
    xcode2 = []; ycode2 = []  # class-0 points
    for i in range(n):
        # Original wrote int(labelMat[i]==1); int(label) == 1 is the
        # intended (and truth-value-identical) comparison for 0/1 labels.
        if int(labelMat[i]) == 1:
            xcode1.append(dataArr[i, 1]); ycode1.append(dataArr[i, 2])
        else:
            xcode2.append(dataArr[i, 1]); ycode2.append(dataArr[i, 2])
    fig = plt.figure()
    ax = fig.add_subplot(111)
    ax.scatter(xcode1, ycode1, s=30, c='red')
    ax.scatter(xcode2, ycode2, s=30, c='green')
    x = np.arange(-3.0, 3.0, 0.1)
    # Boundary where w0 + w1*x1 + w2*x2 = 0  =>  x2 = (-w0 - w1*x1) / w2
    y = (-weights[0] - weights[1] * x) / weights[2]
    ax.plot(x, y)
    plt.xlabel('X1')
    plt.ylabel('X2')
    plt.show()
# Plot the decision boundary found by batch gradient descent.
plotBestFit(dataMat,labelMat,weights.getA()) #getA() converts the np.matrix into a plain ndarray
##################随机梯度法##################
import numpy as np
import matplotlib.pyplot as plt
# Load the data set.
def loadDataSet():
    """Read 'testSet.txt' and return (dataMat, labelMat).

    Each line holds 'x1 x2 label'. A data row is [1.0, x1, x2] — the
    leading 1.0 serves as the bias/intercept feature — and each label
    is parsed as an int.
    """
    dataMat = []
    labelMat = []
    # 'with' guarantees the file handle is closed (the original opened
    # the file and never closed it).
    with open('testSet.txt') as fr:
        for line in fr:
            lineArr = line.strip().split()  # strip() drops leading/trailing whitespace and the newline
            dataMat.append([1.0, float(lineArr[0]), float(lineArr[1])])  # feature row
            labelMat.append(int(lineArr[2]))                             # label
    return dataMat, labelMat
def sigmoid(inx):
    """Map a scalar or numpy array through the logistic function 1/(1+e^-x)."""
    denominator = 1 + np.exp(-inx)
    return 1.0 / denominator
# Optimize the weights with full-batch gradient descent.
def gradAscent(dataMatIn, classLabels):
    """Fit logistic-regression weights by batch gradient descent.

    dataMatIn: m x n samples (first column is the bias 1.0).
    classLabels: sequence of m labels in {0, 1}.
    Returns the fitted weights as an (n, 1) np.matrix.
    """
    X = np.mat(dataMatIn)                 # m x n design matrix
    y = np.mat(classLabels).transpose()   # m x 1 label column
    _, n_features = np.shape(X)
    alpha = 0.001        # learning-rate / step size
    n_iters = 600        # fixed number of full-batch sweeps
    w = np.ones((n_features, 1))          # start from all-ones weights
    for _ in range(n_iters):
        h = sigmoid(X * w)                # m x 1 predicted probabilities
        err = h - y                       # residuals (prediction - truth)
        w = w - alpha * X.transpose() * err   # gradient-descent update
    return w
# Stochastic gradient descent.
def stocGradAscent(dataMatIn, classLabels):
    """Fit logistic-regression weights by stochastic gradient descent.

    dataMatIn: m x n np.ndarray of samples (first column is the bias 1.0).
    classLabels: sequence of m labels in {0, 1}.
    Returns a length-n np.ndarray of weights.

    Bug fix: the error term now uses the `classLabels` parameter. The
    original read the module-level global `labelMat`, silently ignoring
    whatever labels were passed in.
    """
    m, n = np.shape(dataMatIn)
    alpha = 0.01       # learning rate (larger than batch version)
    maxCycles = 200    # full passes over the data
    weights = np.ones(n)
    for _ in range(maxCycles):
        for i in range(m):
            # logistic of the dot product, one sample at a time
            h = 1.0 / (1 + np.exp(-np.sum(dataMatIn[i] * weights)))
            error = h - classLabels[i]   # was: labelMat[i] (global)
            weights = weights - alpha * error * dataMatIn[i]  # update weights
    return weights
# Driver: load the data, fit weights by stochastic gradient descent, print them.
dataMat,labelMat=loadDataSet()
weights=stocGradAscent(np.array(dataMat),labelMat)
print(weights)
# Plot the decision boundary.
def plotBestFit(dataMat, labelMat, weights):
    """Scatter the two classes and draw the fitted decision line.

    dataMat: m x 3 rows [1.0, x1, x2]; labelMat: m labels in {0, 1};
    weights: length-3 array-like [w0, w1, w2].
    Red = class 1, green = class 0.
    """
    dataArr = np.array(dataMat)
    n = np.shape(dataArr)[0]  # number of samples
    xcode1 = []; ycode1 = []  # class-1 points
    xcode2 = []; ycode2 = []  # class-0 points
    for i in range(n):
        # Original wrote int(labelMat[i]==1); int(label) == 1 is the
        # intended (and truth-value-identical) comparison for 0/1 labels.
        if int(labelMat[i]) == 1:
            xcode1.append(dataArr[i, 1]); ycode1.append(dataArr[i, 2])
        else:
            xcode2.append(dataArr[i, 1]); ycode2.append(dataArr[i, 2])
    fig = plt.figure()
    ax = fig.add_subplot(111)
    ax.scatter(xcode1, ycode1, s=30, c='red')
    ax.scatter(xcode2, ycode2, s=30, c='green')
    x = np.arange(-3.0, 3.0, 0.1)
    # Boundary where w0 + w1*x1 + w2*x2 = 0  =>  x2 = (-w0 - w1*x1) / w2
    y = (-weights[0] - weights[1] * x) / weights[2]
    ax.plot(x, y)
    plt.xlabel('X1')
    plt.ylabel('X2')
    plt.show()
# Plot the decision boundary (the SGD weights are already a plain ndarray).
plotBestFit(dataMat,labelMat,weights)
# ========== Practice with the sklearn library ==========
import numpy as np
from sklearn.model_selection import train_test_split
# Load the data set.
def loadDataSet():
    """Read 'testSet.txt' and return (dataMat, labelMat).

    Each line holds 'x1 x2 label'. A data row is [1.0, x1, x2] — the
    leading 1.0 serves as the bias/intercept feature — and each label
    is parsed as an int.
    """
    dataMat = []
    labelMat = []
    # 'with' guarantees the file handle is closed (the original opened
    # the file and never closed it).
    with open('testSet.txt') as fr:
        for line in fr:
            lineArr = line.strip().split()  # strip() drops leading/trailing whitespace and the newline
            dataMat.append([1.0, float(lineArr[0]), float(lineArr[1])])  # feature row
            labelMat.append(int(lineArr[2]))                             # label
    return dataMat, labelMat
# Build the training and test sets (70/30 split, fixed seed for reproducibility).
dataMat,labelMat=loadDataSet()
X_train, X_test, y_train, y_test = train_test_split(dataMat, labelMat, test_size=0.3, random_state=0)
# Preprocessing: standardize features with statistics fit on the training set only.
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
sc.fit(X_train)
X_train_std = sc.transform(X_train)
X_test_std = sc.transform(X_test)
# Train the model (large C means weak L2 regularization in sklearn).
from sklearn.linear_model import LogisticRegression
lr = LogisticRegression(C=10000.0, random_state=0)
lr.fit(X_train_std, y_train)
#预测
import matplotlib.pyplot as plt
def plotPredict(X_test_std, y_test, model):
    """Plot test points by true class and mark prediction correctness.

    X_test_std: standardized test matrix; columns 1 and 2 are the two
    real features (column 0 is the bias constant from loadDataSet).
    y_test: true labels in {0, 1}. model: fitted classifier with .predict().
    Red/green = true class 1/0; yellow 'v' = correct prediction,
    yellow 'x' = misprediction.
    """
    result = list(model.predict(X_test_std))
    print(y_test)
    print(result)
    n = np.shape(X_test_std)[0]  # number of test samples
    xcode1 = []; ycode1 = []; truePrex = []; truePrey = []
    xcode2 = []; ycode2 = []; falsePrex = []; falsePrey = []
    for i in range(n):
        # Original wrote int(y_test[i]==1); int(label) == 1 is the
        # intended (and truth-value-identical) comparison for 0/1 labels.
        if int(y_test[i]) == 1:
            xcode1.append(X_test_std[i, 1]); ycode1.append(X_test_std[i, 2])
        else:
            xcode2.append(X_test_std[i, 1]); ycode2.append(X_test_std[i, 2])
    for j in range(n):
        # int(...) wrapper around the comparison was redundant
        if result[j] == y_test[j]:
            truePrex.append(X_test_std[j, 1]); truePrey.append(X_test_std[j, 2])
        else:
            falsePrex.append(X_test_std[j, 1]); falsePrey.append(X_test_std[j, 2])
    fig = plt.figure()
    ax = fig.add_subplot(111)
    ax.scatter(xcode1, ycode1, s=30, c='red')
    ax.scatter(xcode2, ycode2, s=30, c='green')
    ax.scatter(truePrex, truePrey, c='yellow', marker='v')
    ax.scatter(falsePrex, falsePrey, s=30, c='yellow', marker='x')
    plt.show()
# Visualize test-set predictions of the fitted sklearn model.
plotPredict(X_test_std,y_test,lr)