Logistic Regression Theory
What distinguishes logistic regression from the linear model is the activation function; the code here uses the sigmoid activation, which is suited to binary classification. The hard part of learning, as in neural networks generally, is backpropagation, i.e. computing the gradient. I won't rederive it here; for the gradient derivation of logistic regression see my logistic算法解析 post. You will find that logistic regression and linear regression end up with gradients of exactly the same form (the difference lies in the prediction function), and this is precisely because their cost functions are different.
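For reference, the key step of that derivation (a sketch in my notation, with $h = \sigma(Xw)$ and labels $y \in \{0,1\}$): the cross-entropy cost is

$$J(w) = -\sum_i \big[\, y_i \log h_i + (1-y_i)\log(1-h_i) \,\big],$$

and because $\sigma'(z) = \sigma(z)\,(1-\sigma(z))$, the sigmoid's derivative cancels, leaving

$$\nabla_w J = X^\top (h - y),$$

the same form as the squared-error gradient of linear regression with $h = Xw$. Gradient ascent on the log-likelihood therefore updates $w \leftarrow w + \alpha\, X^\top (y - h)$, which is exactly what the code below implements.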
(And I really have to gripe a little about the book Machine Learning in Action here, sigh…)
Code in Practice
loaddata prepares the dataset, sigmoid is the activation function, and then comes the gradient-update function. I have updated parts of the code for Python 3; the place to pay attention to is the gradient computation, which is written to follow the derivation above.
import numpy as np

# Gradient ascent optimization for logistic regression
# (can equally be called gradient descent on the negative log-likelihood)
def loaddata():
    datamat = []
    label = []
    with open('/Users/enjlife/machine-learning/machinelearninginaction/ch05/testSet.txt') as fr:
        for line in fr.readlines():
            arr = line.strip().split()
            datamat.append([1.0, float(arr[0]), float(arr[1])])  # prepend 1.0 as the intercept term x0
            label.append(int(arr[2]))
    return datamat, label

def sigmoid(x):
    # note: np.exp(-x) can raise an overflow warning for large negative x; harmless for this demo
    return 1.0/(1 + np.exp(-x))

def gradasc(datamat, label):
    datama = np.array(datamat)
    labelma = np.expand_dims(np.array(label), axis=1)  # labels as a column vector
    m, n = np.shape(datama)
    a = 0.001    # learning rate
    cycles = 500 # number of full-batch iterations
    weights = np.ones((n, 1))
    for k in range(cycles):
        h = sigmoid(np.dot(datama, weights))  # predicted probabilities
        error = labelma - h                   # y - h, matching the derivation above
        weights = weights + a*np.dot(datama.transpose(), error)
    return weights, datama, labelma
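A quick smoke test of the batch version (assuming testSet.txt is at the path hard-coded in loaddata):

datamat, label = loaddata()
weights, datama, labelma = gradasc(datamat, label)
print(weights)  # a (3, 1) column of fitted parameters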
Plot the Decision Boundary
Pay attention to the line-drawing code: for the plot axes we use the second and third features. The decision boundary is where the fitted probabilities of the two classes are both 0.5, and sigmoid(z) = 0.5 exactly when z = 0, so the boundary is the set of points with $w_0 + w_1 x_1 + w_2 x_2 = 0$, i.e. $x_2 = (-w_0 - w_1 x_1)/w_2$; here we can treat $x_2$ as the y of the line.
# Plot the decision boundary
import matplotlib.pyplot as plt

def plotfit(datamat, label):
    weights, datama, labelma = gradasc(datamat, label)
    n = datama.shape[0]
    xc1 = []
    yc1 = []
    xc2 = []
    yc2 = []
    # split the points by class so the two classes get different colors
    for i in range(n):
        if labelma[i] == 1:
            xc1.append(datama[i,1])
            yc1.append(datama[i,2])
        else:
            xc2.append(datama[i,1])
            yc2.append(datama[i,2])
    plt.figure()
    plt.subplot(111)
    plt.scatter(xc1, yc1, s=30, c='red')
    plt.scatter(xc2, yc2, s=30, c='green')
    x = np.linspace(-3, 3, 60)  # use linspace: range only accepts integer steps
    y = (-weights[0]-weights[1]*x)/weights[2]  # boundary points: solve w0 + w1*x + w2*y = 0 for y
    plt.plot(x, y, c='black')
    plt.xlabel('x1')
    plt.ylabel('x2')
    plt.show()
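To actually draw the figure (reusing datamat and label returned by loaddata above):

plotfit(datamat, label)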
The resulting plot is shown below:
Stochastic Gradient Descent
Honestly, this dataset is not a great showcase for stochastic gradient descent, because it is so small; the code is similar to the above. Note that with a batch_size parameter the version below is really mini-batch gradient ascent rather than per-sample SGD.
# Stochastic (mini-batch) gradient descent
def stogradasc(datamat, label, inum, batch_size):
    datama = np.array(datamat)
    labelma = np.expand_dims(np.array(label), axis=1)
    m, n = np.shape(datama)
    weights = np.ones((n, 1))
    batch = int(m/batch_size)  # note: the m % batch_size leftover samples are dropped
    for i in range(inum):
        for j in range(batch):
            a = 0.001  # fixed learning rate
            dataid = datama[batch_size*j:batch_size*(j+1), :]
            labelid = labelma[batch_size*j:batch_size*(j+1), :]
            h = sigmoid(np.dot(dataid, weights))
            error = labelid - h
            weights = weights + a*np.dot(dataid.transpose(), error)
    return weights
weights=stogradasc(datamat,label,1000,50)
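For a true per-sample stochastic update, the book's improved variant picks a random sample each step and decays the learning rate over time; here is a sketch of that idea ported to this code's conventions (stogradasc1 is my name, and the decay schedule is modeled on the book, not copied verbatim):

# Per-sample stochastic gradient ascent with a decaying learning rate
def stogradasc1(datamat, label, inum):
    datama = np.array(datamat)
    labelma = np.array(label)
    m, n = np.shape(datama)
    weights = np.ones(n)
    for i in range(inum):
        dataindex = list(range(m))
        for j in range(m):
            a = 4/(1.0 + i + j) + 0.01  # decays with iterations but never reaches 0
            randid = int(np.random.uniform(0, len(dataindex)))
            idx = dataindex[randid]
            h = sigmoid(np.dot(datama[idx], weights))  # scalar prediction for one sample
            error = labelma[idx] - h
            weights = weights + a*error*datama[idx]
            del(dataindex[randid])  # sample without replacement within one pass
    return weights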
Plot the Decision Line
# Visualize the stochastic-gradient-descent fit
def stoplotfit(datamat, label, weightnew):
    weig, datama, labelma = gradasc(datamat, label)  # weig unused; we only need the parsed arrays
    n = datama.shape[0]
    print(weightnew)
    xc1 = []
    yc1 = []
    xc2 = []
    yc2 = []
    for i in range(n):
        if labelma[i] == 1:
            xc1.append(datama[i,1])
            yc1.append(datama[i,2])
        else:
            xc2.append(datama[i,1])
            yc2.append(datama[i,2])
    plt.figure()
    plt.subplot(111)
    plt.scatter(xc1, yc1, s=30, c='red')
    plt.scatter(xc2, yc2, s=30, c='green')
    x = np.linspace(-3, 3, 60)  # use linspace: range only accepts integer steps
    y = (-weightnew[0]-weightnew[1]*x)/weightnew[2]  # boundary points: solve w0 + w1*x + w2*y = 0 for y
    plt.plot(x, y, c='black')
    plt.xlabel('x1')
    plt.ylabel('x2')
    plt.show()
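Calling it with the mini-batch weights computed above:

stoplotfit(datamat, label, weights)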
Testing the Algorithm: Classification with Logistic Regression
First preprocess the data and return the training and test sets.
def colic():
    with open('/Users/enjlife/machine-learning/machinelearninginaction/ch05/horseColicTraining.txt') as fr:
        trset = []
        trlabel = []
        for line in fr.readlines():
            curline = line.strip().split('\t')
            trset.append(np.array(curline[:21]).astype(float))  # first 21 columns are features
            trlabel.append(float(curline[21]))                  # last column is the label
    with open('/Users/enjlife/machine-learning/machinelearninginaction/ch05/horseColicTest.txt') as fr:
        teset = []
        telabel = []
        for line in fr.readlines():
            curline = line.strip().split('\t')
            teset.append(np.array(curline[:21]).astype(float))
            telabel.append(float(curline[21]))
    return trset, trlabel, teset, telabel
Train on the training set to obtain the weights (unpacking the four values colic returns first):

trset, trlabel, teset, telabel = colic()
trweights = stogradasc(trset, trlabel, 500, 50)
Define the regression prediction function.
# Classify with logistic regression
def classvec(x, weights):
    prob = sigmoid(np.dot(x, weights))
    yprob = prob > 0.5  # predict class 1 when the probability exceeds 0.5
    return yprob
Compare the predictions with the true labels and compute the error rate.
testset = np.array(teset)
testlabel = np.expand_dims(np.array(telabel), axis=1)
print(testset.shape)  # the (67, 21) line in the output below
yprob = classvec(testset, trweights)
lente = testset.shape[0]
count = 0
for i in range(lente):
    if int(yprob[i]) != int(telabel[i]):
        count += 1
print('the error rate:%s' % (float(count)/lente))
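The same computation in one vectorized line (a minimal alternative using the yprob and testlabel arrays above):

err = np.mean(yprob.astype(int) != testlabel.astype(int))
print('the error rate:%s' % err)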
output:
(67, 21)
the error rate:0.26865671641791045
This post references the book Machine Learning in Action by Peter Harrington.