import numpy as np
from random import sample
# Model: y_hat = theta1*x1 + theta2*x2
x = np.array([[1, 4], [2, 5], [5, 1], [4, 2]])
y = np.array([19, 26, 19, 20])  # target values (consistent with y = 3*x1 + 4*x2)
w = np.array([1.0, 1.0])  # initialize the two parameters, w = [theta1, theta2]
max_iter_cnt = 10000  # maximum number of iterations, so the loops cannot run forever
cnt, cnt1, cnt2 = 0, 0, 0  # iteration counters for BGD / SGD / MBGD
'''Per-sample loss: 0.5*(y - y_hat)^2 = 0.5*(y - theta1*x1 - theta2*x2)^2
(the bias term c of the linear model is omitted here; it barely changes the derivation)
BGD / SGD / MBGD differ mainly in how many samples take part in each update of theta:
all of them, a single one chosen at random, or a small random subset. BGD's drawback is
the high computational cost per iteration when the dataset is large; SGD's drawback is
that it needs more iterations and takes a noisier path. MBGD is therefore the usual
compromise.
The learning rate also matters: a common strategy is to start with a larger rate to
speed up early iterations, then reduce it once the error is small enough. The code
below keeps the rate constant; a sketch of such a schedule follows.
'''
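# The decay strategy described above could look like this sketch (illustrative only:
# inverse-time decay with an assumed decay constant, not tuned for this data):
def decayed_rate(rate0, step, decay=1e-4):
    # Large steps early in training, progressively smaller steps later on.
    return rate0 / (1.0 + decay * step)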
m = x.shape[0]  # number of samples
rate = 0.001  # learning rate (kept constant; see note above)
# BGD: every iteration sweeps over all samples
print("BGD!!")
while cnt < max_iter_cnt:
    cnt += 1
    for i in range(m):  # every sample takes part; w is updated after each one
        diff = w.dot(x[i, :]) - y[i]
        w = w - rate * diff * x[i, :]
    error1 = 0
    for i in range(m):  # total absolute error over the whole dataset
        error1 += abs(w.dot(x[i, :]) - y[i])
    if error1 < 0.01:
        print(w)
        print(cnt)
        print(error1)
        break
'''
Sample output:
[3.00081678 3.99916927]
331
0.009885054833915774
'''
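# Note: the BGD loop above updates w after every single sample (a per-sample sweep).
# Textbook BGD accumulates the gradient over the whole batch and applies one update
# per iteration. A minimal vectorized sketch of that variant (defined only, not run
# here, so the sample output above still matches the loop):
def bgd_step(w, x, y, rate):
    grad = (x.dot(w) - y).dot(x)  # sum of the per-sample gradients diff_i * x_i
    return w - rate * grad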
# SGD: each iteration uses one randomly chosen sample
print("SGD!!")
w = np.array([1.0, 1.0])  # re-initialize so SGD does not start from BGD's solution
while cnt1 < max_iter_cnt:
    cnt1 += 1
    i = sample(range(m), 1)[0]  # pick one sample index at random
    diff = w.dot(x[i, :]) - y[i]
    w = w - rate * diff * x[i, :]
    error1 = 0
    for i in range(m):
        error1 += abs(w.dot(x[i, :]) - y[i])
    if error1 < 0.01:
        print(w)
        print(cnt1)
        print(error1)
        break
'''
Sample output:
SGD!!
[3.00083151 3.99916536]
1279
0.009996922972455025
'''
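# A common SGD variant visits the samples in a freshly shuffled order each epoch
# instead of drawing a single index per iteration; a minimal sketch of that assumed
# variant (not the code above):
def sgd_epoch(w, x, y, rate):
    for i in np.random.permutation(x.shape[0]):
        w = w - rate * (w.dot(x[i, :]) - y[i]) * x[i, :]
    return w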
# MBGD: each iteration uses a small random subset (here 3 of the 4 samples)
print("MBGD!!")
w = np.array([1.0, 1.0])  # re-initialize so MBGD does not start from SGD's solution
while cnt2 < max_iter_cnt:
    cnt2 += 1
    batch = sample(range(m), 3)  # random mini-batch drawn without replacement
    for i in batch:
        diff = w.dot(x[i, :]) - y[i]
        w = w - rate * diff * x[i, :]
    error1 = 0
    for i in range(m):
        error1 += abs(w.dot(x[i, :]) - y[i])
    if error1 < 0.01:
        print(w)
        print(cnt2)
        print(error1)
        break
'''
Sample output:
MBGD!!
[3.0008586 3.99920255]
452
0.009936281035848538
'''
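# The mini-batch update can also be vectorized with NumPy indexing instead of an inner
# Python loop; a minimal sketch (the batch size of 2 is an arbitrary illustrative
# choice for this 4-sample dataset):
def mbgd_step(w, x, y, rate, batch_size=2):
    idx = sample(range(x.shape[0]), batch_size)  # random mini-batch indices
    xb, yb = x[idx, :], y[idx]
    grad = (xb.dot(w) - yb).dot(xb)  # gradient summed over the mini-batch
    return w - rate * grad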