Source of the algorithm:
https://arxiv.org/pdf/1801.05894.pdf — "Deep Learning: An Introduction for Applied Mathematicians" (deep neural networks through a mathematician's eyes). The paper's derivations are clear and its algorithm description is simple and easy to remember, so I took the MATLAB code from the paper and rewrote it in Python. The figures and the algorithm below come mainly from that paper.
Key algorithm
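The paper's figures are not reproduced here, so for reference these are the backpropagation relations the code implements (in the paper's notation, where \(\circ\) is the elementwise product):

\[
\delta^{[L]} = \sigma'(z^{[L]}) \circ (a^{[L]} - y), \qquad
\delta^{[l]} = \sigma'(z^{[l]}) \circ \bigl(W^{[l+1]}\bigr)^{T} \delta^{[l+1]}, \quad 2 \le l \le L-1,
\]
\[
\frac{\partial C}{\partial W^{[l]}} = \delta^{[l]} \bigl(a^{[l-1]}\bigr)^{T}, \qquad
\frac{\partial C}{\partial b^{[l]}} = \delta^{[l]}.
\]

Since \(\sigma\) is the sigmoid, \(\sigma'(z) = a(1-a)\), which is the a.*(1-a) factor in the code below; the gradient step is then \(W \leftarrow W - \eta\,\delta\,a^{T}\) and \(b \leftarrow b - \eta\,\delta\).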
The MATLAB code from the original paper:
function netbp
%NETBP Uses backpropagation to train a network
%%%%%%% DATA %%%%%%%%%%%
x1 = [0.1,0.3,0.1,0.6,0.4,0.6,0.5,0.9,0.4,0.7];
x2 = [0.1,0.4,0.5,0.9,0.2,0.3,0.6,0.2,0.4,0.6];
y = [ones(1,5) zeros(1,5); zeros(1,5) ones(1,5)];
% Initialize weights and biases
rng(5000);
W2 = 0.5*randn(2,2); W3 = 0.5*randn(3,2); W4 = 0.5*randn(2,3);
b2 = 0.5*randn(2,1); b3 = 0.5*randn(3,1); b4 = 0.5*randn(2,1);
% Forward and Back propagate
eta = 0.05;                % learning rate
Niter = 1e6;               % number of SG iterations
savecost = zeros(Niter,1); % value of cost function at each iteration
for counter = 1:Niter
    k = randi(10);         % choose a training point at random
    x = [x1(k); x2(k)];
    % Forward pass
    a2 = activate(x,W2,b2);  % 2x1
    a3 = activate(a2,W3,b3); % 3x1
    a4 = activate(a3,W4,b4); % 2x1
    % Backward pass
    delta4 = a4.*(1-a4).*(a4-y(:,k));  % 2x1 .* 2x1
    delta3 = a3.*(1-a3).*(W4'*delta4); % 3x1 .* (3x2 * 2x1)
    delta2 = a2.*(1-a2).*(W3'*delta3); % 2x1 .* (2x3 * 3x1)
    % Gradient step
    W2 = W2 - eta*delta2*x';  % 2x2 - eta * (2x1 * 1x2)
    W3 = W3 - eta*delta3*a2'; % 3x2 - eta * (3x1 * 1x2)
    W4 = W4 - eta*delta4*a3'; % 2x3 - eta * (2x1 * 1x3)
    b2 = b2 - eta*delta2;     % 2x1
    b3 = b3 - eta*delta3;     % 3x1
    b4 = b4 - eta*delta4;     % 2x1
    % Monitor progress
    newcost = cost(W2,W3,W4,b2,b3,b4)  % display cost to screen
    savecost(counter) = newcost;
end
% Show decay of cost function
save costvec
semilogy([1:1e4:Niter],savecost(1:1e4:Niter))

    function costval = cost(W2,W3,W4,b2,b3,b4)
        costvec = zeros(10,1);
        for i = 1:10
            x = [x1(i);x2(i)];
            a2 = activate(x,W2,b2);
            a3 = activate(a2,W3,b3);
            a4 = activate(a3,W4,b4);
            costvec(i) = norm(y(:,i) - a4,2);
        end
        costval = norm(costvec,2)^2;
    end % of nested function
end
%(1) " * " 即矩阵乘法,两个矩阵必须满足左边矩阵的列数等于右边矩阵的行数,如:
%A(m,k) * B(k,n) = C(m,n)
%(2) " .* " 即对应元素相乘,两个矩阵必须满足规格相同,如:
%A(m,n) .* B(m,n) = C(m,n) = { a(i,j)*b(i,j) }
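For the Python rewrite, a minimal sketch of how these two operators map onto NumPy (the array shapes are chosen only for illustration):

import numpy as np

A = np.ones((2, 3))   # 2x3
B = np.ones((3, 4))   # 3x4
C = np.dot(A, B)      # MATLAB "A * B": matrix product, shape (2, 4)

D = 2 * np.ones((2, 3))
E = A * D             # MATLAB "A .* D": elementwise product, shape (2, 3)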
The Python rewrite of the fully connected network follows. Note the "divide by m" below: that is the batch version, which updates the weights and biases from several samples at once. Removing the /m and feeding one sample at a time gives stochastic gradient descent, whose convergence is somewhat worse per update. The batch handling follows https://blog.csdn.net/qq_28888837/article/details/84296673#comments .
Following the MATLAB code, the last (output) layer also gets an activation function here; normally the output layer has none.
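The difference only shows up in the output-layer delta. A minimal sketch of both variants under the squared-error cost (aL and y here are placeholder arrays, not the variables from the script below):

import numpy as np

aL = np.array([[0.8], [0.3]])   # placeholder output activations
y = np.array([[1.0], [0.0]])    # placeholder targets
delta_sigmoid = aL * (1 - aL) * (aL - y)   # output layer with a sigmoid, as below
delta_linear = aL - y                      # output layer with no activation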
#!/usr/bin/python
# -*- coding: utf-8 -*-
import numpy as np
from matplotlib import pyplot as plt
from mpl_toolkits.mplot3d import Axes3D

# Fully connected neural network
class NNLayer:
    def __init__(self, shape):
        self.shape = shape
        self.w = np.random.random([shape[1], shape[0]])
        self.b = np.zeros((shape[1], 1))

    @staticmethod
    def sigmoid(z):
        return 1.0 / (1.0 + np.exp(-z))

    @staticmethod
    def sigmoid_primez(a):
        # derivative of the sigmoid, expressed in terms of the activation a
        return a * (1 - a)

    def forward(self, inputx):
        self.z = np.dot(self.w, inputx) + self.b   # pre-activation z = W a_prev + b
        self.a = NNLayer.sigmoid(self.z)           # activation
        self.Dl = NNLayer.sigmoid_primez(self.a)   # sigma'(z), stored for backprop
        return self.a
class FullCon:
    def __init__(self, layers_dim):
        self.L = len(layers_dim)
        self.layers = {}   # note: a dict keyed by layer number, not a list
        for i in range(1, self.L):
            layer = NNLayer((layers_dim[i-1], layers_dim[i]))
            self.layers[i+1] = layer

    def forward(self, x):
        # propagate the input through layers 2..L and return the output activation
        inputa = x
        for i in range(2, self.L + 1):
            inputa = self.layers[i].forward(inputa)
        return inputa
    def backward(self, al, y):
        m = y.shape[1]   # number of samples in the batch
        lys = self.layers
        # Output layer: delta = sigma'(z) .* (a - y), following the MATLAB code.
        # If the output layer had no activation function, the Dl factor would
        # be dropped and delta would be just (al - y).
        layer = lys[self.L]
        error = (al - y)
        layer.delta = layer.Dl * error
        # Hidden layers: delta_l = sigma'(z_l) .* (W_{l+1}^T delta_{l+1}).
        # The /m averages over the batch; note it is applied at every hidden
        # layer here (conventionally the 1/m factor is applied once, in the update).
        for i in reversed(range(2, self.L)):
            layer = lys[i]
            layer.delta = layer.Dl * (np.dot(lys[i+1].w.T, lys[i+1].delta)) / m
    # update all the weights and biases from the stored deltas
    def update_wandb(self, beta, inx):
        lys = self.layers
        pre_a = inx   # activation of the previous layer (the input for layer 2)
        for i in range(2, self.L + 1):
            layer = lys[i]
            delta = layer.delta
            layer.w = layer.w - beta * np.dot(delta, pre_a.T)
            layer.b = layer.b - beta * np.sum(delta, axis=1, keepdims=True)
            pre_a = lys[i].a

    # compute the loss (mean squared error; the MATLAB cost is a summed squared norm)
    @staticmethod
    def compute_loss(al, y):
        return np.mean(np.square(al - y))
# load data (defined for fitting sin(x); unused in the test below)
def load_data():
    x = np.arange(0.0, 1.0, 0.01)
    y = 0.4 * np.sin(2 * np.pi * x) + 0.5
    plt.plot(x, y)   # visualize the data
    return x, y
# run the test
m = 10
np.random.seed(300)
x1 = np.array([0.1,0.3,0.1,0.6,0.4,0.6,0.5,0.9,0.4,0.7])
x2 = np.array([0.1,0.4,0.5,0.9,0.2,0.3,0.6,0.2,0.4,0.6])
y = np.array([[1,1,1,1,1, 0,0,0,0,0],
              [0,0,0,0,0, 1,1,1,1,1.0]])   # the 1.0 makes the array float
x = np.array([x1, x2])
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
fnn = FullCon([2, 2, 3, 2])   # same 2-2-3-2 architecture as the MATLAB code
yy = y[0]
ax.scatter(x1, x2, yy, c='r', marker='v')   # targets in red
al = 0
los = 1.0
learning_rate = 5
for i in range(1640000):
    # stochastic-gradient variant: pick one sample per step
    #k = np.random.randint(m)
    #inx = np.array([x[:,k]]).T
    #outy = np.array([y[:,k]]).T
    inx = x      # batch variant: use all samples at once
    outy = y
    al = fnn.forward(inx)
    fnn.backward(al, outy)
    fnn.update_wandb(learning_rate, inx)
    if i % 1000 == 0:
        aly = fnn.forward(x)
        los = fnn.compute_loss(aly, y)
        print("i=%d los=%f" % (i, los))
        if los < 0.031:
            break
aly = fnn.forward(x)
zz = aly[0]
print("aly ", aly)
zxy = [[zi[0], zi[1]] for zi in aly.T]   # per-sample output pairs
print("zxy =", zxy)
print("norm2 ", [np.linalg.norm(zi, ord=2) for zi in zxy])
print("norm1 ", [np.linalg.norm(zi, ord=1) for zi in zxy])
ax.scatter(x1, x2, zz, c='g', marker='^')   # predictions in green
plt.show()
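For comparison, a sketch of the per-sample stochastic-gradient variant of the loop above (this is just the commented-out sampling lines put back in; fnn, x, y, m, learning_rate as defined earlier):

for i in range(100000):
    k = np.random.randint(m)   # pick one training point at random, like randi(10)
    inx = x[:, k:k+1]          # the slice keeps the (2, 1) column shape
    outy = y[:, k:k+1]         # one column, so the /m in backward() is a no-op
    al = fnn.forward(inx)
    fnn.backward(al, outy)
    fnn.update_wandb(learning_rate, inx)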
Results of running the code
All of the code is on GitHub: https://github.com/foolpanda/deepFC
Getting an activation function by name
#!/usr/bin/python
# -*- coding: utf-8 -*-
import numpy as np

# Activation functions, looked up by name
class ActivateFunction:
    def __init__(self, actname):
        self.actname = actname
        self.Af, self.Df = ActivateFunction.getAFandDF(actname)

    def funValue(self, x):
        v = self.Af(x)   # activation value
        z = self.Df(v)   # derivative, computed from the activation value
        return (v, z)

    @staticmethod
    def getAFandDF(var):
        return {
            'sigmoid': (ActivateFunction.sigmoid, ActivateFunction.sigmoid_primez),
            'tanh':    (ActivateFunction.tanh,    ActivateFunction.tanh_primez),
            'relu':    (ActivateFunction.relu,    ActivateFunction.relu_primez),
        }.get(var, 'error')   # 'error' is the default for unknown names; change as needed
    @staticmethod
    def sigmoid(z):
        return 1.0 / (1.0 + np.exp(-z))

    @staticmethod
    def sigmoid_primez(a):
        return a * (1 - a)

    @staticmethod
    def tanh(x):
        return 2 * ActivateFunction.sigmoid(2 * x) - 1

    @staticmethod
    def tanh_primez(a):
        # tanh'(x) = 1 - tanh(x)^2, in terms of the activation a
        return 1 - a * a

    @staticmethod
    def relu(x):
        # np.maximum, not np.max: elementwise comparison with 0
        return np.maximum(0, x)

    @staticmethod
    def relu_primez(a):
        # vectorized: 1 where the activation is positive, else 0
        return np.where(a > 0, 1.0, 0.0)
    @staticmethod
    def softplus(x):
        return np.log(1.0 + np.exp(x))

    @staticmethod
    def softplus_primez(x):
        # note: unlike the *_primez above, this takes the pre-activation x
        return ActivateFunction.sigmoid(x)

afun = ActivateFunction('sigmoid')
print("afun(%s,%s)" % afun.funValue(0))

Output: afun(0.5,0.25)
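One way to use this class would be to swap it into the NNLayer above in place of the hard-coded sigmoid; a minimal sketch (this wiring is my own illustration, not code from the repository):

class NNLayerNamed:
    """Like NNLayer, but with the activation chosen by name."""
    def __init__(self, shape, actname='sigmoid'):
        self.w = np.random.random([shape[1], shape[0]])
        self.b = np.zeros((shape[1], 1))
        self.act = ActivateFunction(actname)

    def forward(self, inputx):
        self.z = np.dot(self.w, inputx) + self.b
        self.a, self.Dl = self.act.funValue(self.z)   # value and derivative together
        return self.a

layer = NNLayerNamed((2, 3), 'tanh')
print(layer.forward(np.array([[0.1], [0.2]])))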