Source of the algorithm:
https://arxiv.org/pdf/1801.05894.pdf — "Deep Learning: An Introduction for Applied Mathematicians" (deep neural networks through a mathematician's eyes). The paper's derivations are clear and its algorithm description is simple and easy to remember, so I took the MATLAB code from the paper and rewrote it in Python. The figures and the algorithm below come mainly from that paper.
Key algorithm
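The paper's figures are not reproduced here, so for reference these are the backpropagation relations the code implements (in the paper's notation, where \(\circ\) is the elementwise product):

\[
\delta^{[L]} = \sigma'(z^{[L]}) \circ (a^{[L]} - y), \qquad
\delta^{[l]} = \sigma'(z^{[l]}) \circ \bigl(W^{[l+1]}\bigr)^{T} \delta^{[l+1]}, \quad 2 \le l \le L-1,
\]
\[
\frac{\partial C}{\partial W^{[l]}} = \delta^{[l]} \bigl(a^{[l-1]}\bigr)^{T}, \qquad
\frac{\partial C}{\partial b^{[l]}} = \delta^{[l]}.
\]

Since \(\sigma\) is the sigmoid, \(\sigma'(z) = a(1-a)\), which is the a.*(1-a) factor in the code below; the gradient step is then \(W \leftarrow W - \eta\,\delta\,a^{T}\) and \(b \leftarrow b - \eta\,\delta\).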
The MATLAB code from the original paper:
function netbp
%NETBP Uses backpropagation to train a network
%%%%%%% DATA %%%%%%%%%%%
x1 = [0.1,0.3,0.1,0.6,0.4,0.6,0.5,0.9,0.4,0.7];
x2 = [0.1,0.4,0.5,0.9,0.2,0.3,0.6,0.2,0.4,0.6];
y = [ones(1,5) zeros(1,5); zeros(1,5) ones(1,5)];
% Initialize weights and biases
rng(5000);
W2 = 0.5*randn(2,2); W3 = 0.5*randn(3,2); W4 = 0.5*randn(2,3);
b2 = 0.5*randn(2,1); b3 = 0.5*randn(3,1); b4 = 0.5*randn(2,1);
% Forward and Back propagate
eta = 0.05;                % learning rate
Niter = 1e6;               % number of SG iterations
savecost = zeros(Niter,1); % value of cost function at each iteration
for counter = 1:Niter
    k = randi(10);         % choose a training point at random
    x = [x1(k); x2(k)];
    % Forward pass
    a2 = activate(x,W2,b2);  % 2x1
    a3 = activate(a2,W3,b3); % 3x1
    a4 = activate(a3,W4,b4); % 2x1
    % Backward pass
    delta4 = a4.*(1-a4).*(a4-y(:,k));  % 2x1 .* 2x1
    delta3 = a3.*(1-a3).*(W4'*delta4); % 3x1 .* (3x2 * 2x1)
    delta2 = a2.*(1-a2).*(W3'*delta3); % 2x1 .* (2x3 * 3x1)
    % Gradient step
    W2 = W2 - eta*delta2*x';  % 2x2 - eta * (2x1 * 1x2)
    W3 = W3 - eta*delta3*a2'; % 3x2 - eta * (3x1 * 1x2)
    W4 = W4 - eta*delta4*a3'; % 2x3 - eta * (2x1 * 1x3)
    b2 = b2 - eta*delta2;     % 2x1
    b3 = b3 - eta*delta3;     % 3x1
    b4 = b4 - eta*delta4;     % 2x1
    % Monitor progress
    newcost = cost(W2,W3,W4,b2,b3,b4)  % display cost to screen
    savecost(counter) = newcost;
end
% Show decay of cost function
save costvec
semilogy([1:1e4:Niter],savecost(1:1e4:Niter))

    function costval = cost(W2,W3,W4,b2,b3,b4)
        costvec = zeros(10,1);
        for i = 1:10
            x = [x1(i);x2(i)];
            a2 = activate(x,W2,b2);
            a3 = activate(a2,W3,b3);
            a4 = activate(a3,W4,b4);
            costvec(i) = norm(y(:,i) - a4,2);
        end
        costval = norm(costvec,2)^2;
    end % of nested function
end
%(1) " * " 即矩阵乘法,两个矩阵必须满足左边矩阵的列数等于右边矩阵的行数,如:
%A(m,k) * B(k,n) = C(m,n)
%(2) " .* " 即对应元素相乘,两个矩阵必须满足规格相同,如:
%A(m,n) .* B(m,n) = C(m,n) = { a(i,j)*b(i,j) }
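For the Python rewrite, a minimal sketch of how these two operators map onto NumPy (the array shapes are chosen only for illustration):

import numpy as np

A = np.ones((2, 3))   # 2x3
B = np.ones((3, 4))   # 3x4
C = np.dot(A, B)      # MATLAB "A * B": matrix product, shape (2, 4)

D = 2 * np.ones((2, 3))
E = A * D             # MATLAB "A .* D": elementwise product, shape (2, 3)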
The Python rewrite of the fully connected network follows. Note the "divide by m" below: that is the batch version, which updates the weights and biases from several samples at once. Removing the /m and feeding one sample at a time gives stochastic gradient descent, whose convergence is somewhat worse per update. The batch handling follows https://blog.csdn.net/qq_28888837/article/details/84296673#comments .
Following the MATLAB code, the last (output) layer also gets an activation function here; normally the output layer has none.
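The difference only shows up in the output-layer delta. A minimal sketch of both variants under the squared-error cost (aL and y here are placeholder arrays, not the variables from the script below):

import numpy as np

aL = np.array([[0.8], [0.3]])   # placeholder output activations
y = np.array([[1.0], [0.0]])    # placeholder targets
delta_sigmoid = aL * (1 - aL) * (aL - y)   # output layer with a sigmoid, as below
delta_linear = aL - y                      # output layer with no activation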
#!/usr/bin/python
# -*- coding: utf-8 -*-
import numpy as np
from matplotlib import pyplot as plt
from mpl_toolkits.mplot3d import Axes3D

# Fully connected neural network
class NNLayer:
    def __init__(self, shape):
        self.shape = shape
        self.w = np.random.random([shape[1], shape[0]])
        self.b = np.zeros((shape[1], 1))

    @staticmethod
    def sigmoid(z):
        return 1.0 / (1.0 + np.exp(-z))

    @staticmethod
    def sigmoid_primez(a):
        # derivative of the sigmoid, expressed in terms of the activation a
        return a * (1 - a)

    def forward(self, inputx):
        self.z = np.dot(self.w, inputx) + self.b   # pre-activation z = W a_prev + b
        self.a = NNLayer.sigmoid(self.z)           # activation
        self.Dl = NNLayer.sigmoid_primez(self.a)   # sigma'(z), stored for backprop
        return self.a
class FullCon:
    def __init__(self, layers_dim):
        self.L = len(layers_dim)
        self.layers = {}   # note: a dict keyed by layer number, not a list
        for i in range(1, self.L):
            layer = NNLayer((layers_dim[i-1], layers_dim[i]))
            self.layers[i+1] = layer

    def forward(self, x):
        # propagate the input through layers 2..L and return the output activation
        inputa = x
        for i in range(2, self.L + 1):
            inputa = self.layers[i].forward(inputa)
        return inputa
    def backward(self, al, y):
        m = y.shape[1]   # number of samples in the batch
        lys = self.layers
        # Output layer: delta = sigma'(z) .* (a - y), following the MATLAB code.
        # If the output layer had no activation function, the Dl factor would
        # be dropped and delta would be just (al - y).
        layer = lys[self.L]
        error = (al - y)
        layer.delta = layer.Dl * error
        # Hidden layers: delta_l = sigma'(z_l) .* (W_{l+1}^T delta_{l+1}).
        # The /m averages over the batch; note it is applied at every hidden
        # layer here (conventionally the 1/m factor is applied once, in the update).
        for i in reversed(range(2, self.L)):
            layer = lys[i]
            layer.delta = layer.Dl * (np.dot(lys[i+1].w.T, lys[i+1].delta)) / m
    # update all the weights and biases from the stored deltas
    def update_wandb(self, beta, inx):
        lys = self.layers
        pre_a = inx   # activation of the previous layer (the input for layer 2)
        for i in range(2, self.L + 1):
            layer = lys[i]
            delta = layer.delta
            layer.w = layer.w - beta * np.dot(delta, pre_a.T)
            layer.b = layer.b - beta * np.sum(delta, axis=1, keepdims=True)
            pre_a = lys[i].a

    # compute the loss (mean squared error; the MATLAB cost is a summed squared norm)
    @staticmethod
    def compute_loss(al, y):
        return np.mean(np.square(al - y))
# load data (defined for fitting sin(x); unused in the test below)
def load_data():
    x = np.arange(0.0, 1.0, 0.01)
    y = 0.4 * np.sin(2 * np.pi * x) + 0.5
    plt.plot(x, y)   # visualize the data
    return x, y
# run the test
m = 10
np.random.seed(300)
x1 = np.array([0.1,0.3,0.1,0.6,0.4,0.6,0.5,0.9,0.4,0.7])
x2 = np.array([0.1,0.4,0.5,0.9,0.2,0.3,0.6,0.2,0.4,0.6])
y = np.array([[1,1,1,1,1, 0,0,0,0,0],
              [0,0,0,0,0, 1,1,1,1,1.0]])   # the 1.0 makes the array float
x = np.array([x1, x2])
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
fnn = FullCon([2, 2, 3, 2])   # same 2-2-3-2 architecture as the MATLAB code
yy = y[0]
ax.scatter(x1, x2, yy, c='r', marker='v')   # targets in red
al = 0
los = 1.0
learning_rate = 5
for i in range(1640000):
    # stochastic-gradient variant: pick one sample per step
    #k = np.random.randint(m)
    #inx = np.array([x[:,k]]).T
    #outy = np.array([y[:,k]]).T
    inx = x      # batch variant: use all samples at once
    outy = y
    al = fnn.forward(inx)
    fnn.backward(al, outy)
    fnn.update_wandb(learning_rate, inx)
    if i % 1000 == 0:
        aly = fnn.forward(x)
        los = fnn.compute_loss(aly, y)
        print("i=%d los=%f" % (i, los))
        if los < 0.031:
            break
aly = fnn.forward(x)
zz = aly[0]
print("aly ", aly)
zxy = [[zi[0], zi[1]] for zi in aly.T]   # per-sample output pairs
print("zxy =", zxy)
print("norm2 ", [np.linalg.norm(zi, ord=2) for zi in zxy])
print("norm1 ", [np.linalg.norm(zi, ord=1) for zi in zxy])
ax.scatter(x1, x2, zz, c='g', marker='^')   # predictions in green
plt.show()
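For comparison, a sketch of the per-sample stochastic-gradient variant of the loop above (this is just the commented-out sampling lines put back in; fnn, x, y, m, learning_rate as defined earlier):

for i in range(100000):
    k = np.random.randint(m)   # pick one training point at random, like randi(10)
    inx = x[:, k:k+1]          # the slice keeps the (2, 1) column shape
    outy = y[:, k:k+1]         # one column, so the /m in backward() is a no-op
    al = fnn.forward(inx)
    fnn.backward(al, outy)
    fnn.update_wandb(learning_rate, inx)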
Results of running the code
All of the code is on GitHub: https://github.com/foolpanda/deepFC
Getting an activation function by name
#!/usr/bin/python
# -*- coding: utf-8 -*-
import numpy as np

# Activation functions, looked up by name
class ActivateFunction:
    def __init__(self, actname):
        self.actname = actname
        self.Af, self.Df = ActivateFunction.getAFandDF(actname)

    def funValue(self, x):
        v = self.Af(x)   # activation value
        z = self.Df(v)   # derivative, computed from the activation value
        return (v, z)

    @staticmethod
    def getAFandDF(var):
        return {
            'sigmoid': (ActivateFunction.sigmoid, ActivateFunction.sigmoid_primez),
            'tanh':    (ActivateFunction.tanh,    ActivateFunction.tanh_primez),
            'relu':    (ActivateFunction.relu,    ActivateFunction.relu_primez),
        }.get(var, 'error')   # 'error' is the default for unknown names; change as needed
    @staticmethod
    def sigmoid(z):
        return 1.0 / (1.0 + np.exp(-z))

    @staticmethod
    def sigmoid_primez(a):
        return a * (1 - a)

    @staticmethod
    def tanh(x):
        return 2 * ActivateFunction.sigmoid(2 * x) - 1

    @staticmethod
    def tanh_primez(a):
        # tanh'(x) = 1 - tanh(x)^2, in terms of the activation a
        return 1 - a * a

    @staticmethod
    def relu(x):
        # np.maximum, not np.max: elementwise comparison with 0
        return np.maximum(0, x)

    @staticmethod
    def relu_primez(a):
        # vectorized: 1 where the activation is positive, else 0
        return np.where(a > 0, 1.0, 0.0)
    @staticmethod
    def softplus(x):
        return np.log(1.0 + np.exp(x))

    @staticmethod
    def softplus_primez(x):
        # note: unlike the *_primez above, this takes the pre-activation x
        return ActivateFunction.sigmoid(x)

afun = ActivateFunction('sigmoid')
print("afun(%s,%s)" % afun.funValue(0))

Output: afun(0.5,0.25)
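One way to use this class would be to swap it into the NNLayer above in place of the hard-coded sigmoid; a minimal sketch (this wiring is my own illustration, not code from the repository):

class NNLayerNamed:
    """Like NNLayer, but with the activation chosen by name."""
    def __init__(self, shape, actname='sigmoid'):
        self.w = np.random.random([shape[1], shape[0]])
        self.b = np.zeros((shape[1], 1))
        self.act = ActivateFunction(actname)

    def forward(self, inputx):
        self.z = np.dot(self.w, inputx) + self.b
        self.a, self.Dl = self.act.funValue(self.z)   # value and derivative together
        return self.a

layer = NNLayerNamed((2, 3), 'tanh')
print(layer.forward(np.array([[0.1], [0.2]])))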