Python实现神经网络NN（手写字体识别）

数据集

链接：https://pan.baidu.com/s/1T8-0LfHWOJrFiX7175rt-A 
提取码：8737

代码

  1 import numpy as np
  2 from scipy import io as spio
  3 from matplotlib import pyplot as plt
  4 from scipy import optimize
  5 from sklearn import datasets
  6 from sklearn.preprocessing import StandardScaler
  7 import time
  8 import pandas as pd
  9 
 10 def loadmat(FileName):
 11     return spio.loadmat(FileName)
 12 
 13 def displaydata(imdata):
 14     sum = 0
 15     pad = 1
 16     m,n = imdata.shape
 17     width = np.int32(np.round(np.sqrt(n)))
 18     height = np.int32(n/width)
 19     rows = np.int32(np.floor(np.sqrt(m)))
 20     cols = np.int32(np.ceil(m/rows))
 21     
 22     display = -np.ones((pad+rows*(height + pad),pad+cols*(width+pad)))
 23     
 24     for i in range(rows):
 25         for j in range(cols):
 26             if sum >= m:
 27                 break
 28             display[pad+i*(height+pad):pad+i*(height+pad)+height,pad+j*(width+pad):pad+j*(width+pad)+width] = imdata[sum,:].reshape(height,width,order = "F")
 29             sum += 1
 30         if sum >= m:
 31             break
 32     
 33     plt.imshow(display,cmap = 'gray')
 34     plt.axis('off')
 35     plt.show()
 36 
 37 def sigmoid(z):
 38     return 1/(1+np.exp(-z))
 39 
 40 def sigmoidGradient(z):
 41     return sigmoid(z) * (1 - sigmoid(z))
 42 
 43 def randInitializeWeight(L_in,L_out):
 44     epsilon_init = (6.0 / (L_in+L_out))**0.5; 
 45     W = np.zeros((L_out,L_in+1))
 46     W = np.random.rand(L_out, 1 + L_in) * 2 * epsilon_init - epsilon_init
 47     return W
 48 
 49 def nnCostfunction(nn_params,input_layer_size,hidden_layer_size,num_label,X,y,Lambda):
 50     length = nn_params.shape[0]
 51     #还原
 52     theta1 = nn_params[0:hidden_layer_size*(input_layer_size+1)].reshape(hidden_layer_size,input_layer_size+1)
 53     theta2 = nn_params[hidden_layer_size*(input_layer_size+1):length].reshape(num_label,hidden_layer_size+1)
 54     
 55     m,n = X.shape
 56     class_y = np.zeros((m,num_label))
 57     #映射y
 58     for i in range(num_label):
 59         class_y[:,i] = np.int32(y==i).reshape(1,-1)  #映射reshape 才能赋值
 60     
 61     theta_count1 = theta1.shape[1]
 62     theta1_x = theta1[:,1:theta_count1]
 63     theta_count2 = theta1.shape[1]
 64     theta2_x = theta2[:,1:theta_count2]
 65     
 66     #计算正则化参数 theta^2
 67     
 68     term = np.dot(np.transpose(np.vstack((theta1_x.reshape(-1,1),theta2_x.reshape(-1,1)))),np.vstack((theta1_x.reshape(-1,1),theta2_x.reshape(-1,1))))
 69     
 70     a1 = np.hstack((np.ones((m,1)),X))
 71     z2 = np.dot(a1,np.transpose(theta1))
 72     a2 = np.hstack((np.ones((m,1)),sigmoid(z2)))
 73     z3 = np.dot(a2,np.transpose(theta2))
 74     h = sigmoid(z3)
 75     
 76     J = -(np.dot(np.transpose(class_y.reshape(-1,1)),np.log(h.reshape(-1,1)))+np.dot(np.transpose(1-class_y.reshape(-1,1)),np.log(1-h.reshape(-1,1)))-Lambda*term/2)/m
 77     
 78     return np.ravel(J)
 79 
 80 def nnGradient(nn_params,input_layer_size,hidden_layer_size,num_labels,X,y,Lambda):
 81     length = nn_params.shape[0]
 82     Theta1 = nn_params[0:hidden_layer_size*(input_layer_size+1)].reshape(hidden_layer_size,input_layer_size+1).copy()   # 这里使用copy函数，否则下面修改Theta的值，nn_params也会一起修改
 83     Theta2 = nn_params[hidden_layer_size*(input_layer_size+1):length].reshape(num_labels,hidden_layer_size+1).copy()
 84     m = X.shape[0]
 85     class_y = np.zeros((m,num_labels))      # 数据的y对应0-9，需要映射为0/1的关系    
 86     # 映射y
 87     for i in range(num_labels):
 88         class_y[:,i] = np.int32(y==i).reshape(1,-1) # 注意reshape(1,-1)才可以赋值
 89      
 90     '''去掉theta1和theta2的第一列，因为正则化时从1开始'''
 91     Theta1_colCount = Theta1.shape[1]    
 92     Theta1_x = Theta1[:,1:Theta1_colCount]
 93     Theta2_colCount = Theta2.shape[1]    
 94     Theta2_x = Theta2[:,1:Theta2_colCount]
 95     
 96     Theta1_grad = np.zeros((Theta1.shape))  #第一层到第二层的权重
 97     Theta2_grad = np.zeros((Theta2.shape))  #第二层到第三层的权重
 98       
 99    
100     '''正向传播，每次需要补上一列1的偏置bias'''
101     a1 = np.hstack((np.ones((m,1)),X))
102     z2 = np.dot(a1,np.transpose(Theta1))
103     a2 = sigmoid(z2)
104     a2 = np.hstack((np.ones((m,1)),a2))
105     z3 = np.dot(a2,np.transpose(Theta2))
106     h  = sigmoid(z3)
107     
108     
109     '''反向传播，delta为误差，'''
110     delta3 = np.zeros((m,num_labels))
111     delta2 = np.zeros((m,hidden_layer_size))
112     for i in range(m):
113         #delta3[i,:] = (h[i,:]-class_y[i,:])*sigmoidGradient(z3[i,:])  # 均方误差的误差率
114         delta3[i,:] = h[i,:]-class_y[i,:]                              # 交叉熵误差率
115         Theta2_grad = Theta2_grad+np.dot(np.transpose(delta3[i,:].reshape(1,-1)),a2[i,:].reshape(1,-1))
116         delta2[i,:] = np.dot(delta3[i,:].reshape(1,-1),Theta2_x)*sigmoidGradient(z2[i,:])
117         Theta1_grad = Theta1_grad+np.dot(np.transpose(delta2[i,:].reshape(1,-1)),a1[i,:].reshape(1,-1))
118     
119     Theta1[:,0] = 0
120     Theta2[:,0] = 0          
121     '''梯度'''
122     grad = (np.vstack((Theta1_grad.reshape(-1,1),Theta2_grad.reshape(-1,1)))+Lambda*np.vstack((Theta1.reshape(-1,1),Theta2.reshape(-1,1))))/m
123     return np.ravel(grad)
124 
125 def predict(theta1,theta2,X):
126     m = X.shape[0]
127     
128     X = np.hstack((np.ones((m,1)),X))
129     h1 = sigmoid(np.dot(X,np.transpose(theta1)))
130     h1 = np.hstack((np.ones((m,1)),h1))
131     h2 = sigmoid(np.dot(h1,np.transpose(theta2)))
132     
133     p = -np.array(np.where(h2[0,:] == np.max(h2,axis = 1)[0]))
134     for i in range(1,m):
135         t = np.array(np.where(h2[i,:] == np.max(h2,axis = 1)[i]))
136         p = np.vstack((p,t))
137     return p
138 
139 def checkGradient(Lambda = 0):
140     input_layer_size = 3
141     hidden_layer_size = 5
142     num_labels = 3
143     m = 5
144     initial_Theta1 = debugInitializeWeights(input_layer_size,hidden_layer_size); 
145     initial_Theta2 = debugInitializeWeights(hidden_layer_size,num_labels)
146     X = debugInitializeWeights(input_layer_size-1,m)
147     y = np.transpose(np.mod(np.arange(1,m+1), num_labels))# 初始化y
148     
149     y = y.reshape(-1,1)
150     nn_params = np.vstack((initial_Theta1.reshape(-1,1),initial_Theta2.reshape(-1,1)))  
151     #展开theta 
152     '''BP求出梯度'''
153     grad = nnGradient(nn_params, input_layer_size, hidden_layer_size, 
154                      num_labels, X, y, Lambda)  
155     '''使用数值法计算梯度'''
156     num_grad = np.zeros((nn_params.shape[0]))
157     step = np.zeros((nn_params.shape[0]))
158     e = 1e-4
159     for i in range(nn_params.shape[0]):
160         step[i] = e
161         loss1 = nnCostfunction(nn_params-step.reshape(-1,1), input_layer_size, 
162                                 hidden_layer_size,num_labels, X, y,Lambda)
163         loss2 = nnCostfunction(nn_params+step.reshape(-1,1), input_layer_size, 
164                                 hidden_layer_size,num_labels, X, y,Lambda)
165         num_grad[i] = (loss2-loss1)/(2*e)
166         step[i]=0
167     # 显示两列比较
168     res = np.hstack((num_grad.reshape(-1,1),grad.reshape(-1,1)))
169     print("检查梯度的结果，第一列为数值法计算得到的，第二列为BP得到的:")
170     print (res)
171 
172 def debugInitializeWeights(fan_in,fan_out):
173     W = np.zeros((fan_out,fan_in+1))
174     x = np.arange(1,fan_out*(fan_in+1)+1)
175     W = np.sin(x).reshape(W.shape)/10
176     return W

主程序：

def NeuralNetwrok(input_layer_size,hidden_layer_size,out_put_layer):
    data = loadmat("ex3data1.mat")
    X = data['X']
    y = data['y']
    m,n = X.shape
    
    #随机搞点数据
    rand_indices = [t for t in [np.random.randint(x,m) for x in range(100)]]
    displaydata(X[rand_indices,:])
    
    #初始化权重 theta1 theta2
    initial_theta1 = randInitializeWeight(input_layer_size,hidden_layer_size)
    initial_theta2 = randInitializeWeight(hidden_layer_size,out_put_layer)
       
    initial_nn_params = np.vstack((initial_theta1.reshape(-1,1),initial_theta2.reshape(-1,1)))
    Lambda = 1
    
    start = time.time()
    result = optimize.fmin_cg(nnCostfunction,initial_nn_params,fprime = nnGradient ,args=(input_layer_size,hidden_layer_size,out_put_layer,X,y,Lambda),maxiter=200)
    
    print("Running time:", time.time() - start)
    #print(result)
    
    length = result.shape[0]
    Theta1 = result[0:hidden_layer_size*(input_layer_size+1)].reshape(hidden_layer_size,input_layer_size+1)
    Theta2 = result[hidden_layer_size*(input_layer_size+1):length].reshape(out_put_layer,hidden_layer_size+1)    
    displaydata(Theta1[:,1:length])
    displaydata(Theta2[:,1:length])    
    
    p = predict(Theta1,Theta2,X)
    print("精度为：",np.mean(np.float64(p == y.reshape(-1,1))*100))
    res = np.hstack((p,y.reshape(-1,1)))
    np.savetxt("predict.csv", res, delimiter=',')
    
    
if __name__ == "__main__":
    checkGradient()
    NeuralNetwrok(400,25,10)

Python实现神经网络NN（手写字体识别）

猜你喜欢