数据集
链接:https://pan.baidu.com/s/1T8-0LfHWOJrFiX7175rt-A 提取码:8737
代码
1 import numpy as np 2 from scipy import io as spio 3 from matplotlib import pyplot as plt 4 from scipy import optimize 5 from sklearn import datasets 6 from sklearn.preprocessing import StandardScaler 7 import time 8 import pandas as pd 9 10 def loadmat(FileName): 11 return spio.loadmat(FileName) 12 13 def displaydata(imdata): 14 sum = 0 15 pad = 1 16 m,n = imdata.shape 17 width = np.int32(np.round(np.sqrt(n))) 18 height = np.int32(n/width) 19 rows = np.int32(np.floor(np.sqrt(m))) 20 cols = np.int32(np.ceil(m/rows)) 21 22 display = -np.ones((pad+rows*(height + pad),pad+cols*(width+pad))) 23 24 for i in range(rows): 25 for j in range(cols): 26 if sum >= m: 27 break 28 display[pad+i*(height+pad):pad+i*(height+pad)+height,pad+j*(width+pad):pad+j*(width+pad)+width] = imdata[sum,:].reshape(height,width,order = "F") 29 sum += 1 30 if sum >= m: 31 break 32 33 plt.imshow(display,cmap = 'gray') 34 plt.axis('off') 35 plt.show() 36 37 def sigmoid(z): 38 return 1/(1+np.exp(-z)) 39 40 def sigmoidGradient(z): 41 return sigmoid(z) * (1 - sigmoid(z)) 42 43 def randInitializeWeight(L_in,L_out): 44 epsilon_init = (6.0 / (L_in+L_out))**0.5; 45 W = np.zeros((L_out,L_in+1)) 46 W = np.random.rand(L_out, 1 + L_in) * 2 * epsilon_init - epsilon_init 47 return W 48 49 def nnCostfunction(nn_params,input_layer_size,hidden_layer_size,num_label,X,y,Lambda): 50 length = nn_params.shape[0] 51 #还原 52 theta1 = nn_params[0:hidden_layer_size*(input_layer_size+1)].reshape(hidden_layer_size,input_layer_size+1) 53 theta2 = nn_params[hidden_layer_size*(input_layer_size+1):length].reshape(num_label,hidden_layer_size+1) 54 55 m,n = X.shape 56 class_y = np.zeros((m,num_label)) 57 #映射y 58 for i in range(num_label): 59 class_y[:,i] = np.int32(y==i).reshape(1,-1) #映射reshape 才能赋值 60 61 theta_count1 = theta1.shape[1] 62 theta1_x = theta1[:,1:theta_count1] 63 theta_count2 = theta1.shape[1] 64 theta2_x = theta2[:,1:theta_count2] 65 66 #计算正则化参数 theta^2 67 68 term = np.dot(np.transpose(np.vstack((theta1_x.reshape(-1,1),theta2_x.reshape(-1,1)))),np.vstack((theta1_x.reshape(-1,1),theta2_x.reshape(-1,1)))) 69 70 a1 = np.hstack((np.ones((m,1)),X)) 71 z2 = np.dot(a1,np.transpose(theta1)) 72 a2 = np.hstack((np.ones((m,1)),sigmoid(z2))) 73 z3 = np.dot(a2,np.transpose(theta2)) 74 h = sigmoid(z3) 75 76 J = -(np.dot(np.transpose(class_y.reshape(-1,1)),np.log(h.reshape(-1,1)))+np.dot(np.transpose(1-class_y.reshape(-1,1)),np.log(1-h.reshape(-1,1)))-Lambda*term/2)/m 77 78 return np.ravel(J) 79 80 def nnGradient(nn_params,input_layer_size,hidden_layer_size,num_labels,X,y,Lambda): 81 length = nn_params.shape[0] 82 Theta1 = nn_params[0:hidden_layer_size*(input_layer_size+1)].reshape(hidden_layer_size,input_layer_size+1).copy() # 这里使用copy函数,否则下面修改Theta的值,nn_params也会一起修改 83 Theta2 = nn_params[hidden_layer_size*(input_layer_size+1):length].reshape(num_labels,hidden_layer_size+1).copy() 84 m = X.shape[0] 85 class_y = np.zeros((m,num_labels)) # 数据的y对应0-9,需要映射为0/1的关系 86 # 映射y 87 for i in range(num_labels): 88 class_y[:,i] = np.int32(y==i).reshape(1,-1) # 注意reshape(1,-1)才可以赋值 89 90 '''去掉theta1和theta2的第一列,因为正则化时从1开始''' 91 Theta1_colCount = Theta1.shape[1] 92 Theta1_x = Theta1[:,1:Theta1_colCount] 93 Theta2_colCount = Theta2.shape[1] 94 Theta2_x = Theta2[:,1:Theta2_colCount] 95 96 Theta1_grad = np.zeros((Theta1.shape)) #第一层到第二层的权重 97 Theta2_grad = np.zeros((Theta2.shape)) #第二层到第三层的权重 98 99 100 '''正向传播,每次需要补上一列1的偏置bias''' 101 a1 = np.hstack((np.ones((m,1)),X)) 102 z2 = np.dot(a1,np.transpose(Theta1)) 103 a2 = sigmoid(z2) 104 a2 = np.hstack((np.ones((m,1)),a2)) 105 z3 = np.dot(a2,np.transpose(Theta2)) 106 h = sigmoid(z3) 107 108 109 '''反向传播,delta为误差,''' 110 delta3 = np.zeros((m,num_labels)) 111 delta2 = np.zeros((m,hidden_layer_size)) 112 for i in range(m): 113 #delta3[i,:] = (h[i,:]-class_y[i,:])*sigmoidGradient(z3[i,:]) # 均方误差的误差率 114 delta3[i,:] = h[i,:]-class_y[i,:] # 交叉熵误差率 115 Theta2_grad = Theta2_grad+np.dot(np.transpose(delta3[i,:].reshape(1,-1)),a2[i,:].reshape(1,-1)) 116 delta2[i,:] = np.dot(delta3[i,:].reshape(1,-1),Theta2_x)*sigmoidGradient(z2[i,:]) 117 Theta1_grad = Theta1_grad+np.dot(np.transpose(delta2[i,:].reshape(1,-1)),a1[i,:].reshape(1,-1)) 118 119 Theta1[:,0] = 0 120 Theta2[:,0] = 0 121 '''梯度''' 122 grad = (np.vstack((Theta1_grad.reshape(-1,1),Theta2_grad.reshape(-1,1)))+Lambda*np.vstack((Theta1.reshape(-1,1),Theta2.reshape(-1,1))))/m 123 return np.ravel(grad) 124 125 def predict(theta1,theta2,X): 126 m = X.shape[0] 127 128 X = np.hstack((np.ones((m,1)),X)) 129 h1 = sigmoid(np.dot(X,np.transpose(theta1))) 130 h1 = np.hstack((np.ones((m,1)),h1)) 131 h2 = sigmoid(np.dot(h1,np.transpose(theta2))) 132 133 p = -np.array(np.where(h2[0,:] == np.max(h2,axis = 1)[0])) 134 for i in range(1,m): 135 t = np.array(np.where(h2[i,:] == np.max(h2,axis = 1)[i])) 136 p = np.vstack((p,t)) 137 return p 138 139 def checkGradient(Lambda = 0): 140 input_layer_size = 3 141 hidden_layer_size = 5 142 num_labels = 3 143 m = 5 144 initial_Theta1 = debugInitializeWeights(input_layer_size,hidden_layer_size); 145 initial_Theta2 = debugInitializeWeights(hidden_layer_size,num_labels) 146 X = debugInitializeWeights(input_layer_size-1,m) 147 y = np.transpose(np.mod(np.arange(1,m+1), num_labels))# 初始化y 148 149 y = y.reshape(-1,1) 150 nn_params = np.vstack((initial_Theta1.reshape(-1,1),initial_Theta2.reshape(-1,1))) 151 #展开theta 152 '''BP求出梯度''' 153 grad = nnGradient(nn_params, input_layer_size, hidden_layer_size, 154 num_labels, X, y, Lambda) 155 '''使用数值法计算梯度''' 156 num_grad = np.zeros((nn_params.shape[0])) 157 step = np.zeros((nn_params.shape[0])) 158 e = 1e-4 159 for i in range(nn_params.shape[0]): 160 step[i] = e 161 loss1 = nnCostfunction(nn_params-step.reshape(-1,1), input_layer_size, 162 hidden_layer_size,num_labels, X, y,Lambda) 163 loss2 = nnCostfunction(nn_params+step.reshape(-1,1), input_layer_size, 164 hidden_layer_size,num_labels, X, y,Lambda) 165 num_grad[i] = (loss2-loss1)/(2*e) 166 step[i]=0 167 # 显示两列比较 168 res = np.hstack((num_grad.reshape(-1,1),grad.reshape(-1,1))) 169 print("检查梯度的结果,第一列为数值法计算得到的,第二列为BP得到的:") 170 print (res) 171 172 def debugInitializeWeights(fan_in,fan_out): 173 W = np.zeros((fan_out,fan_in+1)) 174 x = np.arange(1,fan_out*(fan_in+1)+1) 175 W = np.sin(x).reshape(W.shape)/10 176 return W
主程序:
def NeuralNetwrok(input_layer_size,hidden_layer_size,out_put_layer): data = loadmat("ex3data1.mat") X = data['X'] y = data['y'] m,n = X.shape #随机搞点数据 rand_indices = [t for t in [np.random.randint(x,m) for x in range(100)]] displaydata(X[rand_indices,:]) #初始化权重 theta1 theta2 initial_theta1 = randInitializeWeight(input_layer_size,hidden_layer_size) initial_theta2 = randInitializeWeight(hidden_layer_size,out_put_layer) initial_nn_params = np.vstack((initial_theta1.reshape(-1,1),initial_theta2.reshape(-1,1))) Lambda = 1 start = time.time() result = optimize.fmin_cg(nnCostfunction,initial_nn_params,fprime = nnGradient ,args=(input_layer_size,hidden_layer_size,out_put_layer,X,y,Lambda),maxiter=200) print("Running time:", time.time() - start) #print(result) length = result.shape[0] Theta1 = result[0:hidden_layer_size*(input_layer_size+1)].reshape(hidden_layer_size,input_layer_size+1) Theta2 = result[hidden_layer_size*(input_layer_size+1):length].reshape(out_put_layer,hidden_layer_size+1) displaydata(Theta1[:,1:length]) displaydata(Theta2[:,1:length]) p = predict(Theta1,Theta2,X) print("精度为:",np.mean(np.float64(p == y.reshape(-1,1))*100)) res = np.hstack((p,y.reshape(-1,1))) np.savetxt("predict.csv", res, delimiter=',') if __name__ == "__main__": checkGradient() NeuralNetwrok(400,25,10)