The training and test sets were found online; the code is as follows:
import numpy as np
import h5py
from imageio import imread  # scipy.misc.imread was removed in SciPy 1.2; imageio's imread is a drop-in replacement here

# ------------------------DNN framework------------------------

# sigmoid forward activation
def sigmoid(Z):
    # Z -- linear output of the current layer
    # cache -- stores Z for the backward pass
    A = 1.0 / (1 + np.exp(-Z))
    cache = Z
    return A, cache

# ReLU forward activation
def ReLU(Z):
    A = np.maximum(0, Z)
    assert(A.shape == Z.shape)
    cache = Z
    return A, cache

# sigmoid backward activation
def sigmoid_backward(dA, cache):
    # dZ[l] = dA[l] * g[l]'(Z[l])
    Z = cache
    f = 1.0 / (1 + np.exp(-Z))
    dZ = dA * f * (1 - f)
    assert(dZ.shape == Z.shape)
    return dZ

# ReLU backward activation
def ReLU_backward(dA, cache):
    Z = cache
    dZ = np.array(dA, copy=True)
    dZ[Z <= 0] = 0
    assert(dZ.shape == Z.shape)
    return dZ

# ------------------------Forward propagation------------------------

# Initialize the parameters of every layer
def initialize_parameters(layer):
    # layer -- list with the number of units in each layer
    L = len(layer)
    parameters = {}
    # shape of W for layer i: rows -- units in this layer, columns -- units in the previous layer
    # shape of b for layer i: rows -- units in this layer, columns -- always 1
    for i in range(1, L):
        parameters["W" + str(i)] = np.random.randn(layer[i], layer[i-1]) / np.sqrt(layer[i-1])
        parameters["b" + str(i)] = np.zeros((layer[i], 1))
        assert (parameters["W" + str(i)].shape == (layer[i], layer[i-1]))
        assert (parameters["b" + str(i)].shape == (layer[i], 1))
    return parameters

# Test code
'''
initialize_parameters([3,4,1])
'''

# Linear part of the forward pass
def linear_forward(A_prev, W, b):
    # A_prev -- activations handed over by the previous layer
    # W, b -- parameters of the current layer
    # Z = W*A + b
    Z = np.dot(W, A_prev) + b
    assert (Z.shape == (W.shape[0], A_prev.shape[1]))
    # store A_prev, W, b
    cache = (A_prev, W, b)
    return Z, cache

# Test code
'''
A_prev = np.array([[1], [2], [3]])
parameters = initialize_parameters([3, 4, 1])
Z, cache = linear_forward(A_prev, parameters["W1"], parameters["b1"])
'''

# Forward pass -- linear -> activation
def linear_forward_activation(A_prev, W, b, activation_function):
    # linear_cache -- stores A_prev, W, b
    # activation_cache -- stores Z
    if activation_function == "sigmoid":
        Z, linear_cache = linear_forward(A_prev, W, b)
        A, activation_cache = sigmoid(Z)
    elif activation_function == "relu":
        Z, linear_cache = linear_forward(A_prev, W, b)
        A, activation_cache = ReLU(Z)
    # shape of A: rows -- rows of W, columns -- columns of A_prev
    assert (A.shape == (W.shape[0], A_prev.shape[1]))
    cache = (linear_cache, activation_cache)
    return A, cache

# Full forward pass
def deep_forward(X, parameters):
    # caches -- stores A_prev, W, b of the hidden and output layers
    # AL -- output A of the output layer
    caches = []
    A = X
    L = len(parameters) // 2
    # hidden layers use the ReLU activation
    for i in range(1, L):
        A_prev = A
        A, cache = linear_forward_activation(A_prev, parameters["W" + str(i)], parameters["b" + str(i)], "relu")
        caches.append(cache)
    # the output layer uses the sigmoid activation
    AL, cache = linear_forward_activation(A, parameters["W" + str(L)], parameters["b" + str(L)], "sigmoid")
    caches.append(cache)
    assert(AL.shape == (1, X.shape[1]))
    return AL, caches

# Cost function (cross-entropy)
def compute_cost(AL, Y):
    # Y -- true labels
    # AL -- predictions Y(hat) from the forward pass
    m = Y.shape[1]
    cost = (1./m) * (-np.dot(Y, np.log(AL).T) - np.dot(1-Y, np.log(1-AL).T))
    cost = np.squeeze(cost)
    assert (cost.shape == ())
    return cost

# Test code
'''
Y = np.asarray([[1,1,1]])
AL = np.asarray([[.8,.9,0.4]])
cost = compute_cost(AL, Y)

X = np.array([[1],[2],[3],[4],[5]])
parameters = initialize_parameters([5,4,3,1])
AL, caches = deep_forward(X, parameters)
Y = np.array([[2]])
print(compute_cost(AL, Y))
'''
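As an aside, the division by np.sqrt(layer[i-1]) in initialize_parameters is what keeps the pre-activations Z at roughly unit variance regardless of layer width. A quick check of that claim (the layer sizes below are arbitrary assumptions, not part of the original code):

# Sanity check for the 1/sqrt(n_prev) weight scaling
np.random.seed(0)
n_prev, n, m = 500, 100, 1000                     # arbitrary sizes
W = np.random.randn(n, n_prev) / np.sqrt(n_prev)  # same scaling as initialize_parameters
A_prev = np.random.randn(n_prev, m)               # unit-variance inputs
Z = np.dot(W, A_prev)
print(Z.var())                                    # prints a value close to 1.0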
# ------------------------Backward propagation------------------------

# Linear part of the backward pass
def linear_backward(dZ, cache):
    A_prev, W, b = cache
    # Principle:
    '''
    dW[l] = dZ[l] * A_prev.T / m
    db[l] = sum(dZ[l]) / m
    dA[l-1] = W[l].T * dZ[l]
    '''
    # m -- number of examples
    m = A_prev.shape[1]
    # dW -- gradient of the cost w.r.t. W of the current layer
    # db -- gradient of the cost w.r.t. b of the current layer
    # dA_prev -- gradient of the cost w.r.t. A of layer l-1
    dW = np.dot(dZ, A_prev.T) / m
    db = np.sum(dZ, axis=1, keepdims=True) / m  # keepdims preserves the 2-D shape
    dA_prev = np.dot(W.T, dZ)
    assert (dW.shape == W.shape)
    assert (db.shape == b.shape)
    assert (dA_prev.shape == A_prev.shape)
    return dA_prev, dW, db

# Test code -- current layer has 3 units, previous layer has 1
'''
np.random.seed(3)
dZ = np.random.randn(3,1)
A_prev = np.random.randn(1,1)
W = np.random.randn(3,1)
b = np.random.randn(3,1)
linear_cache = (A_prev, W, b)
dA_prev, dW, db = linear_backward(dZ, linear_cache)
'''

# Backward pass -- linear -> activation
def linear_activation_backward(dA, cache, activation):
    # linear_cache -- stores A_prev, W, b
    linear_cache, activation_cache = cache
    if activation == "sigmoid":
        dZ = sigmoid_backward(dA, activation_cache)
        dA_prev, dW, db = linear_backward(dZ, linear_cache)
    elif activation == "relu":
        dZ = ReLU_backward(dA, activation_cache)
        dA_prev, dW, db = linear_backward(dZ, linear_cache)
    return dA_prev, dW, db

# Test code
'''
np.random.seed(2)
dA = np.random.randn(1,2)
A = np.random.randn(3,2)
W = np.random.randn(1,3)
b = np.random.randn(1,1)
Z = np.random.randn(1,2)
linear_cache = (A, W, b)
activation_cache = Z
cache = (linear_cache, activation_cache)
dA_prev, dW, db = linear_activation_backward(dA, cache, "relu")
'''

# Full backward pass
def deep_backward(AL, Y, caches):
    # AL -- predictions produced by the forward pass
    # Y -- true labels
    grads = {}
    # depth of the network
    L = len(caches)
    # make Y and AL the same shape
    Y = Y.reshape(AL.shape)
    # derivative of the cost w.r.t. AL
    dAL = -(np.divide(Y, AL) - np.divide(1 - Y, 1 - AL))
    # output layer: sigmoid backward
    # note: grads["dA" + str(l)] holds the gradient passed back to layer l-1
    current_cache = caches[L - 1]
    grads["dA" + str(L)], grads["dW" + str(L)], grads["db" + str(L)] = linear_activation_backward(dAL, current_cache, "sigmoid")
    # hidden layers: ReLU backward; record every gradient for the update step
    for i in reversed(range(L - 1)):
        current_cache = caches[i]
        dA_prev, dW, db = linear_activation_backward(grads["dA" + str(i + 2)], current_cache, "relu")
        grads["dA" + str(i + 1)] = dA_prev
        grads["dW" + str(i + 1)] = dW
        grads["db" + str(i + 1)] = db
    return grads

# Test code
'''
np.random.seed(3)
AL = np.random.randn(1, 2)
Y = np.array([[1, 0]])
A1 = np.random.randn(4,2)
W1 = np.random.randn(3,4)
b1 = np.random.randn(3,1)
Z1 = np.random.randn(3,2)
linear_cache_activation_1 = ((A1, W1, b1), Z1)
A2 = np.random.randn(3,2)
W2 = np.random.randn(1,3)
b2 = np.random.randn(1,1)
Z2 = np.random.randn(1,2)
linear_cache_activation_2 = ((A2, W2, b2), Z2)
caches = (linear_cache_activation_1, linear_cache_activation_2)
grads = deep_backward(AL, Y, caches)
print(grads)
'''

# Parameter update (gradient descent step)
def update_parameters(parameters, grads, learning_rate):
    # learning_rate -- step size of gradient descent
    L = len(parameters) // 2  # number of layers in the network
    # update every W and b
    for i in range(L):
        parameters["W" + str(i + 1)] = parameters["W" + str(i + 1)] - learning_rate * grads["dW" + str(i + 1)]
        parameters["b" + str(i + 1)] = parameters["b" + str(i + 1)] - learning_rate * grads["db" + str(i + 1)]
    return parameters

# End-to-end test code
'''
X = np.array([[1],[2],[3],[4],[5]])
parameters = initialize_parameters([5,4,3,1])
print("parameters:{}".format(parameters))
AL, caches = deep_forward(X, parameters)
Y = np.array([[2]])
grads = deep_backward(AL, Y, caches)
learning_rate = 0.05
parameters_update = update_parameters(parameters, grads, learning_rate)
print("parameters_update:{}".format(parameters_update))
'''
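Before training on real data, the analytic gradients from deep_backward can be checked against centered finite differences. The sketch below is not part of the original post; it only assumes the functions defined above, and the tiny layer sizes and epsilon are illustrative choices:

# Numerical gradient check (illustrative sketch)
def gradient_check(X, Y, layer, epsilon=1e-7):
    parameters = initialize_parameters(layer)
    AL, caches = deep_forward(X, parameters)
    grads = deep_backward(AL, Y, caches)
    worst = 0.0
    for key, theta in parameters.items():          # "W1", "b1", "W2", ...
        it = np.nditer(theta, flags=["multi_index"])
        while not it.finished:
            idx = it.multi_index
            old = theta[idx]
            theta[idx] = old + epsilon             # J(theta + eps)
            cost_plus = compute_cost(deep_forward(X, parameters)[0], Y)
            theta[idx] = old - epsilon             # J(theta - eps)
            cost_minus = compute_cost(deep_forward(X, parameters)[0], Y)
            theta[idx] = old                       # restore
            numeric = (cost_plus - cost_minus) / (2 * epsilon)
            analytic = grads["d" + key][idx]
            denom = max(abs(numeric) + abs(analytic), 1e-12)
            worst = max(worst, abs(numeric - analytic) / denom)
            it.iternext()
    return worst

np.random.seed(1)
print(gradient_check(np.random.randn(4, 3), np.array([[1, 0, 1]]), [4, 5, 1]))
# a result around 1e-7 or smaller suggests the backward pass is consistent

ReLU is not differentiable at 0, so the check can occasionally report a false mismatch when a pre-activation lands exactly on the kink; with random inputs this is very unlikely.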
# ------------------------Application------------------------

# Load the dataset
def load_dataset():
    train_dataset = h5py.File('train_catvnoncat.h5', "r")
    # training-set pixel arrays
    train_set_x_orig = np.array(train_dataset["train_set_x"][:])
    # training-set labels
    train_set_y_orig = np.array(train_dataset["train_set_y"][:])
    test_dataset = h5py.File('test_catvnoncat.h5', "r")
    # test-set pixel arrays
    test_set_x_orig = np.array(test_dataset["test_set_x"][:])
    # test-set labels
    test_set_y_orig = np.array(test_dataset["test_set_y"][:])
    # list of class names
    classes = np.array(test_dataset["list_classes"][:])
    train_set_y_orig = train_set_y_orig.reshape((1, train_set_y_orig.shape[0]))
    test_set_y_orig = test_set_y_orig.reshape((1, test_set_y_orig.shape[0]))
    return train_set_x_orig, train_set_y_orig, test_set_x_orig, test_set_y_orig, classes

# Predict with the trained network
def predict(X, y, parameters):
    # X -- input data
    # y -- true labels
    # number of examples
    number = X.shape[1]
    # prediction vector
    p = np.zeros((1, number))
    AL, caches = deep_forward(X, parameters)
    # threshold the sigmoid outputs at 0.5
    for i in range(0, AL.shape[1]):
        if AL[0, i] > 0.5:
            p[0, i] = 1
        else:
            p[0, i] = 0
    # fraction of correct predictions
    print("Accuracy: " + str(np.sum(p == y) / number))
    return p

def L_layer_model(X, Y, layer, iterations=1000, learning_rate=0.0075):
    costs = []
    parameters = initialize_parameters(layer)
    # each iteration: forward pass -> cost -> backward pass -> update
    for i in range(0, iterations):
        AL, caches = deep_forward(X, parameters)
        cost = compute_cost(AL, Y)
        grads = deep_backward(AL, Y, caches)
        parameters = update_parameters(parameters, grads, learning_rate)
        # print and record the cost every 100 iterations
        if i % 100 == 0:
            print("Cost after iteration %i: %f" % (i, cost))
            costs.append(cost)
    return parameters

# ---------main---------
train_x_orig, train_y, test_x_orig, test_y, classes = load_dataset()

# dataset information
m_train = train_x_orig.shape[0]
num_px = train_x_orig.shape[1]
m_test = test_x_orig.shape[0]
print("Number of training examples: " + str(m_train))
print("Number of testing examples: " + str(m_test))
print("Each image of size: (" + str(num_px) + "," + str(num_px) + ",3)")
print("train_x_orig shape: " + str(train_x_orig.shape))
print("train_y shape: " + str(train_y.shape))
print("test_x_orig shape: " + str(test_x_orig.shape))
print("test_y shape: " + str(test_y.shape))

# flatten each image into a column vector
train_x_flatten = train_x_orig.reshape(train_x_orig.shape[0], -1).T
test_x_flatten = test_x_orig.reshape(test_x_orig.shape[0], -1).T

# normalize pixel values into [0, 1]
train_x = train_x_flatten / 255
test_x = test_x_flatten / 255
print("train_x's shape: " + str(train_x.shape))
print("test_x's shape: " + str(test_x.shape))

# layer sizes of the network
layer = [12288, 20, 7, 5, 1]
parameters = L_layer_model(train_x, train_y, layer, iterations=2500)

# training-set accuracy
print("train:")
predictions_train = predict(train_x, train_y, parameters)
# test-set accuracy
print("test:")
predictions_test = predict(test_x, test_y, parameters)

# predict a single image (assumed to already be num_px x num_px RGB)
image = np.array(imread("image.jpg"))
my_image = image.reshape(-1, 1) / 255.  # flatten and normalize like the training data
# true label of the image [0 or 1]
my_label_y = [0]
print("my_image:")
my_predicted_image = predict(my_image, my_label_y, parameters)
print("result:")
print("y = " + str(np.squeeze(my_predicted_image)) + " " + classes[int(np.squeeze(my_predicted_image)),].decode("utf-8"))
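Note that the single-image prediction above assumes image.jpg is already a num_px x num_px RGB image. For an arbitrary photo it has to be resized to the training resolution first; a sketch using Pillow, an extra dependency the original code does not use:

# Resize an arbitrary image to the network's input resolution (sketch)
from PIL import Image

img = Image.open("image.jpg").convert("RGB").resize((num_px, num_px))
my_image = np.asarray(img).reshape(-1, 1) / 255.0  # flatten and normalize
# my_image can then be passed to predict() exactly as above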