《deep learning for cv with python》读书笔记第十章

本章主要介绍如何用 Python 从零实现神经网络的前向传播和反向传播，激活函数采用 sigmoid。
前向传播：输入逐层经过权重矩阵和激活函数得到预测值，预测值与目标值 y 之差即为误差。
反向传播：利用前向传播保存的各层输出和输出层误差，按链式法则逐层计算 delta，再用 delta 更新权重。
直接上代码：
# -*- coding:utf-8 -*-
__author__ = 'xuy'
import numpy as np
"""
这个是10.1.3反向传播和多层网络
"""
class NeuralNetwork:
    """Fully-connected feedforward network trained with backpropagation.

    Every layer uses the sigmoid activation and the loss is the sum of
    squared errors. Biases are folded into the weight matrices: a column of
    ones is appended to the input features, and every weight matrix except
    the last one has one extra output column so that the following matrix
    again receives ``layers[i] + 1`` inputs.
    """

    def __init__(self, layers, alpha=0.1):
        """Create the weight matrices for the given architecture.

        Args:
            layers: Sequence of integers with the number of nodes per layer,
                e.g. ``[2, 2, 1]`` is 2 inputs, one hidden layer of 2 nodes
                and 1 output node.
            alpha: Learning rate used in the weight update.
        """
        self.W = []
        self.losses = []
        self.layers = layers
        self.alpha = alpha

        # All matrices except the last: one extra row for the bias input and
        # one extra column feeding the bias row of the next matrix.
        for i in range(len(layers) - 2):
            w = np.random.randn(layers[i] + 1, layers[i + 1] + 1)
            # Scale by sqrt(fan-in) to normalise the variance of each output.
            self.W.append(w / np.sqrt(layers[i]))

        # The last matrix still needs the bias row on its input side, but the
        # output layer gets no extra column. len(self.W) == len(layers) - 1.
        w = np.random.randn(layers[-2] + 1, layers[-1])
        self.W.append(w / np.sqrt(layers[-2]))

    def __repr__(self):
        """Return the architecture as a string, e.g. ``NeuralNetwork: 2-2-1``."""
        return "NeuralNetwork: {}".format("-".join(str(l) for l in self.layers))

    def sigmoid(self, x):
        """Return the sigmoid activation of ``x`` (element-wise)."""
        return 1.0 / (1 + np.exp(-x))

    def sigmoid_deriv(self, x):
        """Return the sigmoid derivative.

        ``x`` must ALREADY be the output of :meth:`sigmoid`, not the raw
        net input.
        """
        return x * (1 - x)

    def fit(self, X, y, epochs=1000, display_update=100):
        """Train the network with per-sample gradient descent.

        After every epoch the loss over the whole training set is appended to
        ``self.losses``.

        Args:
            X: Feature matrix (array-like), one row per sample, without the
                bias column.
            y: Targets, one entry per row of ``X``.
            epochs: Number of passes over the training data.
            display_update: Print the loss every ``display_update`` epochs
                (the first epoch is always printed). A value <= 0 disables
                the periodic printing.
        """
        # Accept plain lists as well as arrays.
        X = np.asarray(X)
        # Bias trick: append a column of ones so the bias is learned as part
        # of the weight matrix.
        X = np.c_[X, np.ones((X.shape[0]))]

        for epoch in range(epochs):
            # One forward + backward pass per training sample.
            for (x, target) in zip(X, y):
                self.fit_partial(x, target)

            loss = self.calculate_loss(X, y)
            self.losses.append(loss)

            periodic = display_update > 0 and (epoch + 1) % display_update == 0
            if epoch == 0 or periodic:
                print("[INFO] epoch={}, loss={:.7f}".format(epoch + 1, loss))

    def fit_partial(self, x, y):
        """Run one forward pass, one backward pass and one weight update.

        Args:
            x: A single sample that already contains the bias entry.
            y: Target value(s) for that sample.
        """
        # A[k] holds the activation of layer k; A[0] is the input itself,
        # promoted to shape (1, len(x)).
        A = [np.atleast_2d(x)]

        # FEEDFORWARD: A[k + 1] = sigmoid(A[k] . W[k]), so len(A) == len(W) + 1.
        for layer in range(len(self.W)):
            net = A[layer].dot(self.W[layer])
            A.append(self.sigmoid(net))

        # BACKPROPAGATION: start from the output error (prediction - target).
        error = A[-1] - y

        # Delta of the output layer: error times the activation derivative.
        D = [error * self.sigmoid_deriv(A[-1])]

        # Walk backwards over the hidden layers (the input layer is
        # excluded). Each delta is the delta of the layer after it, pushed
        # back through that layer's weights and scaled by the local
        # activation derivative.
        for layer in range(len(A) - 2, 0, -1):
            delta = D[-1].dot(self.W[layer].T)
            delta = delta * self.sigmoid_deriv(A[layer])
            D.append(delta)

        # Deltas were collected output-first; flip them so D[k] matches W[k].
        D = D[::-1]

        # WEIGHT UPDATE: gradient step on every weight matrix.
        for layer in range(len(self.W)):
            self.W[layer] += -self.alpha * A[layer].T.dot(D[layer])

    def calculate_loss(self, X, targets):
        """Return the sum-of-squares loss ``0.5 * sum((pred - target) ** 2)``.

        Args:
            X: Feature matrix that already contains the bias column.
            targets: Targets for the rows of ``X``.
        """
        predictions = self.predict(X, add_bias=False)

        targets = np.asarray(targets)
        if targets.ndim == 1 and targets.size == predictions.size:
            # A flat target vector of shape (N,) must line up with the (N, 1)
            # predictions. Promoting it to (1, N) would broadcast the
            # difference to (N, N) and silently report a wrong loss.
            targets = targets.reshape(predictions.shape)
        else:
            targets = np.atleast_2d(targets)

        return 0.5 * np.sum((predictions - targets) ** 2)

    def predict(self, X, add_bias=True):
        """Forward-propagate ``X`` and return the output layer activations.

        Args:
            X: One sample or a matrix of samples.
            add_bias: Append the bias column of ones first. Pass ``False``
                when ``X`` already contains it.

        Returns:
            A 2-D array with one row of output activations per sample.
        """
        p = np.atleast_2d(X)

        if add_bias:
            # Same bias trick as in fit().
            p = np.c_[p, np.ones((p.shape[0]))]

        for layer in range(len(self.W)):
            p = self.sigmoid(np.dot(p, self.W[layer]))

        return p
《deep learning for cv with python》读书笔记第十章

猜你喜欢