This is a fully-connected network with the architecture: {affine - [batch norm] - relu - [dropout]} x (L - 1) - affine - softmax
(It is also one of the networks in fc_net.py, written entirely from scratch.) For example, with L = 3 the forward pass runs the affine - [batch norm] - relu - [dropout] block twice, followed by a final affine layer and the softmax loss.
class FullyConnectedNet(object):
"""
Author::Chenx
"""
"""
A fully-connected neural network with an arbitrary number of hidden layers,
ReLU nonlinearities, and a softmax loss function. This will also implement
dropout and batch normalization as options. For a network with L layers,
the architecture will be
{affine - [batch norm] - relu - [dropout]} x (L - 1) - affine - softmax
where batch normalization and dropout are optional, and the {...} block is
repeated L - 1 times.
Similar to the TwoLayerNet above, learnable parameters are stored in the
self.params dictionary and will be learned using the Solver class.
"""
def __init__(self, hidden_dims, input_dim=3*32*32, num_classes=10,
dropout=0, use_batchnorm=False, reg=0.0,
weight_scale=1e-2, dtype=np.float32, seed=None):
"""
Initialize a new FullyConnectedNet.
Inputs:
- hidden_dims: A list of integers giving the size of each hidden layer.
- input_dim: An integer giving the size of the input.
- num_classes: An integer giving the number of classes to classify.
- dropout: Scalar between 0 and 1 giving dropout strength. If dropout=0 then
the network should not use dropout at all.
- use_batchnorm: Whether or not the network should use batch normalization.
- reg: Scalar giving L2 regularization strength.
- weight_scale: Scalar giving the standard deviation for random
initialization of the weights.
- dtype: A numpy datatype object; all computations will be performed using
this datatype. float32 is faster but less accurate, so you should use
float64 for numeric gradient checking.
- seed: If not None, then pass this random seed to the dropout layers. This
will make the dropout layers deterministic so we can gradient check the
model.
"""
self.use_batchnorm = use_batchnorm
self.use_dropout = dropout > 0
self.reg = reg
self.num_layers = 1 + len(hidden_dims)
self.dtype = dtype
self.params = {}
############################################################################
# TODO: Initialize the parameters of the network, storing all values in #
# the self.params dictionary. Store weights and biases for the first layer #
# in W1 and b1; for the second layer use W2 and b2, etc. Weights should be #
# initialized from a normal distribution with standard deviation equal to #
# weight_scale and biases should be initialized to zero. #
# #
# When using batch normalization, store scale and shift parameters for the #
# first layer in gamma1 and beta1; for the second layer use gamma2 and #
# beta2, etc. Scale parameters should be initialized to one and shift #
# parameters should be initialized to zero. #
############################################################################
i=1
b = input_dim
#Initialize the weights, biases, and batchnorm (gamma/beta) parameters for the (L-1) hidden layers
for d in hidden_dims:
stringw = 'W' + str(i)
stringb = 'b' + str(i)
self.params[stringw] = weight_scale*np.random.randn(b,d)
self.params[stringb] = np.zeros(d)
if use_batchnorm:
stringg = 'gamma' + str(i)
stringbe = 'beta' + str(i)
self.params[stringg] = np.ones(d)
self.params[stringbe] = np.zeros(d)
b=d
i+=1
#Initialize the output layer weights and biases
stringw = 'W' + str(i)
stringb = 'b' + str(i)
self.params[stringw] = weight_scale*np.random.randn(b,num_classes)
self.params[stringb] = np.zeros(num_classes)
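#For example (hypothetical sizes, just to illustrate the naming scheme): with
#hidden_dims=[100, 50], input_dim=3072 and num_classes=10, self.params holds
#W1 (3072x100), b1 (100,), W2 (100x50), b2 (50,), W3 (50x10), b3 (10,),
#plus gamma1/beta1 (100,) and gamma2/beta2 (50,) when use_batchnorm is True.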
############################################################################
# END OF YOUR CODE #
############################################################################
# When using dropout we need to pass a dropout_param dictionary to each
# dropout layer so that the layer knows the dropout probability and the mode
# (train / test). You can pass the same dropout_param to each dropout layer.
self.dropout_param = {}
if self.use_dropout:
self.dropout_param = {'mode': 'train', 'p': dropout}
if seed is not None:
self.dropout_param['seed'] = seed
# With batch normalization we need to keep track of running means and
# variances, so we need to pass a special bn_param object to each batch
# normalization layer. You should pass self.bn_params[0] to the forward pass
# of the first batch normalization layer, self.bn_params[1] to the forward
# pass of the second batch normalization layer, etc.
self.bn_params = []
if self.use_batchnorm:
self.bn_params = [{'mode': 'train'} for i in range(self.num_layers - 1)]
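#At test time batchnorm_forward uses the running_mean/running_var accumulated in
#these bn_param dicts during training, which is why loss() switches their mode below.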
# Cast all parameters to the correct datatype
for k, v in self.params.items():
#All parameters are numpy arrays, so a plain astype cast is sufficient
self.params[k] = v.astype(dtype)
def loss(self, X, y=None):
"""
Compute loss and gradient for the fully-connected net.
Input / output: Same as TwoLayerNet above.
"""
X = X.astype(self.dtype)
mode = 'test' if y is None else 'train'
# Set train/test mode for batchnorm params and dropout param since they
# behave differently during training and testing.
if self.use_dropout:
self.dropout_param['mode'] = mode
if self.use_batchnorm:
for bn_param in self.bn_params:
bn_param['mode'] = mode
scores = None
############################################################################
# TODO: Implement the forward pass for the fully-connected net, computing #
# the class scores for X and storing them in the scores variable. #
# #
# When using dropout, you'll need to pass self.dropout_param to each #
# dropout forward pass. #
# #
# When using batch normalization, you'll need to pass self.bn_params[0] to #
# the forward pass for the first batch normalization layer, pass #
# self.bn_params[1] to the forward pass for the second batch normalization #
# layer, etc. #
############################################################################
#Compute the class scores
num_hiddenlayers = self.num_layers-1
cache = {} #cache of each stage of each layer, e.g. layer 1: affine -> cache11, batchnorm -> cache12, ...
z={} #output of each stage of each layer, e.g. layer 1: affine -> z11, batchnorm -> z12, ...
#Initialize the network input
stringz_last="input"
z[stringz_last] = X
#Loop over the (L-1) hidden layers
for i in range(num_hiddenlayers):
#Extract the current layer's weights and biases
stringW = 'W' + str(i+1)
stringb = 'b' + str(i+1)
W = self.params[stringW]
b = self.params[stringb]
#Stage 1 of the current layer: affine_forward
stringc = 'cache' + str(i+1) + str(1)
stringz = 'z' + str(i+1) + str(1)
z[stringz],cache[stringc] = affine_forward(z[stringz_last],W,b)
stringz_last=stringz
#Stage 2 of the current layer (optional): batchnorm_forward
if self.use_batchnorm:
#Extract gamma and beta
stringg = 'gamma' + str(i+1)
stringbe = 'beta' + str(i+1)
gamma = self.params[stringg]
beta = self.params[stringbe]
#Run the batchnorm forward pass
stringc = 'cache' + str(i+1) + str(2)
stringz = 'z' + str(i+1) + str(2)
z[stringz],cache[stringc]=batchnorm_forward(z[stringz_last], gamma, beta, self.bn_params[i])
stringz_last=stringz
#Stage 3 of the current layer: relu_forward
stringc = 'cache' + str(i+1) + str(3)
stringz = 'z' + str(i+1) + str(3)
z[stringz],cache[stringc] = relu_forward(z[stringz_last])
stringz_last=stringz
#Stage 4 of the current layer (optional): dropout_forward
if self.use_dropout:
stringc = 'cache' + str(i+1) + str(4)
stringz = 'z' + str(i+1) + str(4)
z[stringz],cache[stringc]=dropout_forward(z[stringz_last], self.dropout_param)
stringz_last=stringz
#End of the hidden-layer loop
#Output layer: affine_forward (the softmax loss is computed in the backward pass below)
#Extract the output layer's weights and biases
stringW = 'W' + str(num_hiddenlayers+1)
stringb = 'b' + str(num_hiddenlayers+1)
W = self.params[stringW]
b = self.params[stringb]
stringc = 'cache' + '_out'
stringz = 'z' + '_out'
z[stringz],cache[stringc] = affine_forward(z[stringz_last],W,b)
#The class scores are z_out, an (N, num_classes) array
scores = z['z_out']
############################################################################
# END OF YOUR CODE #
############################################################################
# If test mode return early
if mode == 'test':
return scores
loss, grads = 0.0, {}
############################################################################
# TODO: Implement the backward pass for the fully-connected net. Store the #
# loss in the loss variable and gradients in the grads dictionary. Compute #
# data loss using softmax, and make sure that grads[k] holds the gradients #
# for self.params[k]. Don't forget to add L2 regularization! #
# #
# When using batch normalization, you don't need to regularize the scale #
# and shift parameters. #
# #
# NOTE: To ensure that your implementation matches ours and you pass the #
# automated tests, make sure that your L2 regularization includes a factor #
# of 0.5 to simplify the expression for the gradient. #
############################################################################
#Backward pass
dz = {} #intermediate gradients flowing backward through the network
#Compute the loss: softmax data loss plus L2 regularization
loss_without_reg,dz['dz_out'] = softmax_loss(scores,y)
for i in range(num_hiddenlayers+1):
stringW = 'W' + str(i+1)
W = self.params[stringW]
loss += 0.5*self.reg*((W**2).sum())
loss = loss+loss_without_reg
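#The 0.5 factor keeps the regularizer's gradient clean: d/dW of 0.5*reg*sum(W**2)
#is reg*W, which matches the "grads[...] += self.reg * W" terms added below.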
#Compute the gradients
j=num_hiddenlayers #j=L-1
#Backprop through the last affine layer and store its gradients
stringW = 'W' + str(j+1)
stringb = 'b' + str(j+1)
stringdz = 'dz' + str(j) + str(4)
dz[stringdz],grads[stringW], grads[stringb] = affine_backward(dz['dz_out'], cache['cache_out'])
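#affine_forward's cache is (x, W, b) in the assignment layers, so cache['cache_out'][1]
#is the weight matrix W; add the L2 term's gradient reg*W to dW.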
grads[stringW] += self.reg*cache['cache_out'][1]
stringdz_last = stringdz
#Backprop through the (L-1) hidden layers and store their gradients
for i in range(j):
#dropout backward
if self.use_dropout:
stringdz = 'dz' + str(j-i) + str(3)
stringc = 'cache' + str(j-i) + str(4)
dz[stringdz] = dropout_backward(dz[stringdz_last], cache[stringc])
stringdz_last = stringdz
#relu backward (the relu cache is always stored under 'cache{layer}3')
stringdz = 'dz' + str(j-i) + str(2)
stringc = 'cache' + str(j-i) + str(3)
dz[stringdz] = relu_backward(dz[stringdz_last], cache[stringc])
stringdz_last = stringdz
#batchnorm backward and store the gamma/beta gradients
if self.use_batchnorm:
stringdz = 'dz' + str(j-i) + str(1)
stringc = 'cache' + str(j-i) + str(2)
stringg = 'gamma' + str(j-i)
stringbe = 'beta' + str(j-i)
dz[stringdz],grads[stringg],grads[stringbe] = batchnorm_backward(dz[stringdz_last], cache[stringc])
stringdz_last = stringdz
#affine backward and store the W/b gradients (the affine cache is always stored under 'cache{layer}1')
stringdz = 'dz' + str(j-i-1) + str(4)
stringc = 'cache' + str(j-i) + str(1)
stringW = 'W' + str(j-i)
stringb = 'b' + str(j-i)
dz[stringdz],grads[stringW],grads[stringb] = affine_backward(dz[stringdz_last],cache[stringc])
grads[stringW] += self.reg*cache[stringc][1]
stringdz_last = stringdz
############################################################################
# END OF YOUR CODE #
############################################################################
return loss, grads
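Below is a minimal sketch of how this network could be gradient-checked, assuming the standard cs231n assignment utilities (eval_numerical_gradient from cs231n.gradient_check) are available and FullyConnectedNet is importable; the data and layer sizes here are random and purely illustrative:

import numpy as np
from cs231n.gradient_check import eval_numerical_gradient  # assumes the assignment's utility

np.random.seed(231)
N, D, H1, H2, C = 2, 15, 20, 30, 10  # tiny hypothetical sizes, just for the check
X = np.random.randn(N, D)
y = np.random.randint(C, size=(N,))

# float64 plus a fixed dropout seed keeps the numerical gradient check stable
model = FullyConnectedNet([H1, H2], input_dim=D, num_classes=C,
                          dropout=0.25, use_batchnorm=True, reg=0.0,
                          weight_scale=5e-2, dtype=np.float64, seed=123)

loss, grads = model.loss(X, y)
for name in sorted(grads):
    f = lambda _: model.loss(X, y)[0]
    grad_num = eval_numerical_gradient(f, model.params[name], verbose=False, h=1e-5)
    rel_err = np.max(np.abs(grad_num - grads[name]) /
                     np.maximum(1e-8, np.abs(grad_num) + np.abs(grads[name])))
    print('%s relative error: %.2e' % (name, rel_err))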