max_pool_forward_naive:
def max_pool_forward_naive(x, pool_param):
    """
    A naive implementation of the forward pass for a max pooling layer.

    Inputs:
    - x: Input data, of shape (N, C, H, W)
    - pool_param: dictionary with the following keys:
      - 'pool_height': The height of each pooling region
      - 'pool_width': The width of each pooling region
      - 'stride': The distance between adjacent pooling regions

    Returns a tuple of:
    - out: Output data, of shape (N, C, H', W') where
      H' = 1 + (H - pool_height) // stride and
      W' = 1 + (W - pool_width) // stride
    - cache: (x, pool_param)
    """
    ph = pool_param['pool_height']
    pw = pool_param['pool_width']
    stride = pool_param['stride']
    N, C, H, W = x.shape
    out_h = (H - ph) // stride + 1
    out_w = (W - pw) // stride + 1

    out = np.zeros((N, C, out_h, out_w))
    # Walk every (image, channel, output row, output col) position and
    # take the max over the corresponding input window.
    for n in range(N):
        for c in range(C):
            for r in range(out_h):
                r0 = r * stride
                for q in range(out_w):
                    q0 = q * stride
                    out[n, c, r, q] = x[n, c, r0:r0 + ph, q0:q0 + pw].max()

    cache = (x, pool_param)
    return out, cache
max_pool_backward_naive:
def max_pool_backward_naive(dout, cache):
    """
    A naive implementation of the backward pass for a max pooling layer.

    Inputs:
    - dout: Upstream derivatives, of shape (N, C, H', W')
    - cache: A tuple of (x, pool_param) as in the forward pass.

    Returns:
    - dx: Gradient with respect to x, of shape (N, C, H, W)
    """
    x, pool_param = cache
    HH, WW = pool_param['pool_height'], pool_param['pool_width']
    s = pool_param['stride']
    N, C, H, W = x.shape
    H_new = int(1 + (H - HH) / s)
    W_new = int(1 + (W - WW) / s)
    dx = np.zeros_like(x)
    for i in range(N):
        for j in range(C):
            for k in range(H_new):
                for l in range(W_new):
                    window = x[i, j, k*s:HH+k*s, l*s:WW+l*s]
                    m = np.max(window)
                    # Accumulate with += rather than assign with =: when
                    # stride < pool size the windows overlap, and a plain
                    # assignment would clobber gradient already written into
                    # this region by earlier windows.
                    dx[i, j, k*s:HH+k*s, l*s:WW+l*s] += (window == m) * dout[i, j, k, l]
    # NOTE: backprop through max pooling is ambiguous under ties — if a
    # window contains the maximum value more than once, there is no unique
    # position to route dout to. This naive version sends the full upstream
    # gradient to every tied position.
    # NOTE(review): course-notes aside kept from the original (translated):
    # some work suggests dropping pooling layers entirely can improve
    # accuracy; regularization, dropout, and pooling are complementary ways
    # to fight overfitting, with pooling's tie ambiguity being hard to
    # characterize in general.
    return dx