之前一段时间学习cs231n时,写过的几个naive版本的conv和deconv函数,今天拿出来跟大家分享。
conv
def conv(X, w, b, conv_param):
'''
X: input with shape (C, H, W)
W: filter with shape (C, HH, WW)
b: bias float
'''
C, H, W = X.shape
C, HH, WW = w.shape
pad = conv_param['pad']
stride = conv_param['stride']
# padding
npad = ((0, 0), (pad, pad), (pad, pad))
X = np.pad(X, pad_width = npad, mode = 'constant', constant_values = 0)
H_o = 1 + (H + 2 * pad - HH) // stride
W_o = 1 + (W + 2 * pad - WW) // stride
# conv
Y = np.zeros((H_o, W_o))
for i in range(H_o):
for j in range(W_o):
left_top_y, left_top_x = i * stride, j * stride
conv_map = X[:, left_top_y:(left_top_y + HH), left_top_x:(left_top_x + HH)] * w
Y[i, j] = np.sum(conv_map) + b
return Y
def conv_forward_naive(x, w, b, conv_param):
    """A naive implementation of the forward pass for a convolutional layer.

    The input consists of N data points, each with C channels, height H and
    width W. Each input is convolved with F different filters, where each
    filter spans all C channels and has height HH and width WW.

    Input:
    - x: Input data of shape (N, C, H, W)
    - w: Filter weights of shape (F, C, HH, WW)
    - b: Biases, of shape (F,)
    - conv_param: A dictionary with the following keys:
      - 'stride': pixels between adjacent receptive fields, both directions.
      - 'pad': number of pixels used to zero-pad the input.

    Returns a tuple of:
    - out: Output data of shape (N, F, H', W') where
      H' = 1 + (H + 2 * pad - HH) // stride
      W' = 1 + (W + 2 * pad - WW) // stride
    - cache: (x, w, b, conv_param)
    """
    N = x.shape[0]
    F = w.shape[0]
    # Apply every filter to every image via the single-image helper;
    # the nested comprehension yields the (N, F, H', W') layout directly.
    out = np.array([
        [conv(x[n], w[f], b[f], conv_param) for f in range(F)]
        for n in range(N)
    ])
    cache = (x, w, b, conv_param)
    return out, cache
conv backward (the gradient pass — sometimes loosely called "deconv")
def conv_backward_naive(dout, cache):
    """A naive implementation of the backward pass for a convolutional layer.

    Inputs:
    - dout: Upstream derivatives, shape (N, F, H', W').
    - cache: A tuple of (x, w, b, conv_param) as in conv_forward_naive.

    Returns a tuple of:
    - dx: Gradient with respect to x
    - dw: Gradient with respect to w
    - db: Gradient with respect to b
    """
    x, w, b, conv_param = cache
    stride = conv_param['stride']
    pad = conv_param['pad']
    N, C, H, W = x.shape
    F, _, HH, WW = w.shape
    _, _, out_h, out_w = dout.shape

    # Re-create the padded input used by the forward pass.
    pad_spec = ((0, 0), (0, 0), (pad, pad), (pad, pad))
    x_padded = np.pad(x, pad_width=pad_spec, mode='constant', constant_values=0)

    # Bias gradient: each filter's bias touches every output position of
    # every image, so just sum the upstream gradient per filter.
    db = np.zeros(F)
    for f in range(F):
        db[f] = np.sum(dout[:, f, :, :])

    # Weight and input gradients: walk every output position, and scatter
    # the upstream scalar back onto the window that produced it.
    dw = np.zeros(w.shape)
    dx_padded = np.zeros(x_padded.shape)
    for n in range(N):
        for f in range(F):
            for row in range(out_h):
                for col in range(out_w):
                    top = row * stride
                    left = col * stride
                    grad = dout[n, f, row, col]
                    window = x_padded[n, :, top:(top + HH), left:(left + WW)]
                    dw[f] += grad * window
                    dx_padded[n, :, top:(top + HH), left:(left + WW)] += grad * w[f]

    # Strip the padding to recover the gradient w.r.t. the unpadded input.
    dx = dx_padded[:, :, pad:(H + pad), pad:(W + pad)]
    return dx, dw, db