之前一段时间学习cs231n时,写过的几个naive版本的conv和deconv函数,今天拿出来跟大家分享。
conv
def conv(X, w, b, conv_param):
'''
X: input with shape (C, H, W)
W: filter with shape (C, HH, WW)
b: bias float
'''
C, H, W = X.shape
C, HH, WW = w.shape
pad = conv_param['pad']
stride = conv_param['stride']
# padding
npad = ((0, 0), (pad, pad), (pad, pad))
X = np.pad(X, pad_width = npad, mode = 'constant', constant_values = 0)
H_o = 1 + (H + 2 * pad - HH) // stride
W_o = 1 + (W + 2 * pad - WW) // stride
# conv
Y = np.zeros((H_o, W_o))
for i in range(H_o):
for j in range(W_o):
left_top_y, left_top_x = i * stride, j * stride
conv_map = X[:, left_top_y:(left_top_y + HH), left_top_x:(left_top_x + HH)] * w
Y[i, j] = np.sum(conv_map) + b
return Y
def conv_forward_naive(x, w, b, conv_param):
    """A naive implementation of the forward pass for a convolutional layer.

    The input consists of N data points, each with C channels, height H and
    width W. Each input is convolved with F different filters, where each
    filter spans all C channels and has height HH and width WW.

    Input:
    - x: Input data of shape (N, C, H, W)
    - w: Filter weights of shape (F, C, HH, WW)
    - b: Biases, of shape (F,)
    - conv_param: A dictionary with the following keys:
      - 'stride': pixels between adjacent receptive fields, both directions.
      - 'pad': number of pixels used to zero-pad the input.

    Returns a tuple of:
    - out: Output data of shape (N, F, H', W') where
      H' = 1 + (H + 2 * pad - HH) // stride
      W' = 1 + (W + 2 * pad - WW) // stride
    - cache: (x, w, b, conv_param)
    """
    N = x.shape[0]
    F = w.shape[0]
    # Apply every filter to every image via the single-image helper;
    # the nested comprehension yields the (N, F, H', W') layout directly.
    out = np.array([
        [conv(x[n], w[f], b[f], conv_param) for f in range(F)]
        for n in range(N)
    ])
    cache = (x, w, b, conv_param)
    return out, cache
conv backward (the gradient pass — sometimes loosely called "deconv")
def conv_backward_naive(dout, cache):
    """A naive implementation of the backward pass for a convolutional layer.

    Inputs:
    - dout: Upstream derivatives, shape (N, F, H', W').
    - cache: A tuple of (x, w, b, conv_param) as in conv_forward_naive.

    Returns a tuple of:
    - dx: Gradient with respect to x
    - dw: Gradient with respect to w
    - db: Gradient with respect to b
    """
    x, w, b, conv_param = cache
    stride = conv_param['stride']
    pad = conv_param['pad']
    N, C, H, W = x.shape
    F, _, HH, WW = w.shape
    _, _, out_h, out_w = dout.shape

    # Re-create the padded input used by the forward pass.
    pad_spec = ((0, 0), (0, 0), (pad, pad), (pad, pad))
    x_padded = np.pad(x, pad_width=pad_spec, mode='constant', constant_values=0)

    # Bias gradient: each filter's bias touches every output position of
    # every image, so just sum the upstream gradient per filter.
    db = np.zeros(F)
    for f in range(F):
        db[f] = np.sum(dout[:, f, :, :])

    # Weight and input gradients: walk every output position, and scatter
    # the upstream scalar back onto the window that produced it.
    dw = np.zeros(w.shape)
    dx_padded = np.zeros(x_padded.shape)
    for n in range(N):
        for f in range(F):
            for row in range(out_h):
                for col in range(out_w):
                    top = row * stride
                    left = col * stride
                    grad = dout[n, f, row, col]
                    window = x_padded[n, :, top:(top + HH), left:(left + WW)]
                    dw[f] += grad * window
                    dx_padded[n, :, top:(top + HH), left:(left + WW)] += grad * w[f]

    # Strip the padding to recover the gradient w.r.t. the unpadded input.
    dx = dx_padded[:, :, pad:(H + pad), pad:(W + pad)]
    return dx, dw, db