基础的理论知识参考:https://www.zybuluo.com/hanbingtao/note/485480
下面的代码也是基于上面文章的实现:
整个算法分为三个步骤:
- 前向计算每个神经元的输出值 $a_j$($j$ 表示网络的第 $j$ 个神经元,以下同);
- 反向计算每个神经元的误差项 $\delta_j$,$\delta_j$ 在有的文献中也叫做敏感度(sensitivity)。它实际上是网络的损失函数 $E_d$ 对神经元加权输入 $net_j$ 的偏导数,即 $\delta_j=\frac{\partial E_d}{\partial net_j}$;
- 计算每个神经元连接权重 $w_{ji}$ 的梯度($w_{ji}$ 表示从神经元 $i$ 连接到神经元 $j$ 的权重),公式为 $\frac{\partial E_d}{\partial w_{ji}}=a_i\delta_j$,其中 $a_i$ 表示神经元 $i$ 的输出。
最后,根据梯度下降法则更新每个权重即可。
具体的细节请参考上面链接的文章,这里只贴出代码实现:
- 首先是activators.py文件:
#!/usr/bin/env python
# -*- coding: UTF-8 -*-
import numpy as np
class ReluActivator(object):
    """Rectified linear unit (ReLU) activation for a single value."""

    def forward(self, weighted_input):
        """Return ``weighted_input`` if it is greater than zero, else ``0``."""
        if weighted_input > 0:
            return weighted_input
        return 0

    def backward(self, output):
        """Return the ReLU slope for ``output``: ``1`` if positive, else ``0``."""
        if output > 0:
            return 1
        return 0
class IdentityActivator(object):
    """Pass-through activation: the output equals the weighted input."""

    def forward(self, weighted_input):
        """Return ``weighted_input`` unchanged."""
        return weighted_input

    def backward(self, output):
        """Return the constant slope ``1``; ``output`` is not used."""
        slope = 1
        return slope
class SigmoidActivator(object):
    """Logistic sigmoid activation."""

    def forward(self, weighted_input):
        """Return ``1 / (1 + exp(-weighted_input))``."""
        denominator = 1.0 + np.exp(-weighted_input)
        return 1.0 / denominator

    def backward(self, output):
        """Return the sigmoid derivative written in terms of its own output.

        ``output`` is the value previously produced by ``forward``; the
        result is ``output * (1 - output)``.
        """
        complement = 1 - output
        return output * complement
class TanhActivator(object):
    """Hyperbolic tangent activation, computed via the exponential form."""

    def forward(self, weighted_input):
        """Return ``2 / (1 + exp(-2 * weighted_input)) - 1``."""
        denominator = 1.0 + np.exp(-2 * weighted_input)
        scaled = 2.0 / denominator
        return scaled - 1.0

    def backward(self, output):
        """Return the tanh derivative written in terms of its own output.

        ``output`` is the value previously produced by ``forward``; the
        result is ``1 - output * output``.
        """
        squared = output * output
        return 1 - squared
是一些基本的激活函数的实现
- 下面的CNN.py文件实现cnn网络主要的组件
#!/usr/bin/env python
# -*- coding: UTF-8 -*-
import numpy as np
from activators import ReluActivator, IdentityActivator
# 获取卷积区域
def get_patch(input_array, i, j, filter_width, filter_height, stride):
    """Return the window of ``input_array`` covered by output cell ``(i, j)``.

    The window starts at row ``i * stride`` and column ``j * stride`` and
    spans ``filter_height`` rows by ``filter_width`` columns.  A 2-D input
    yields a 2-D slice; a 3-D input yields a slice that keeps every index of
    the first axis.  Any other rank yields ``None``.
    """
    top = i * stride
    left = j * stride
    rows = slice(top, top + filter_height)
    cols = slice(left, left + filter_width)

    rank = input_array.ndim
    if rank == 2:
        return input_array[rows, cols]
    if rank == 3:
        return input_array[:, rows, cols]
    return None
# 获取一个2D区域的最大值所在的索引
def get_max_index(array):
    """Return ``(row, col)`` of the largest element of a 2-D array.

    Elements are visited row by row and a position only replaces the current
    best when its value is strictly greater, so ties resolve to the first
    occurrence.  Indexing ``array[0, 0]`` up front means an empty array
    raises ``IndexError``.
    """
    best_value = array[0, 0]
    best_position = (0, 0)
    for position in np.ndindex(array.shape[0], array.shape[1]):
        candidate = array[position]
        if candidate > best_value:
            best_value = candidate
            best_position = position
    return best_position[0], best_position[1]
# 计算卷积:conv函数实现了2维和3维数组的卷积
def conv(input_array, kernel_array, output_array, stride, bias):
    """Slide ``kernel_array`` over ``input_array`` and fill ``output_array``.

    For every output cell ``(i, j)`` the window whose top-left corner is at
    ``(i * stride, j * stride)`` is multiplied element-wise with the kernel,
    summed, and offset by ``bias``.  The kernel is applied as given (it is
    not flipped here).

    Args:
        input_array: array whose last two axes are (height, width); any
            leading axes (e.g. channels) are kept whole in each window.
        kernel_array: array whose last two axes give the window size and
            whose shape broadcasts against the extracted window.
        output_array: 2-D array written in place; its shape decides how many
            windows are evaluated.
        stride: step, in input cells, between neighbouring windows.
        bias: scalar added to every window sum.

    Returns:
        None.  The result is stored in ``output_array``.
    """
    output_height = output_array.shape[0]
    output_width = output_array.shape[1]
    kernel_height = kernel_array.shape[-2]
    kernel_width = kernel_array.shape[-1]

    for i in range(output_height):
        top = i * stride
        for j in range(output_width):
            left = j * stride
            # Ellipsis keeps all leading axes, so 2-D and multi-channel
            # inputs share one code path.
            patch = input_array[...,
                                top:top + kernel_height,
                                left:left + kernel_width]
            # ``*`` is the element-wise product of two arrays.
            output_array[i, j] = (patch * kernel_array).sum() + bias
# padding函数实现了zero padding操作
def padding(input_array, zp):
    """Surround the last two axes of ``input_array`` with ``zp`` zeros.

    When ``zp`` is ``0`` the input object itself is returned.  Otherwise a
    new array created with ``np.zeros`` is returned for 2-D and 3-D inputs,
    with the original values copied into its centre; the first axis of a 3-D
    input is not padded.  Inputs of any other rank yield ``None``.
    """
    if zp == 0:
        return input_array
    if input_array.ndim not in (2, 3):
        return None

    input_height, input_width = input_array.shape[-2:]
    padded_shape = input_array.shape[:-2] + (input_height + 2 * zp,
                                             input_width + 2 * zp)
    padded_array = np.zeros(padded_shape)
    # Ellipsis covers the optional leading (depth) axis of 3-D inputs.
    padded_array[..., zp:zp + input_height, zp:zp + input_width] = input_array
    return padded_array
# 对numpy数组进行element wise操作
# element_wise_op函数实现了对numpy数组进行按元素操作,并将返回值写回到数组中
def element_wise_op(array, op):
    """Replace every element of ``array`` in place with ``op(element)``.

    ``op`` is called once per element with the zero-dimensional view handed
    out by ``np.nditer``; its return value is written back to that position.
    Nothing is returned.
    """
    iterator = np.nditer(array, op_flags=['readwrite'])
    while not iterator.finished:
        cell = iterator[0]
        cell[...] = op(cell)
        iterator.iternext()
# Filter类保存了卷积层的参数以及梯度,并且实现了用梯度下降算法来更新参数
class Filter(object):
    """Weights, bias and their gradients for one convolution filter."""

    def __init__(self, width, height, depth):
        """Create a filter with small random weights and zero bias.

        Weights have shape ``(depth, height, width)`` and are drawn from a
        uniform distribution on ``[-1e-4, 1e-4)``.  The gradient buffers
        start at zero.
        """
        shape = (depth, height, width)
        self.weights = np.random.uniform(-1e-4, 1e-4, shape)
        self.weights_grad = np.zeros(shape)
        self.bias = 0
        self.bias_grad = 0

    def __repr__(self):
        weights_text = repr(self.weights)
        bias_text = repr(self.bias)
        return 'filter weights:\n%s\nbias:\n%s' % (weights_text, bias_text)

    def get_weights(self):
        """Return the weight array itself (not a copy)."""
        return self.weights

    def get_bias(self):
        """Return the bias value."""
        return self.bias

    def update(self, learning_rate):
        """Take one gradient-descent step using the stored gradients."""
        weight_step = learning_rate * self.weights_grad
        bias_step = learning_rate * self.bias_grad
        self.weights -= weight_step
        self.bias -= bias_step
# 用ConvLayer类来实现一个卷积层
class ConvLayer(object):
    """A convolutional layer: forward pass, back-propagation and SGD update.

    The layer owns ``filter_number`` ``Filter`` objects, each with weights of
    shape ``(channel_number, filter_height, filter_width)``.  Results are
    kept on the instance:

    * ``output_array``  - feature maps written by ``forward``.
    * ``delta_array``   - error terms for the previous layer, written by
      ``bp_sensitivity_map``.
    * each filter's ``weights_grad`` / ``bias_grad`` - written by
      ``bp_gradient``.
    """

    def __init__(self, input_width, input_height,
                 channel_number, filter_width,
                 filter_height, filter_number,
                 zero_padding, stride, activator,
                 learning_rate):
        self.input_width = input_width
        self.input_height = input_height
        self.channel_number = channel_number
        self.filter_width = filter_width
        self.filter_height = filter_height
        self.filter_number = filter_number
        self.zero_padding = zero_padding
        self.stride = stride
        self.activator = activator
        self.learning_rate = learning_rate
        # Height and width of the feature maps produced by the convolution.
        self.output_width = ConvLayer.calculate_output_size(
            self.input_width, filter_width, zero_padding, stride)
        self.output_height = ConvLayer.calculate_output_size(
            self.input_height, filter_height, zero_padding, stride)
        # One feature map per filter.
        self.output_array = np.zeros((self.filter_number,
                                      self.output_height, self.output_width))
        # One Filter object per output feature map.
        self.filters = [
            Filter(filter_width, filter_height, self.channel_number)
            for _ in range(filter_number)
        ]

    @staticmethod
    def calculate_output_size(input_size, filter_size, zero_padding, stride):
        """Return the output length along one axis.

        Floor division keeps the result an integer on both Python 2 and 3,
        so it can be used as an array dimension and as a ``range`` bound.
        """
        return (input_size - filter_size + 2 * zero_padding) // stride + 1

    def forward(self, input_array):
        """Compute the layer output and store it in ``self.output_array``.

        The raw and zero-padded inputs are remembered on the instance because
        the back-propagation methods read them.
        """
        self.input_array = input_array
        self.padded_input_array = padding(input_array, self.zero_padding)
        for f in range(self.filter_number):
            flt = self.filters[f]
            conv(self.padded_input_array, flt.get_weights(),
                 self.output_array[f], self.stride, flt.get_bias())
        element_wise_op(self.output_array, self.activator.forward)

    def backward(self, input_array, sensitivity_array, activator):
        """Run forward, then compute the previous layer's error terms and
        this layer's gradients.

        Args:
            input_array: input to this layer.
            sensitivity_array: error terms of this layer's output.
            activator: activation function of the previous layer.
        """
        self.forward(input_array)
        self.bp_sensitivity_map(sensitivity_array, activator)
        self.bp_gradient(sensitivity_array)

    def update(self):
        """Apply one gradient-descent step to every filter."""
        for flt in self.filters:
            flt.update(self.learning_rate)

    def bp_sensitivity_map(self, sensitivity_array, activator):
        """Compute the sensitivity map passed to the previous layer.

        Args:
            sensitivity_array: sensitivity map of this layer.
            activator: activation function of the previous layer.

        The result is stored in ``self.delta_array``.
        """
        # Undo the stride: spread the sensitivity map out to stride-1 size.
        expanded_array = self.expand_sensitivity_map(sensitivity_array)
        # "Full" convolution needs the expanded map zero-padded.  Cells that
        # correspond to the input's own zero padding would also receive an
        # error term, but it is never propagated further, so the padding is
        # only as large as needed to produce an input-sized result.
        # Height and width are handled separately so non-square filters get
        # the correct amount of padding on each axis.
        expanded_height = expanded_array.shape[1]
        expanded_width = expanded_array.shape[2]
        zp_height = (self.input_height + self.filter_height - 1
                     - expanded_height) // 2
        zp_width = (self.input_width + self.filter_width - 1
                    - expanded_width) // 2
        padded_array = np.pad(
            expanded_array,
            ((0, 0), (zp_height, zp_height), (zp_width, zp_width)),
            mode='constant')
        print('padded_array: %s' % (np.shape(padded_array),))
        # Accumulator for the sensitivity map handed to the previous layer.
        self.delta_array = self.create_delta_array()
        # With several filters, the map passed back is the sum of the maps
        # obtained from each filter.
        for f in range(self.filter_number):
            flt = self.filters[f]
            # Rotate every channel of the filter weights by 180 degrees.
            flipped_weights = np.array(
                [np.rot90(channel, 2) for channel in flt.get_weights()])
            print('flipped_weights: %s' % (np.shape(flipped_weights),))
            # Contribution of this filter, one input channel at a time.
            delta_array = self.create_delta_array()
            for d in range(delta_array.shape[0]):
                # conv(input_array, kernel_array, output_array, stride, bias)
                conv(padded_array[f], flipped_weights[d],
                     delta_array[d], 1, 0)
            self.delta_array += delta_array
        # Multiply element-wise by the derivative of the activation function.
        derivative_array = np.array(self.input_array)
        element_wise_op(derivative_array, activator.backward)
        self.delta_array *= derivative_array

    def bp_gradient(self, sensitivity_array):
        """Compute weight and bias gradients of every filter.

        Gradients are written into each filter's ``weights_grad`` and
        ``bias_grad``.
        """
        # Undo the stride: spread the sensitivity map out to stride-1 size.
        expanded_array = self.expand_sensitivity_map(sensitivity_array)
        for f in range(self.filter_number):
            flt = self.filters[f]
            # Weight gradient, one input channel at a time.
            for d in range(flt.weights.shape[0]):
                conv(self.padded_input_array[d], expanded_array[f],
                     flt.weights_grad[d], 1, 0)
            # Bias gradient is the sum of this filter's sensitivity map.
            flt.bias_grad = expanded_array[f].sum()

    def expand_sensitivity_map(self, sensitivity_array):
        """Return the sensitivity map spread out to its stride-1 size.

        Entry ``(i, j)`` of every map is copied to position
        ``(i * stride, j * stride)`` of a zero array whose height and width
        are what the layer would output with stride 1.
        """
        print('sensitivity_array:\n%s' % (sensitivity_array,))
        depth = sensitivity_array.shape[0]
        # Size the sensitivity map would have with stride 1.
        expanded_width = (self.input_width - self.filter_width
                          + 2 * self.zero_padding + 1)
        expanded_height = (self.input_height - self.filter_height
                           + 2 * self.zero_padding + 1)
        expand_array = np.zeros((depth, expanded_height, expanded_width))
        # Copy the error values from the original sensitivity map.
        for i in range(self.output_height):
            for j in range(self.output_width):
                i_pos = i * self.stride
                j_pos = j * self.stride
                expand_array[:, i_pos, j_pos] = sensitivity_array[:, i, j]
        print('expand_array:\n%s' % (expand_array,))
        return expand_array

    def create_delta_array(self):
        """Return a zero array shaped like the layer input."""
        return np.zeros((self.channel_number,
                         self.input_height, self.input_width))
# 池化层
class MaxPoolingLayer(object):
    """Max-pooling layer operating independently on every channel.

    ``forward`` stores the pooled maps in ``self.output_array``;
    ``backward`` stores the error terms for the previous layer in
    ``self.delta_array``.
    """

    def __init__(self, input_width, input_height,
                 channel_number, filter_width,
                 filter_height, stride):
        self.input_width = input_width
        self.input_height = input_height
        self.channel_number = channel_number
        self.filter_width = filter_width
        self.filter_height = filter_height
        self.stride = stride
        # Floor division keeps the sizes integral on both Python 2 and 3 so
        # they can be used as array dimensions and ``range`` bounds.
        self.output_width = (input_width - filter_width) // self.stride + 1
        self.output_height = (input_height - filter_height) // self.stride + 1
        self.output_array = np.zeros((self.channel_number,
                                      self.output_height, self.output_width))

    def _window(self, i, j):
        """Return the (row, column) slices of the pooling window (i, j)."""
        top = i * self.stride
        left = j * self.stride
        return (slice(top, top + self.filter_height),
                slice(left, left + self.filter_width))

    def forward(self, input_array):
        """Write the maximum of every pooling window to ``output_array``.

        ``input_array`` is indexed as ``[channel, row, column]``.
        """
        for d in range(self.channel_number):
            for i in range(self.output_height):
                for j in range(self.output_width):
                    rows, cols = self._window(i, j)
                    self.output_array[d, i, j] = (
                        input_array[d, rows, cols].max())

    def backward(self, input_array, sensitivity_array):
        """Route each error term to the position of its window's maximum.

        Every entry of ``sensitivity_array`` is added to ``delta_array`` at
        the input position that held the maximum of the matching window;
        ties go to the first maximum in row-major order (``np.argmax``
        treats NaN as the maximum).  Contributions are accumulated, so an
        input cell that is the maximum of several overlapping windows
        (stride smaller than the filter) receives the sum of their error
        terms rather than only the last one.
        """
        self.delta_array = np.zeros(input_array.shape)
        for d in range(self.channel_number):
            for i in range(self.output_height):
                for j in range(self.output_width):
                    rows, cols = self._window(i, j)
                    patch_array = input_array[d, rows, cols]
                    k, l = np.unravel_index(np.argmax(patch_array),
                                            patch_array.shape)
                    self.delta_array[d,
                                     rows.start + k,
                                     cols.start + l] += (
                        sensitivity_array[d, i, j])
##.............................卷积层的一些测试.......................................
# 卷积层前向传播数据初始化
def init_test():
    """Build the fixture shared by the convolution-layer tests.

    Returns:
        A tuple ``(a, b, cl)``: a 3x5x5 input, a 2x3x3 array standing in for
        already-computed error terms, and a ``ConvLayer`` (3x3 filters, two
        filters, zero padding 1, stride 2, identity activation, learning
        rate 0.001) whose filter weights and biases are set to fixed values.
    """
    input_maps = np.array([
        [[0, 1, 1, 0, 2],
         [2, 2, 2, 2, 1],
         [1, 0, 0, 2, 0],
         [0, 1, 1, 0, 0],
         [1, 2, 0, 0, 2]],
        [[1, 0, 2, 2, 0],
         [0, 0, 0, 2, 0],
         [1, 2, 1, 2, 1],
         [1, 0, 0, 0, 0],
         [1, 2, 1, 1, 1]],
        [[2, 1, 2, 0, 0],
         [1, 0, 0, 1, 0],
         [0, 2, 1, 0, 1],
         [0, 1, 2, 2, 2],
         [2, 1, 0, 0, 1]],
    ])
    # Error terms are assumed to have been computed already.
    error_terms = np.array([
        [[0, 1, 1],
         [2, 2, 2],
         [1, 0, 0]],
        [[1, 0, 2],
         [0, 0, 0],
         [1, 2, 1]],
    ])
    layer = ConvLayer(
        input_width=5, input_height=5, channel_number=3,
        filter_width=3, filter_height=3, filter_number=2,
        zero_padding=1, stride=2,
        activator=IdentityActivator(), learning_rate=0.001)

    preset_weights = (
        [[[-1, 1, 0],
          [0, 1, 0],
          [0, 1, 1]],
         [[-1, -1, 0],
          [0, 0, 0],
          [0, -1, 0]],
         [[0, 0, -1],
          [0, 1, 0],
          [1, -1, -1]]],
        [[[1, 1, -1],
          [-1, -1, 1],
          [0, -1, 1]],
         [[0, 1, 0],
          [-1, 0, -1],
          [-1, 1, 0]],
         [[-1, 0, 0],
          [-1, 0, 1],
          [-1, 0, 0]]],
    )
    for flt, weights in zip(layer.filters, preset_weights):
        flt.weights = np.array(weights, dtype=np.float64)
        flt.bias = 1
    return input_maps, error_terms, layer
# 卷积层前向传播测试
def test():
    """Run the convolution layer forward on the fixture and print the output."""
    a, _, cl = init_test()
    cl.forward(a)
    # Single-argument print gives the same text on Python 2 and Python 3.
    print('cl.output_array:\n%s' % (cl.output_array,))
# 卷积层的反向传播测试
def test_bp():
    """Run one backward pass and weight update, then print both filters."""
    a, b, cl = init_test()
    cl.backward(a, b, IdentityActivator())
    cl.update()
    # Single-argument print gives the same text on Python 2 and Python 3.
    print('cl.filters[0]:\n%s' % (cl.filters[0],))
    print('cl.filters[1]:\n%s' % (cl.filters[1],))
#.............................池化层的一些测试.......................................
# 池化层测试数据初始化
def init_pool_test():
    """Build the fixture shared by the pooling-layer tests.

    Returns:
        A tuple ``(a, b, mpl)``: a 2x4x4 input, a 2x2x2 array of error terms,
        and a ``MaxPoolingLayer`` for 4x4 inputs with two channels, a 2x2
        window and stride 2.
    """
    input_maps = np.array([
        [[1, 1, 2, 4],
         [5, 6, 7, 8],
         [3, 2, 1, 0],
         [1, 2, 3, 4]],
        [[0, 1, 2, 3],
         [4, 5, 6, 7],
         [8, 9, 0, 1],
         [3, 4, 5, 6]],
    ], dtype=np.float64)
    error_terms = np.array([
        [[1, 2],
         [2, 4]],
        [[3, 5],
         [8, 2]],
    ], dtype=np.float64)
    pool = MaxPoolingLayer(
        input_width=4, input_height=4, channel_number=2,
        filter_width=2, filter_height=2, stride=2)
    return input_maps, error_terms, pool
# 池化层测试
def test_pool():
    """Run the pooling layer forward on the fixture and print input and output."""
    a, _, mpl = init_pool_test()
    mpl.forward(a)
    # Parenthesised so the call is valid on Python 3 as well as Python 2.
    print('input array:\n%s\noutput array:\n%s' % (a, mpl.output_array))
def test_pool_bp():
    """Run the pooling layer backward on the fixture and print the result."""
    a, b, mpl = init_pool_test()
    mpl.backward(a, b)
    # Parenthesised so the call is valid on Python 3 as well as Python 2.
    print('input array:\n%s\nsensitivity array:\n%s\ndelta array:\n%s' % (
        a, b, mpl.delta_array))
if __name__ == '__main__':
    # Run every demo in turn when the file is executed as a script.
    test()
    test_pool()
    test_bp()
    # Parenthesised so the call is valid on Python 3 as well as Python 2.
    print('................................................')
    test_pool_bp()

# Scratch example of in-place iteration with np.nditer, kept for reference:
#     a = np.arange(6).reshape(2, 3)
#     print(a)
#     for x in np.nditer(a, op_flags=['readwrite']):
#         x[...] = 2 * x
#     print(a)
一些基本的运行结果:
cl.output_array:
[[[ 6. 7. 5.]
[ 3. -1. -1.]
[ 2. -1. 4.]]
[[ 3. -4. -7.]
[ 2. -3. -3.]
[ 1. -4. -4.]]]
input array:
[[[ 1. 1. 2. 4.]
[ 5. 6. 7. 8.]
[ 3. 2. 1. 0.]
[ 1. 2. 3. 4.]]
[[ 0. 1. 2. 3.]
[ 4. 5. 6. 7.]
[ 8. 9. 0. 1.]
[ 3. 4. 5. 6.]]]
output array:
[[[ 6. 8.]
[ 3. 4.]]
[[ 5. 7.]
[ 9. 6.]]]
sensitivity_array:
[[[0 1 1]
[2 2 2]
[1 0 0]]
[[1 0 2]
[0 0 0]
[1 2 1]]]
expand_array:
[[[ 0. 0. 1. 0. 1.]
[ 0. 0. 0. 0. 0.]
[ 2. 0. 2. 0. 2.]
[ 0. 0. 0. 0. 0.]
[ 1. 0. 0. 0. 0.]]
[[ 1. 0. 0. 0. 2.]
[ 0. 0. 0. 0. 0.]
[ 0. 0. 0. 0. 0.]
[ 0. 0. 0. 0. 0.]
[ 1. 0. 2. 0. 1.]]]
padded_array: (2L, 7L, 7L)
flipped_weights: (3L, 3L, 3L)
flipped_weights: (3L, 3L, 3L)
sensitivity_array:
[[[0 1 1]
[2 2 2]
[1 0 0]]
[[1 0 2]
[0 0 0]
[1 2 1]]]
expand_array:
[[[ 0. 0. 1. 0. 1.]
[ 0. 0. 0. 0. 0.]
[ 2. 0. 2. 0. 2.]
[ 0. 0. 0. 0. 0.]
[ 1. 0. 0. 0. 0.]]
[[ 1. 0. 0. 0. 2.]
[ 0. 0. 0. 0. 0.]
[ 0. 0. 0. 0. 0.]
[ 0. 0. 0. 0. 0.]
[ 1. 0. 2. 0. 1.]]]
cl.filters[0]:
filter weights:
array([[[-1.008, 0.99 , -0.009],
[-0.005, 0.994, -0.006],
[-0.006, 0.995, 0.996]],
[[-1.004, -1.001, -0.004],
[-0.01 , -0.009, -0.012],
[-0.002, -1.002, -0.002]],
[[-0.002, -0.002, -1.003],
[-0.005, 0.992, -0.005],
[ 0.993, -1.008, -1.007]]])
bias:
0.99099999999999999
cl.filters[1]:
filter weights:
array([[[ 9.98000000e-01, 9.98000000e-01, -1.00100000e+00],
[ -1.00400000e+00, -1.00700000e+00, 9.97000000e-01],
[ -4.00000000e-03, -1.00400000e+00, 9.98000000e-01]],
[[ 0.00000000e+00, 9.99000000e-01, 0.00000000e+00],
[ -1.00900000e+00, -5.00000000e-03, -1.00400000e+00],
[ -1.00400000e+00, 1.00000000e+00, 0.00000000e+00]],
[[ -1.00400000e+00, -6.00000000e-03, -5.00000000e-03],
[ -1.00200000e+00, -5.00000000e-03, 9.98000000e-01],
[ -1.00200000e+00, -1.00000000e-03, 0.00000000e+00]]])
bias:
0.99299999999999999
................................................
input array:
[[[ 1. 1. 2. 4.]
[ 5. 6. 7. 8.]
[ 3. 2. 1. 0.]
[ 1. 2. 3. 4.]]
[[ 0. 1. 2. 3.]
[ 4. 5. 6. 7.]
[ 8. 9. 0. 1.]
[ 3. 4. 5. 6.]]]
sensitivity array:
[[[ 1. 2.]
[ 2. 4.]]
[[ 3. 5.]
[ 8. 2.]]]
delta array:
[[[ 0. 0. 0. 0.]
[ 0. 1. 0. 2.]
[ 2. 0. 0. 0.]
[ 0. 0. 0. 4.]]
[[ 0. 0. 0. 0.]
[ 0. 3. 0. 5.]
[ 0. 8. 0. 0.]
[ 0. 0. 0. 2.]]]
全连接层的实现和上一篇文章类似,在此就不再赘述了。至此,我们已经拥有了实现一个简单的卷积神经网络所需要的基本组件,但还没有把它们组装成一个完整的CNN网络。
对于卷积神经网络,现在有很多优秀的开源实现,因此我们并不需要真的自己去实现一个。这里贴出这些代码能让我们更深的理解卷积神经网络的原理,仅供参考学习。