I recently started working with AlexNet and have learned a lot, so here is a short write-up.
I won't repeat the basics here; another post on my blog already covers them in detail.
This post covers only the practical part.
All of the code comes from https://kratzert.github.io/2017/02/24/finetuning-alexnet-with-tensorflow.html
If you are interested, go read that post directly; it is far better than anything I could write.
First, part one:
Here we define an __init__ function; a quick look at the code shows that it parses the input arguments
and calls a function named create(). The implementation of create() is not shown at this point, but it appears later, so keep an eye out for it.
The load_initial_weights(self) function assigns the pretrained weights to the variables we create; its implementation also comes later (to keep things readable, I avoid putting too much code in a single block of explanation).
import tensorflow as tf
import numpy as np

class AlexNet(object):

    def __init__(self, x, keep_prob, num_classes, skip_layer,
                 weights_path='DEFAULT'):
        """
        Inputs:
        - x: tf.placeholder, for the input images
        - keep_prob: tf.placeholder, for the dropout rate
        - num_classes: int, number of classes of the new dataset
        - skip_layer: list of strings, names of the layers you want to reinitialize
        - weights_path: path string, path to the pretrained weights
          (if bvlc_alexnet.npy is not in the same folder)
        """
        # Parse input arguments
        self.X = x
        self.NUM_CLASSES = num_classes
        self.KEEP_PROB = keep_prob
        self.SKIP_LAYER = skip_layer
        if weights_path == 'DEFAULT':
            self.WEIGHTS_PATH = 'bvlc_alexnet.npy'
        else:
            self.WEIGHTS_PATH = weights_path

        # Call the create function to build the computational graph of AlexNet
        self.create()

    def create(self):
        pass

    def load_initial_weights(self, session):
        pass
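A quick side note on the weights file referenced above: bvlc_alexnet.npy stores a plain Python dict that maps each layer name to a list of [weights, biases] arrays, which is exactly the layout load_initial_weights() will walk through later. Here is a minimal sketch to inspect it, assuming the file sits next to the script (the example shapes in the comment are what I would expect for conv1):

import numpy as np

# Peek at the pretrained weights file: a dict of layer name -> [weights, biases]
weights_dict = np.load('bvlc_alexnet.npy', encoding='bytes').item()
for op_name, params in weights_dict.items():
    print(op_name, [p.shape for p in params])
# e.g. conv1 [(11, 11, 3, 96), (96,)]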
So now we have the basic class structure in place.
Next, we define a few helper functions for building the various kinds of layers.
def conv(x, filter_height, filter_width, num_filters, stride_y, stride_x, name,
         padding='SAME', groups=1):
    # Get number of input channels
    input_channels = int(x.get_shape()[-1])

    # Create lambda function for the convolution
    convolve = lambda i, k: tf.nn.conv2d(i, k,
                                         strides=[1, stride_y, stride_x, 1],
                                         padding=padding)

    with tf.variable_scope(name) as scope:
        # Create tf variables for the weights and biases of the conv layer
        weights = tf.get_variable('weights',
                                  shape=[filter_height, filter_width,
                                         input_channels // groups, num_filters])
        biases = tf.get_variable('biases', shape=[num_filters])

        if groups == 1:
            conv = convolve(x, weights)
        else:
            # In the case of multiple groups, split input and weights
            # and convolve them separately
            input_groups = tf.split(axis=3, num_or_size_splits=groups, value=x)
            weight_groups = tf.split(axis=3, num_or_size_splits=groups, value=weights)
            output_groups = [convolve(i, k) for i, k in zip(input_groups, weight_groups)]

            # Concat the convolved output together again
            conv = tf.concat(axis=3, values=output_groups)

        # Add biases
        bias = tf.reshape(tf.nn.bias_add(conv, biases), conv.get_shape().as_list())

        # Apply relu function
        relu = tf.nn.relu(bias, name=scope.name)

        return relu
Note the clever use of a lambda function here: it captures the strides and padding once, so the same convolution can be reused unchanged for the full input or for each group.
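To see why this pattern is handy, here is a stripped-down sketch of the same idea in plain Python (the numbers are hypothetical): the lambda captures the fixed argument from the enclosing scope, so applying it per group stays a one-liner, just like convolve above captures stride_y, stride_x and padding.

factor = 10  # plays the role of strides/padding: fixed for every call
scale = lambda v: [e * factor for e in v]  # captures factor from the enclosing scope

groups = [[1, 2], [3, 4]]
outputs = [scale(g) for g in groups]
print(outputs)  # [[10, 20], [30, 40]]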
Next comes the definition of the fully connected layer, which is much simpler than the conv layer's.
def fc(x, num_in, num_out, name, relu=True):
    with tf.variable_scope(name) as scope:
        # Create tf variables for the weights and biases
        weights = tf.get_variable('weights', shape=[num_in, num_out], trainable=True)
        biases = tf.get_variable('biases', [num_out], trainable=True)

        # Matrix multiply weights and inputs and add bias
        act = tf.nn.xw_plus_b(x, weights, biases, name=scope.name)

    if relu:
        # Apply ReLu non linearity
        relu = tf.nn.relu(act)
        return relu
    else:
        return act
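For reference, tf.nn.xw_plus_b is just a small convenience wrapper; the activation line above could equivalently be written as a matmul composed with a bias add:

# Equivalent to tf.nn.xw_plus_b(x, weights, biases):
act = tf.nn.bias_add(tf.matmul(x, weights), biases)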
The rest are max-pooling, local response normalization, and dropout, and should be self-explanatory.
def max_pool(x, filter_height, filter_width, stride_y, stride_x,
             name, padding='SAME'):
    return tf.nn.max_pool(x, ksize=[1, filter_height, filter_width, 1],
                          strides=[1, stride_y, stride_x, 1],
                          padding=padding, name=name)

def lrn(x, radius, alpha, beta, name, bias=1.0):
    return tf.nn.local_response_normalization(x, depth_radius=radius,
                                              alpha=alpha, beta=beta,
                                              bias=bias, name=name)

def dropout(x, keep_prob):
    return tf.nn.dropout(x, keep_prob)
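One practical point about dropout that is easy to miss: tf.nn.dropout uses inverted dropout, scaling the surviving activations by 1/keep_prob, so at test time you simply feed keep_prob = 1.0 through the placeholder. A tiny sketch:

import tensorflow as tf

a = tf.ones([4])
d = tf.nn.dropout(a, keep_prob=0.5)
with tf.Session() as sess:
    # Surviving entries are scaled to 2.0 (= 1/0.5); dropped ones become 0.0.
    print(sess.run(d))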
Below are the implementations of the create() and load_initial_weights() functions.
def create(self):
    # 1st Layer: Conv (w ReLu) -> Lrn -> Pool
    conv1 = conv(self.X, 11, 11, 96, 4, 4, padding='VALID', name='conv1')
    norm1 = lrn(conv1, 2, 1e-05, 0.75, name='norm1')
    pool1 = max_pool(norm1, 3, 3, 2, 2, padding='VALID', name='pool1')

    # 2nd Layer: Conv (w ReLu) -> Lrn -> Pool with 2 groups
    conv2 = conv(pool1, 5, 5, 256, 1, 1, groups=2, name='conv2')
    norm2 = lrn(conv2, 2, 1e-05, 0.75, name='norm2')
    pool2 = max_pool(norm2, 3, 3, 2, 2, padding='VALID', name='pool2')

    # 3rd Layer: Conv (w ReLu)
    conv3 = conv(pool2, 3, 3, 384, 1, 1, name='conv3')

    # 4th Layer: Conv (w ReLu) split into two groups
    conv4 = conv(conv3, 3, 3, 384, 1, 1, groups=2, name='conv4')

    # 5th Layer: Conv (w ReLu) -> Pool, split into two groups
    conv5 = conv(conv4, 3, 3, 256, 1, 1, groups=2, name='conv5')
    pool5 = max_pool(conv5, 3, 3, 2, 2, padding='VALID', name='pool5')

    # 6th Layer: Flatten -> FC (w ReLu) -> Dropout
    flattened = tf.reshape(pool5, [-1, 6*6*256])
    fc6 = fc(flattened, 6*6*256, 4096, name='fc6')
    dropout6 = dropout(fc6, self.KEEP_PROB)

    # 7th Layer: FC (w ReLu) -> Dropout
    fc7 = fc(dropout6, 4096, 4096, name='fc7')
    dropout7 = dropout(fc7, self.KEEP_PROB)

    # 8th Layer: FC and return unscaled activations
    # (for tf.nn.softmax_cross_entropy_with_logits)
    self.fc8 = fc(dropout7, 4096, self.NUM_CLASSES, relu=False, name='fc8')
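The 6*6*256 in the flatten step is worth checking. Assuming the standard 227x227x3 AlexNet input, the spatial size can be traced layer by layer with TensorFlow's output-size formulas (VALID: ceil((n - k + 1) / s); SAME: ceil(n / s)); the helper below is just my own sketch of that arithmetic:

import math

def out_size(n, k, s, padding):
    # TF output-size formulas for a kernel of size k and stride s
    if padding == 'VALID':
        return math.ceil((n - k + 1) / s)
    return math.ceil(n / s)

n = 227
n = out_size(n, 11, 4, 'VALID')  # conv1 -> 55
n = out_size(n, 3, 2, 'VALID')   # pool1 -> 27  (conv2 is SAME, stride 1: stays 27)
n = out_size(n, 3, 2, 'VALID')   # pool2 -> 13  (conv3-5 are SAME, stride 1: stay 13)
n = out_size(n, 3, 2, 'VALID')   # pool5 -> 6
print(n * n * 256)               # 9216 == 6*6*256

And here is load_initial_weights():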
def load_initial_weights(self, session):
    # Load the weights into memory
    weights_dict = np.load(self.WEIGHTS_PATH, encoding='bytes').item()

    # Loop over all layer names stored in the weights dict
    for op_name in weights_dict:
        # Check if the layer is one of the layers that should be reinitialized
        if op_name not in self.SKIP_LAYER:
            with tf.variable_scope(op_name, reuse=True):
                # Loop over list of weights/biases and assign them to
                # their corresponding tf variable
                for data in weights_dict[op_name]:
                    # Biases
                    if len(data.shape) == 1:
                        var = tf.get_variable('biases', trainable=False)
                        session.run(var.assign(data))
                    # Weights
                    else:
                        var = tf.get_variable('weights', trainable=False)
                        session.run(var.assign(data))
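To close the loop, here is a minimal sketch of how the pieces fit together, assuming bvlc_alexnet.npy is in the working directory; the 2-class setup and the random batch are hypothetical stand-ins for a real fine-tuning dataset:

x = tf.placeholder(tf.float32, [None, 227, 227, 3])
keep_prob = tf.placeholder(tf.float32)

# Reinitialize only fc8 for a new 2-class task; every other layer
# keeps its pretrained weights.
model = AlexNet(x, keep_prob, num_classes=2, skip_layer=['fc8'])

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    model.load_initial_weights(sess)

    batch = np.random.rand(1, 227, 227, 3).astype(np.float32)
    scores = sess.run(model.fc8, feed_dict={x: batch, keep_prob: 1.0})
    print(scores.shape)  # (1, 2)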
With these two functions implemented, our AlexNet is complete.
The hands-on steps and a simple dataset are also available here; if you need them, you can show some support for 1 C-coin, haha.