目标
微调论文“A Lightened CNN for Deep Face Representation”中的29层caffe网络。
数据
自己收集的包含500人的亚洲人脸数据集;
训练数据准备
1.获得500人微调数据集之后,使用python源码工具将数据集划分为训练集和验证集:
#!---* coding: utf-8 --*--
#!/usr/bin/python
"""
将数据集随机分成训练集、验证集、测试集
"""
from PIL import Image, ImageFile
import random
import os, time
import logging
logger = logging.getLogger(__name__)
ImageFile.LOAD_TRUNCATED_IMAGES = True
class CreateDataSet:
    """Helpers for splitting an image dataset into train/val/test subsets."""

    def __init__(self):
        pass

    @staticmethod
    def randNumber(li, num):
        """Randomly draw ``num`` distinct items from ``li``.

        :param li: population list (left untouched)
        :param num: number of items to sample
        :return: (sampled, remaining) — remaining is a new list with the
            sampled items removed.
        """
        # FIX: the original did `temp_li = li`, which aliases (not copies)
        # the argument and mutated the caller's list as a side effect.
        remaining = list(li)
        sampled = random.sample(li, num)
        for item in sampled:
            remaining.remove(item)
        return sampled, remaining

    @staticmethod
    def openImage(image):
        """Open the image file at path ``image`` for reading (PIL Image)."""
        return Image.open(image, mode="r")

    @staticmethod
    def saveImage(image, path):
        """Save PIL Image ``image`` to ``path``."""
        image.save(path)
def makeDir(path):
    """Create directory ``path`` (including parents) if it does not exist.

    :param path: directory to create
    :return: 0 when created, 1 when it already exists, -1 on failure.

    FIX: the original returned -2 on exception, but every caller tests
    ``makeDir(...) == -1``, so creation failures were silently ignored.
    """
    try:
        if not os.path.exists(path):
            if not os.path.isfile(path):
                # os.mkdir(path)
                os.makedirs(path)
            return 0
        else:
            return 1
    except Exception as e:
        print(str(e))
        return -1
def save(image, des_path, class_name, file_name):
    """Write ``image`` to ``des_path``/``class_name``/``file_name``."""
    target = os.path.join(des_path, class_name, file_name)
    CreateDataSet.saveImage(image, target)
# 1 => also carve out a separate test split alongside train/val; 0 => train/val only.
is_create_test=0
# Fraction of each class's images drawn for the val split (and, when enabled, the test split).
ratio=0.1
def create(path, new_path):
    """Split a class-per-folder image dataset into train/val(/test) subsets.

    For each class directory under ``path``, ``ratio`` of the images are
    drawn for val (and, when ``is_create_test == 1``, another ``ratio`` for
    test); the remainder become the training set. Images are re-saved under
    ``new_path``/{train,val,test}/<class_name>/.

    :param path: source root directory, one sub-directory per class
    :param new_path: existing destination root directory
    :return: -1 on any failure, None on success
    """
    train_sample_path = os.path.join(new_path, "train")
    val_sample_path = os.path.join(new_path, "val")
    test_sample_path = os.path.join(new_path, "test")
    if os.path.isdir(new_path):
        # FIX: compare with `< 0` — the original tested `== -1` while
        # makeDir returned -2 on error, so failures were never detected.
        if makeDir(train_sample_path) < 0:
            print('create train dir failure')
            return -1
        if makeDir(val_sample_path) < 0:
            print('create val dir failure')
            return -1
        if is_create_test == 1:
            if makeDir(test_sample_path) < 0:
                print('create test dir failure')
                return -1
    else:
        print('the input param new_path is not the dir')
        return -1
    if os.path.isdir(path):
        class_names = os.listdir(path)
    else:
        print('the input param path is not the dir')
        return -1
    for name in class_names:
        print("process class name=%s" % name)
        tmp_class_name = os.path.join(path, name)
        if not os.path.isdir(tmp_class_name):
            continue  # skip stray files at the top level
        image_names = os.listdir(tmp_class_name)
        total = len(image_names)
        li = list(range(total))
        # Draw validation indices first; everything else stays in remain_list.
        val_total_sample_num = int(total * ratio)
        val_name_list, remain_list = CreateDataSet.randNumber(li, val_total_sample_num)
        test_name_list = []
        test_total_sample_num = 0
        if is_create_test == 1:
            test_total_sample_num = int(total * ratio)
            test_name_list, remain_list = CreateDataSet.randNumber(remain_list, test_total_sample_num)
        # --- validation samples ---
        if makeDir(os.path.join(val_sample_path, name)) < 0:
            print('create val class dir failure')
            return -1
        print("val sample number=%d" % val_total_sample_num)
        for index in val_name_list:
            temp_img_name = os.path.join(tmp_class_name, image_names[index])
            if os.path.isfile(temp_img_name):
                image = CreateDataSet.openImage(temp_img_name)
                save(image, val_sample_path, name, image_names[index])
        # --- test samples (optional) ---
        if is_create_test == 1:
            if makeDir(os.path.join(test_sample_path, name)) < 0:
                print('create test class dir failure')
                return -1
            print("test sample number=%d" % test_total_sample_num)
            for index in test_name_list:
                temp_img_name = os.path.join(tmp_class_name, image_names[index])
                if os.path.isfile(temp_img_name):
                    image = CreateDataSet.openImage(temp_img_name)
                    save(image, test_sample_path, name, image_names[index])
        # --- training samples: everything not drawn above ---
        if makeDir(os.path.join(train_sample_path, name)) < 0:
            print('create train class dir failure')
            return -1
        print("train sample number=%d" % len(remain_list))
        for train in remain_list:
            temp_img_name = os.path.join(tmp_class_name, image_names[train])
            if os.path.isfile(temp_img_name):
                image = CreateDataSet.openImage(temp_img_name)
                save(image, train_sample_path, name, image_names[train])
    print("finish")
if __name__ == '__main__':
    # Entry point: split the raw 500-identity dataset into train/val folders.
    # NOTE(review): source/destination paths are hard-coded to a Windows
    # drive (H:) — adjust per machine before running.
    create("H:\\Finetuning-500","H:\\Finetuning-500_new")
所得的训练集和验证集放置Finetuning-500_new中。
2.利用gen_train_val_txt.py生成训练集和验证集的图片列表及其对应标签的txt文件(label标签需从0开始);将Finetuning-500_new放置/home/Data文件夹下,并在caffe-master/examples/下新建文件夹Finetuning-500,将gen_train_val_txt.py放置该文件夹下;
#!---* coding: utf-8 --*--
#!/usr/bin/python
import os
class CaffeData:
    """Generate caffe image-list files: one "/<class_dir>/<file> <label>" per line."""

    def __init__(self):
        pass

    @staticmethod
    def create_train_txt(train_data_path, train_txt_path):
        """Write <train_txt_path>/train.txt listing images under ``train_data_path``.

        Labels are consecutive integers starting at 0, one per class directory.

        FIXES: the original iterated the undefined name ``val_data_path``
        (NameError at runtime) and never closed the output file.
        """
        txt_path = train_txt_path + '/' + r'train.txt'
        with open(txt_path, "w") as f:
            count = 0
            # NOTE(review): os.listdir order is arbitrary (not sorted); labels
            # match val.txt only when both roots list classes in the same
            # order — consider sorted() if that ever breaks. TODO confirm.
            for dir_name in os.listdir(train_data_path):
                for file_name in os.listdir(os.path.join(train_data_path, dir_name)):
                    img_file_path = '/' + dir_name + '/' + file_name + " " + str(count)
                    f.write(img_file_path + '\n')
                count = count + 1  # next class directory -> next label

    @staticmethod
    def create_val_txt(val_data_path, val_txt_path):
        """Write <val_txt_path>/val.txt listing images under ``val_data_path``.

        Same format and labelling scheme as create_train_txt.
        """
        txt_path = val_txt_path + '/' + r'val.txt'
        with open(txt_path, "w") as f:
            count = 0
            for dir_name in os.listdir(val_data_path):
                for file_name in os.listdir(os.path.join(val_data_path, dir_name)):
                    img_file_path = '/' + dir_name + '/' + file_name + " " + str(count)
                    f.write(img_file_path + '\n')
                count = count + 1  # next class directory -> next label
if __name__ == '__main__':
    # Generate the train.txt / val.txt image lists consumed later by
    # caffe's convert_imageset (see CreateLmdb.sh). Paths are machine-specific.
    CaffeData.create_train_txt(r'/home/Data/Finetuning-500_new/train',r'/home/yi_miao/caffe-master/examples/Finetuning-500')
    CaffeData.create_val_txt(r'/home/Data/Finetuning-500_new/val',r'/home/yi_miao/caffe-master/examples/Finetuning-500')
#finetuning 数据集
生成的训练集和验证集的图片列表及其对应标签的txt文件内容如下:
利用CreateLmdb.sh脚本文件生成训练集和验证集的lmdb数据格式,CreateLmdb.sh位于caffe-master/examples/Finetuning-500下;
#!/bin/sh
# Convert the train/val image folders into lmdb databases with caffe's
# convert_imageset tool (grayscale, resized to 144x144).
CAFFE_ROOT='/***/caffe-master/'
TOOLS=$CAFFE_ROOT/build/tools
EXAMPLE=$CAFFE_ROOT/examples/Finetuning-500
TRAIN_TXT=$EXAMPLE/train.txt
VAL_TXT=$EXAMPLE/val.txt
DATA=$CAFFE_ROOT/data/Finetuning-500
TRAIN_DATA_ROOT=/home/Data/Finetuning-500_new/train
VAL_DATA_ROOT=/home/Data/Finetuning-500_new/val
TRAIN_DATA_LMDB=$DATA/train_lmdb
VAL_DATA_LMDB=$DATA/val_lmdb
IMG_WIDTH=144
IMG_HEIGHT=144
# Set RESIZE=true to resize the images. Leave as false if images have already been resized using another tool.
RESIZE=true
if $RESIZE; then
  RESIZE_HEIGHT=$IMG_HEIGHT
  RESIZE_WIDTH=$IMG_WIDTH
else
  RESIZE_HEIGHT=0
  RESIZE_WIDTH=0
fi
# Remove stale lmdbs — convert_imageset refuses to overwrite an existing one.
if [ -d "$TRAIN_DATA_LMDB" ]; then
  rm -rf $TRAIN_DATA_LMDB
fi
if [ -d "$VAL_DATA_LMDB" ]; then
  rm -rf $VAL_DATA_LMDB
fi
if [ ! -d "$TRAIN_DATA_ROOT" ]; then
  echo "Error: TRAIN_DATA_ROOT is not a path to a directory: $TRAIN_DATA_ROOT"
  exit 1
fi
if [ ! -d "$VAL_DATA_ROOT" ]; then
  echo "Error: VAL_DATA_ROOT is not a path to a directory: $VAL_DATA_ROOT"
  exit 1
fi
echo $RESIZE_HEIGHT
echo $RESIZE_WIDTH
echo "Creating train lmdb..."
GLOG_logtostderr=1 $TOOLS/convert_imageset \
  --resize_height=$RESIZE_HEIGHT \
  --resize_width=$RESIZE_WIDTH \
  --shuffle \
  --gray=true \
  $TRAIN_DATA_ROOT \
  $TRAIN_TXT \
  $TRAIN_DATA_LMDB
# FIX: the line above originally ended with a stray backslash, which
# continued the command and passed the next `echo ...` line as extra
# arguments to convert_imageset instead of printing it.
echo "Creating val lmdb..."
GLOG_logtostderr=1 $TOOLS/convert_imageset \
  --resize_height=$RESIZE_HEIGHT \
  --resize_width=$RESIZE_WIDTH \
  --shuffle \
  --gray=true \
  $VAL_DATA_ROOT \
  $VAL_TXT \
  $VAL_DATA_LMDB
echo "Done"
利用CreateMeanFile.sh脚本文件计算训练集和验证集lmdb数据的均值文件(binaryproto格式),CreateMeanFile.sh位于caffe-master/examples/Finetuning-500下;
#!/bin/sh
# Compute the per-pixel mean image (binaryproto) of the train and val
# lmdbs using caffe's compute_image_mean tool.
CAFFE_ROOT='/***/caffe-master'
TOOLS=$CAFFE_ROOT/build/tools
EXAMPLE=$CAFFE_ROOT/examples/Finetuning-500
DATA=$CAFFE_ROOT/data/Finetuning-500
# Inputs: the lmdbs produced by CreateLmdb.sh.
TRAIN_DATA_LMDB=$DATA/train_lmdb
VAL_DATA_LMDB=$DATA/val_lmdb
# Outputs: one mean file per split.
TRAIN_MEAN=$DATA/train_data_mean.binaryproto
VAL_MEAN=$DATA/val_data_mean.binaryproto
$TOOLS/compute_image_mean $TRAIN_DATA_LMDB $TRAIN_MEAN
$TOOLS/compute_image_mean $VAL_DATA_LMDB $VAL_MEAN
使用caffe,微调模型
方法一:
1.修改train_test.prototxt和solver.prototxt文件
以Light CNN29网络模型为例,考虑到数据量较少,LCNN29微调过程中,保留前面层的参数,修改最后一层fc2为fc2_500,将最后预测种类设置为500,添加propagate_down:0参数,微调过程中只训练最后一个全连接fc2_500层,保持前面层的参数不变;
layer{
  # FIX: prototxt only supports '#' comments — the original '//modify'
  # markers would make caffe's prototxt parser reject the file.
  name: "fc2_qh_500"          # modified: renamed from fc2 for 500-class fine-tuning
  type: "InnerProduct"
  bottom: "eltwise_fc1"
  top: "fc2_qh_500"           # modified
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
    lr_mult: 2
    decay_mult: 0
  }
  # modified: stop gradients below this layer so earlier layers stay frozen
  propagate_down:0
  inner_product_param{
    num_output: 500           # modified: 500 identities in the fine-tuning set
    weight_filler {
      type: "xavier"
      # type:"gaussian"
      # std:0.01
    }
    bias_filler {
      type: "constant"
      value: 0.1
    }
  }
}
问题:
测试通过设置propagate_down参数训练的网络模型,由设置fc2层propagate_down为0微调得到的网络模型,虽然成功识别的数量与原始模型差不多,但失败的数量大大增加,根据资料所说:设置fc2层propagate_down为0,只会微调fc2层的参数,前面层的参数理论上保持不变,因此在利用fc1层特征进行人脸识别时效果应该和原始模型一致,但测试结果差异较大,对该参数的具体作用存在疑问,希望有人能答疑解惑。
方法二
设网络依次为A→B→C三层:希望固定C层的参数不被更新,但C层之前的A、B层参数仍可训练。这种情况下只是固定了C层自身的参数,C层计算得到的梯度依然会反向传播给前面的B层。做法是将C层对应的参数blob的学习率设为0:在layer里面加上param { lr_mult: 0 decay_mult: 0 }即可。例如在下面的全连接层中,设置fc2层的lr_mult和decay_mult参数为0:
# Illustrative snippet: freeze a layer's own parameters by zeroing the
# learning rate of each parameter blob (gradients still flow to earlier layers).
layer {
  type: "InnerProduct"
  param {
    # Config for the 1st parameter blob: the fully-connected weight matrix.
    lr_mult: 0    # learning rate 0; see ParamSpec in caffe.proto for other fields
    decay_mult: 0
  }
  param {
    # Config for the 2nd parameter blob: the fully-connected bias vector.
    lr_mult: 0    # learning rate 0
    decay_mult: 0
  }
}
# FIX: the original snippet was missing this closing brace for `layer`.
对学习率的设置
初始学习率设置为0.001,并以inv方式进行衰减。发现loss逐渐衰减了,但迭代2万多次后,损失函数值又开始上升,需要调小学习率,将初始学习率设为0.0001
# Solver settings for fine-tuning LCNN-29 on the 500-identity dataset.
net: "/***/caffe-master/examples/Finetuning-500/train_test.prototxt"
test_iter: 580 # 580*16(batchsize)=9280>4028
test_interval:1340 # 1340*64(batchsize)=85760>85743
# Also report the loss (not just accuracy) on the test net.
test_compute_loss: true
# Lowered from 0.001: with the larger rate the loss rose again after ~20k iters.
base_lr: 0.0001
#lr_policy:"step"
#gamma:0.1
#stepsize:20000
momentum: 0.9
weight_decay: 0.0005
# "inv" decay: lr = base_lr * (1 + gamma * iter)^(-power)
lr_policy: "inv"
gamma: 0.00005
power: 0.75
display: 50
max_iter: 500000
snapshot: 5000
snapshot_prefix: "/***/caffe-master/examples/Finetuning-500/snapshot/"
solver_mode: GPU
#solver_mode: CPU
debug_info: false
后续逐步增加微调层数进行微调,但由于数据量较少,微调较多的层数效果反而不如微调最后两个全连接层。