通过前几期的演示,我们可以大概总结出一个深度学习网络成型的一般步骤:
1. 准备数据集
2. 构建网络模型
3. 训练网络模型
4. 评估网络模型
这一期我们再做一个有意思的演示实验,让深度学习网络学习人的面部特征,从面部特征来估计人的年龄。
按照上面总结的一般步骤,首先我们需要一个标有年龄标签的数据集。我们在网络上找到了一个开放的人脸库:http://www.openu.ac.il/home/hassner/projects/cnn_agegender/ ,提供该人脸库的机构是以色列的一所大学。
如果下载得比较慢,可以直接从这个地址下载:https://pan.baidu.com/s/1dE5PrHJ ,其中的 AdienceBenchmarkOfUnfilteredFacesForGenderAndAgeClassification 文件夹即为数据集文件。网络模型我们仍然基于 Inception-V3 的基础结构进行重新训练,最后通过 Jupyter 来演示训练后的网络的识别效果。
具体的训练过程和代码都参考了这个链接 https://github.com/dpressel/rude-carnie
演示代码的下载地址 https://github.com/aggresss/GPUDemo
对应演示文件 https://github.com/aggresss/GPUDemo/blob/master/rude-carnie/facial_feature.ipynb
演示代码使用已经训练好的模型,模型以 checkpoint 形式保存。将 https://pan.baidu.com/s/1dE5PrHJ 里面的 22801 文件夹复制到 GPUDemo 目录下,然后运行下面的 Jupyter 文件,就可以看到演示结果。
%matplotlib inline
import numpy as np
from matplotlib import pyplot as plt
import cv2
from guess import *
# Demo configuration: module-level constants read by age_guess().

# Directory handed to get_checkpoint() to locate the trained model.
FLAGS_model_dir = "../22801"
# "age" selects AGE_LIST as the label set; any other value selects GENDER_LIST.
FLAGS_class_type = "age"
# Device string passed to tf.device() while the graph is built and run.
FLAGS_device_id= "/gpu:0"
# Input image: passed to the face detector and re-read with cv2.imread for display.
# When no face detector is configured it may also be a directory of images
# or a .csv/.tsv/.txt list file.
FLAGS_filename = "../italy_worldcup.jpg"
# Third argument to get_checkpoint().
FLAGS_checkpoint = "checkpoint"
# Passed to select_model() to choose the network-building function.
FLAGS_model_type = "inception"
# An empty string is converted to None before being passed to get_checkpoint().
FLAGS_requested_step = ""
# Model file for the face detector; an empty value skips the face-detection step.
FLAGS_face_detection_model = "../haarcascade_frontalface_default.xml"
# Detector type passed to face_detection_model().
FLAGS_face_detection_type= "cascade"
def classify_one_multi_crops(sess, label_list, softmax_output, coder, images, image_file):
    """Classify one image by averaging the model output over several crops.

    Args:
        sess: Session used to evaluate the crop batch and run the model.
        label_list: Sequence of class labels, indexed by class id.
        softmax_output: Fetch passed to ``sess.run`` that yields one row of
            class probabilities per crop.
        coder: Passed through unchanged to ``make_multi_crop_batch``.
        images: Placeholder the evaluated crop batch is fed into.
        image_file: Path of the image to classify.

    Returns:
        The label with the highest mean probability, or ``None`` when
        anything goes wrong (the error is printed instead of raised).
    """
    try:
        image_batch = make_multi_crop_batch(image_file, coder)
        # Evaluate with the session we were given rather than depending on
        # the caller having installed it as the default session.
        crops = image_batch.eval(session=sess)
        batch_results = sess.run(softmax_output, feed_dict={images: crops})
        # Collapse the per-crop distributions into a single averaged one.
        output = np.mean(batch_results, axis=0)
        best = np.argmax(output)
        best_choice = (label_list[best], output[best])
        print('Guess @ 1 %s, prob = %.2f' % best_choice)
        return label_list[best]
    except Exception as e:
        # Deliberately best-effort: one unreadable image must not abort the
        # whole run, so report the failure and signal it with None.
        print(e)
        print('Failed to run image %s ' % image_file)
        return None
def _collect_batch_files(filename):
    """Expand *filename* into a list of inputs when no face detector ran.

    A directory yields the image files directly inside it, a path ending in
    .csv/.tsv/.txt is read with ``list_images``, and anything else is
    treated as a single image path.
    """
    if os.path.isdir(filename):
        found = []
        for relpath in os.listdir(filename):
            abspath = os.path.join(filename, relpath)
            if os.path.isfile(abspath) and abspath.endswith(('.jpg', '.png', '.JPG', '.PNG', '.jpeg')):
                print(abspath)
                found.append(abspath)
        return found
    # If it happens to be a list file, read the list instead of the path.
    if filename.endswith(('.csv', '.tsv', '.txt')):
        return list_images(filename)
    return [filename]


def _show_annotated_faces(image_path, cascade_path, age_labels):
    """Draw a box and a label on each detected face, then display the image."""
    img = cv2.imread(image_path)
    if img is None:
        # cv2.imread returns None for directories, list files and
        # unreadable images; there is nothing to draw on in that case.
        print('Cannot display %s: not a readable image' % image_path)
        return

    face_cascade = cv2.CascadeClassifier(cascade_path)
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    faces = face_cascade.detectMultiScale(gray, 1.3, 5)

    # detectMultiScale returns an empty tuple when no face is found, so use
    # len() here; an empty tuple has no .shape attribute.
    if len(faces) != len(age_labels):
        # NOTE(review): labels are paired with faces purely by position and
        # this detection pass is independent of the one that produced the
        # labels, so a count mismatch means some pairings may be wrong.
        print('Warning: %d faces detected but %d labels available'
              % (len(faces), len(age_labels)))

    font = cv2.FONT_HERSHEY_SIMPLEX
    green = (0, 255, 0)
    # zip() stops at the shorter sequence, so a mismatch cannot raise IndexError.
    for face, label in zip(faces, age_labels):
        x, y, w, h = (int(v) for v in face)
        cv2.rectangle(img, (x, y), (x + w, y + h), green, 2)
        # A failed classification yields None; draw the box but no text.
        if label is not None:
            cv2.putText(img, label, (x - 10, y - 5), font, 0.75, green, 2, cv2.LINE_AA)

    # OpenCV loads images as BGR; matplotlib expects RGB.
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    plt.figure(dpi=150)
    plt.imshow(img, cmap='gray', interpolation='bicubic')
    # Hide tick values on the X and Y axes.
    plt.xticks([])
    plt.yticks([])
    plt.show()


def age_guess():
    """Predict a label for every face in ``FLAGS_filename`` and show the result.

    Steps: optionally run the configured face detector to obtain face crops,
    build the model and restore it from the checkpoint, classify every input
    with ``classify_one_multi_crops``, then display the input image with a
    rectangle and the predicted label drawn on each detected face.

    All configuration is read from the module-level ``FLAGS_*`` constants.
    Returns nothing; output is printed and plotted.
    """
    files = []
    if FLAGS_face_detection_model:
        print('Using face detector (%s) %s' % (FLAGS_face_detection_type, FLAGS_face_detection_model))
        face_detect = face_detection_model(FLAGS_face_detection_type, FLAGS_face_detection_model)
        face_files, _ = face_detect.run(FLAGS_filename)
        print(face_files)
        files += face_files

    config = tf.ConfigProto(allow_soft_placement=True)
    with tf.Session(config=config) as sess:
        label_list = AGE_LIST if FLAGS_class_type == 'age' else GENDER_LIST
        nlabels = len(label_list)
        print('Executing on %s' % FLAGS_device_id)
        model_fn = select_model(FLAGS_model_type)
        with tf.device(FLAGS_device_id):
            images = tf.placeholder(tf.float32, [None, RESIZE_FINAL, RESIZE_FINAL, 3])
            logits = model_fn(nlabels, images, 1, False)

            requested_step = FLAGS_requested_step if FLAGS_requested_step else None
            model_checkpoint_path, _ = get_checkpoint(FLAGS_model_dir, requested_step, FLAGS_checkpoint)
            saver = tf.train.Saver()
            saver.restore(sess, model_checkpoint_path)

            softmax_output = tf.nn.softmax(logits)
            coder = ImageCoder()

            # Support a batch mode if no face detection model produced files.
            if not files:
                files = _collect_batch_files(FLAGS_filename)

            resolved = (resolve_file(f) for f in files)
            image_files = [path for path in resolved if path is not None]

            # Classification stays inside the device scope because it
            # creates additional graph ops for the crops.
            age_labels = [
                classify_one_multi_crops(sess, label_list, softmax_output, coder, images, image_file)
                for image_file in image_files
            ]

    # Reuse the configured cascade file for drawing; fall back to the stock
    # frontal-face cascade when the configured detector is not a cascade.
    if FLAGS_face_detection_model and FLAGS_face_detection_type == 'cascade':
        cascade_path = FLAGS_face_detection_model
    else:
        cascade_path = '../haarcascade_frontalface_default.xml'
    _show_annotated_faces(FLAGS_filename, cascade_path, age_labels)
# Run the demo: classify the faces in FLAGS_filename and display the annotated image.
age_guess()
下面是输出图片。可以看到,受网络训练方式和数据集质量等因素的影响,我们训练的神经网络对年龄的估计有明显的偏差,后期可以通过不断调优(fine-tune)来减小误差。