前言

本教程基于OpenCV3.3.1或以上版本（如OpenCV3.4）、DNN模块和face_detector示例实现简单、快速的人脸检测。

主要参考Face detection with OpenCV and deep learning这个英文教程，并作部分修改。

注：亲测OpenCV3.3.0及以下版本，并没有face_detector示例，且不支持face_detector。为了避免折腾，还是建议使用OpenCV3.3.1及以上（如OpenCV3.4）。

1 face_detector简介

face_detector示例链接：https://github.com/opencv/opencv/tree/master/samples/dnn/face_detector

当电脑配置好OpenCV3.3.1或以上版本时，在opencv\samples\dnn也可以找到face_detector示例文件夹，如下图所示：

使用OpenCV的DNN模块以及Caffe模型，必须要有.prototxt和.caffemodel两种文件。但face_detector文件夹中，只有.prototxt一类文件，即缺少训练好的.caffemodel。.prototxt和.caffemodel的作用如下：

The .prototxt file(s) which define the model architecture (i.e., the layers themselves)
The .caffemodel file which contains the weights for the actual layers

face_detector文件分析：

deploy.prototxt：调用.caffemodel时的测试网络文件
how_to_train_face_detector.txt：如何使用自定义数据集来训练网络的说明
solver.prototxt：超参数文件
test.prototxt：测试网络文件
train.prototxt：训练网络文件

本教程直接使用训练好的.caffemodel来进行人脸检测，即只需要.caffemodel和deploy.prototxt两个文件。

如果想要使用自己的数据集来训练网络，请参考"how_to_train_face_detector.txt"。

2 ResNet-10和SSD简介

本教程属于实战篇，故不深入介绍算法内容。对ResNet和SSD感兴趣的同学，可以参考下述链接进行学习：

[1]ResNet paper：https://arxiv.org/abs/1512.03385

[2]ResNet in Caffe：https://github.com/soeaver/caffe-model/tree/master/cls/resnet

[3]SSD paper：https://arxiv.org/abs/1512.02325

[4]SSD in Caffe：https://github.com/weiliu89/caffe/tree/ssd

3 .caffemodel下载

res10_300x300_ssd_iter_140000.caffemodel下载链接：https://anonfile.com/W7rdG4d0b1/face_detector.rar

4 C++版代码

4.1 图像中的人脸检测

对于OpenCV3.4版本，可直接使用opencv-3.4.1\samples\dnn文件夹中的resnet_ssd_face.cpp；

对于OpenCV3.3.1版本，可参考下述代码（自己写的）：

face_detector_image.cpp

// Summary: 使用OpenCV3.3.1中的face_detector对图像进行人脸检测
// Author: Amusi
// Date:   2018-02-28

#include <iostream>
#include <opencv2/opencv.hpp>
#include <opencv2/dnn.hpp>

using namespace std;
using namespace cv;
using namespace cv::dnn;

// Preprocessing constants handed to blobFromImage() in main():
// the blob's spatial size, the pixel scale factor and the mean value.
const size_t inWidth = 300;        // width of the Size passed to blobFromImage()
const size_t inHeight = 300;       // height of the Size passed to blobFromImage()
const double inScaleFactor = 1.0;  // scalefactor argument of blobFromImage()
// mean argument of blobFromImage(); NOTE(review): swapRB is passed as false,
// so these are presumably in B, G, R order -- confirm against the model.
const Scalar meanVal(104.0, 177.0, 123.0);

int main(int argc, char** argv)
{
	// Load image
	Mat img;
	// Use commandline
#if 0
	if (argc < 2)
	{
		cerr<< "please input "<< endl;
		cerr << "[Format]face_detector_img.exe image.jpg"<< endl;
		return -1;
	}
	img = imread(argv[1]);
#else
	// Not use commandline
	img = imread("iron_chic.jpg");
#endif

	// Initialize Caffe network
	float min_confidence = 0.5;
	String modelConfiguration = "face_detector/deploy.prototxt";
	String modelBinary = "face_detector/res10_300x300_ssd_iter_140000.caffemodel";
	dnn::Net net = readNetFromCaffe(modelConfiguration, modelBinary);
	if (net.empty())
	{
		cerr << "Can't load network by using the following files: " << endl;
		cerr << "prototxt:   " << modelConfiguration << endl;
		cerr << "caffemodel: " << modelBinary << endl;
		cerr << "Models are available here:" << endl;
		cerr << "<OPENCV_SRC_DIR>/samples/dnn/face_detector" << endl;
		cerr << "or here:" << endl;
		cerr << "https://github.com/opencv/opencv/tree/master/samples/dnn/face_detector" << endl;
		exit(-1);
	}

	// Prepare blob
	Mat inputBlob = blobFromImage(img, inScaleFactor, Size(inWidth, inHeight), meanVal, false, false);
	net.setInput(inputBlob, "data");	// set the network input
	Mat detection = net.forward("detection_out");	// compute output

	// Calculate and display time and frame rate
	vector<double> layersTimings;
	double freq = getTickFrequency() / 1000;
	double time = net.getPerfProfile(layersTimings) / freq;

	Mat detectionMat(detection.size[2], detection.size[3], CV_32F, detection.ptr<float>());

	ostringstream ss;
	ss << "FPS: " << 1000 / time << " ; time: " << time << "ms" << endl;
	putText(img, ss.str(), Point(20,20), 0, 0.5, Scalar(0, 0, 255));

	// 
	float confidenceThreshold = min_confidence;
	for (int i = 0; i < detectionMat.rows; ++i)
	{
		// judge confidence
		float confidence = detectionMat.at<float>(i, 2);
		if (confidence > confidenceThreshold)
		{
			int xLeftBottom = static_cast<int>(detectionMat.at<float>(i, 3) * img.cols);
			int yLeftBottom = static_cast<int>(detectionMat.at<float>(i, 4) * img.rows);
			int xRightTop = static_cast<int>(detectionMat.at<float>(i, 5) * img.cols);
			int yRightTop = static_cast<int>(detectionMat.at<float>(i, 6) * img.rows);

			Rect object((int)xLeftBottom, (int)yLeftBottom,
				(int)(xRightTop - xLeftBottom),
				(int)(yRightTop - yLeftBottom));

			rectangle(img, object, Scalar(0, 255, 0));

			ss.str("");
			ss << confidence;
			String conf(ss.str());
			String label = "Face: " + conf;
			int baseLine = 0;
			Size labelSize = getTextSize(label, FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine);
			rectangle(img, Rect(Point(xLeftBottom, yLeftBottom-labelSize.height), 
				Size(labelSize.width, labelSize.height + baseLine)), 
				Scalar(255, 255, 255), CV_FILLED);
			putText(img, label, Point(xLeftBottom, yLeftBottom), 
				FONT_HERSHEY_SIMPLEX, 0.5, Scalar(0, 0, 0));


		}
	}

	namedWindow("Face Detection", WINDOW_NORMAL);
	imshow("Face Detection", img);
	waitKey(0);

	return 0;
}

检测结果

4.2 摄像头/视频中的人脸检测

face_detector_video.cpp

// Summary: 使用OpenCV3.3.1中的face_detector对摄像头/视频进行人脸检测
// Author: Amusi
// Date:   2018-02-28
// Reference: http://blog.csdn.net/minstyrain/article/details/78907425

#include <iostream>  
#include <cstdlib>  
#include <stdio.h>
#include <opencv2/opencv.hpp>
#include <opencv2/dnn.hpp>
#include <opencv2/dnn/shape_utils.hpp>

using namespace cv;  
using namespace cv::dnn;  
using namespace std;  
// Preprocessing constants handed to blobFromImage() in main().
// Width and height of the Size passed to blobFromImage():
const size_t inWidth = 300;  
const size_t inHeight = 300;  
// scalefactor argument of blobFromImage():
const double inScaleFactor = 1.0;  
// mean argument of blobFromImage(); NOTE(review): swapRB is passed as false,
// so these are presumably in B, G, R order -- confirm against the model.
const Scalar meanVal(104.0, 177.0, 123.0);  
  
int main(int argc, char** argv)  
{  
    float min_confidence = 0.5;  
    String modelConfiguration = "face_detector/deploy.prototxt";  
    String modelBinary = "face_detector/res10_300x300_ssd_iter_140000.caffemodel";  
    //! [Initialize network]  
    dnn::Net net = readNetFromCaffe(modelConfiguration, modelBinary);  
    //! [Initialize network]  
    if (net.empty())  
    {  
        cerr << "Can't load network by using the following files: " << endl;  
        cerr << "prototxt:   " << modelConfiguration << endl;  
        cerr << "caffemodel: " << modelBinary << endl;  
        cerr << "Models are available here:" << endl;  
        cerr << "<OPENCV_SRC_DIR>/samples/dnn/face_detector" << endl;  
        cerr << "or here:" << endl;  
        cerr << "https://github.com/opencv/opencv/tree/master/samples/dnn/face_detector" << endl;  
        exit(-1);  
    }  
  
    VideoCapture cap(0);  
    if (!cap.isOpened())  
    {  
        cout << "Couldn't open camera : " << endl;  
        return -1;  
    }  
    for (;;)  
    {  
        Mat frame;  
        cap >> frame; // get a new frame from camera/video or read image  
  
        if (frame.empty())  
        {  
            waitKey();  
            break;  
        }  
  
        if (frame.channels() == 4)  
            cvtColor(frame, frame, COLOR_BGRA2BGR);  
  
        //! [Prepare blob]  
        Mat inputBlob = blobFromImage(frame, inScaleFactor,  
            Size(inWidth, inHeight), meanVal, false, false); //Convert Mat to batch of images  
                                                             //! [Prepare blob]  
  
                                                             //! [Set input blob]  
        net.setInput(inputBlob, "data"); //set the network input  
                                         //! [Set input blob]  
  
                                         //! [Make forward pass]  
        Mat detection = net.forward("detection_out"); //compute output  
                                                      //! [Make forward pass]  
  
        vector<double> layersTimings;  
        double freq = getTickFrequency() / 1000;  
        double time = net.getPerfProfile(layersTimings) / freq;  
  
        Mat detectionMat(detection.size[2], detection.size[3], CV_32F, detection.ptr<float>());  
  
        ostringstream ss;  
        ss << "FPS: " << 1000 / time << " ; time: " << time << " ms";  
        putText(frame, ss.str(), Point(20, 20), 0, 0.5, Scalar(0, 0, 255));  
  
        float confidenceThreshold = min_confidence;  
        for (int i = 0; i < detectionMat.rows; i++)  
        {  
            float confidence = detectionMat.at<float>(i, 2);  
  
            if (confidence > confidenceThreshold)  
            {  
                int xLeftBottom = static_cast<int>(detectionMat.at<float>(i, 3) * frame.cols);  
                int yLeftBottom = static_cast<int>(detectionMat.at<float>(i, 4) * frame.rows);  
                int xRightTop = static_cast<int>(detectionMat.at<float>(i, 5) * frame.cols);  
                int yRightTop = static_cast<int>(detectionMat.at<float>(i, 6) * frame.rows);  
  
                Rect object((int)xLeftBottom, (int)yLeftBottom,  
                    (int)(xRightTop - xLeftBottom),  
                    (int)(yRightTop - yLeftBottom));  
  
                rectangle(frame, object, Scalar(0, 255, 0));  
  
                ss.str("");  
                ss << confidence;  
                String conf(ss.str());  
                String label = "Face: " + conf;  
                int baseLine = 0;  
                Size labelSize = getTextSize(label, FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine);  
                rectangle(frame, Rect(Point(xLeftBottom, yLeftBottom - labelSize.height),  
                    Size(labelSize.width, labelSize.height + baseLine)),  
                    Scalar(255, 255, 255), CV_FILLED);  
                putText(frame, label, Point(xLeftBottom, yLeftBottom),  
                    FONT_HERSHEY_SIMPLEX, 0.5, Scalar(0, 0, 0));  
            }  
        }  
        cv::imshow("detections", frame);  
        if (waitKey(1) >= 0) break;  
    }  
    return 0;  
}

检测结果

5 Python版本代码

安装Python版OpenCV最简单的方法：

pip install opencv-contrib-python

对于OpenCV3.4版本，可直接使用opencv-3.4.1\samples\dnn文件夹中的resnet_ssd_face_python.py；

对于OpenCV3.3.1版本，可参考下述代码（自己写的）：

5.1 图像中的人脸检测

detect_faces.py

# USAGE
# python detect_faces.py --image rooster.jpg --prototxt deploy.prototxt.txt --model res10_300x300_ssd_iter_140000.caffemodel
#
# Detects faces in a single image with a Caffe model loaded through
# cv2.dnn, draws a box and confidence for every detection above the
# --confidence threshold and shows the result in a window.

# import the necessary packages
import numpy as np
import argparse
import cv2

# construct the argument parse and parse the arguments
ap = argparse.ArgumentParser()
ap.add_argument("-i", "--image", required=True,
	help="path to input image")
ap.add_argument("-p", "--prototxt", required=True,
	help="path to Caffe 'deploy' prototxt file")
ap.add_argument("-m", "--model", required=True,
	help="path to Caffe pre-trained model")
ap.add_argument("-c", "--confidence", type=float, default=0.5,
	help="minimum probability to filter weak detections")
args = vars(ap.parse_args())

# load our serialized model from disk
print("[INFO] loading model...")
net = cv2.dnn.readNetFromCaffe(args["prototxt"], args["model"])

# load the input image; cv2.imread returns None (it does not raise) when
# the file is missing or cannot be decoded, so fail with a clear message
# instead of crashing on `image.shape` below
image = cv2.imread(args["image"])
if image is None:
	raise SystemExit("[ERROR] could not read image: {}".format(args["image"]))

# construct an input blob for the image by resizing to a fixed 300x300
# pixels and then normalizing it
(h, w) = image.shape[:2]
blob = cv2.dnn.blobFromImage(cv2.resize(image, (300, 300)), 1.0,
	(300, 300), (104.0, 177.0, 123.0))

# pass the blob through the network and obtain the detections and
# predictions
print("[INFO] computing object detections...")
net.setInput(blob)
detections = net.forward()

# loop over the detections
for i in range(0, detections.shape[2]):
	# extract the confidence (i.e., probability) associated with the
	# prediction
	confidence = detections[0, 0, i, 2]

	# filter out weak detections by ensuring the `confidence` is
	# greater than the minimum confidence
	if confidence > args["confidence"]:
		# compute the (x, y)-coordinates of the bounding box for the
		# object by scaling the fractional corners to the original size
		box = detections[0, 0, i, 3:7] * np.array([w, h, w, h])
		(startX, startY, endX, endY) = box.astype("int")

		# draw the bounding box of the face along with the associated
		# probability; the text goes above the box unless that would
		# put it too close to the top edge of the image
		text = "{:.2f}%".format(confidence * 100)
		y = startY - 10 if startY - 10 > 10 else startY + 10
		cv2.rectangle(image, (startX, startY), (endX, endY),
			(0, 0, 255), 2)
		cv2.putText(image, text, (startX, y),
			cv2.FONT_HERSHEY_SIMPLEX, 0.45, (0, 0, 255), 2)

# show the output image
cv2.imshow("Output", image)
cv2.waitKey(0)

打开cmd命令提示符，切换至detect_faces.py所在路径下，输入下述命令：

python detect_faces.py --image rooster.jpg --prototxt deploy.prototxt.txt --model res10_300x300_ssd_iter_140000.caffemodel

运行结果：

python detect_faces.py --image iron_chic.jpg --prototxt deploy.prototxt.txt --model res10_300x300_ssd_iter_140000.caffemodel

运行结果：

5.2 摄像头/视频中的人脸检测

detect_faces_video.py

# USAGE
# python detect_faces_video.py --prototxt deploy.prototxt.txt --model res10_300x300_ssd_iter_140000.caffemodel
#
# Detects faces in frames read from video source 0 with a Caffe model
# loaded through cv2.dnn and shows the annotated frames until `q` is
# pressed.

# import the necessary packages
from imutils.video import VideoStream
import numpy as np
import argparse
import imutils
import time
import cv2

# construct the argument parse and parse the arguments
ap = argparse.ArgumentParser()
ap.add_argument("-p", "--prototxt", required=True,
	help="path to Caffe 'deploy' prototxt file")
ap.add_argument("-m", "--model", required=True,
	help="path to Caffe pre-trained model")
ap.add_argument("-c", "--confidence", type=float, default=0.5,
	help="minimum probability to filter weak detections")
args = vars(ap.parse_args())

# load our serialized model from disk
print("[INFO] loading model...")
net = cv2.dnn.readNetFromCaffe(args["prototxt"], args["model"])

# initialize the video stream and allow the camera sensor to warm up
print("[INFO] starting video stream...")
vs = VideoStream(src=0).start()
time.sleep(2.0)

# loop over the frames from the video stream
while True:
	# grab the frame from the threaded video stream
	frame = vs.read()

	# without a frame (e.g. the camera could not be opened) there is
	# nothing to process; leave the loop so the cleanup below still runs
	# instead of crashing inside imutils.resize
	if frame is None:
		print("[ERROR] no frame received from the video stream")
		break

	# resize the frame to have a maximum width of 400 pixels
	frame = imutils.resize(frame, width=400)

	# grab the frame dimensions and convert it to a blob
	(h, w) = frame.shape[:2]
	blob = cv2.dnn.blobFromImage(cv2.resize(frame, (300, 300)), 1.0,
		(300, 300), (104.0, 177.0, 123.0))

	# pass the blob through the network and obtain the detections and
	# predictions
	net.setInput(blob)
	detections = net.forward()

	# loop over the detections
	for i in range(0, detections.shape[2]):
		# extract the confidence (i.e., probability) associated with the
		# prediction
		confidence = detections[0, 0, i, 2]

		# filter out weak detections by ensuring the `confidence` is
		# greater than the minimum confidence
		if confidence < args["confidence"]:
			continue

		# compute the (x, y)-coordinates of the bounding box for the
		# object by scaling the fractional corners to the frame size
		box = detections[0, 0, i, 3:7] * np.array([w, h, w, h])
		(startX, startY, endX, endY) = box.astype("int")

		# draw the bounding box of the face along with the associated
		# probability; the text goes above the box unless that would
		# put it too close to the top edge of the frame
		text = "{:.2f}%".format(confidence * 100)
		y = startY - 10 if startY - 10 > 10 else startY + 10
		cv2.rectangle(frame, (startX, startY), (endX, endY),
			(0, 0, 255), 2)
		cv2.putText(frame, text, (startX, y),
			cv2.FONT_HERSHEY_SIMPLEX, 0.45, (0, 0, 255), 2)

	# show the output frame
	cv2.imshow("Frame", frame)
	key = cv2.waitKey(1) & 0xFF

	# if the `q` key was pressed, break from the loop
	if key == ord("q"):
		break

# do a bit of cleanup
cv2.destroyAllWindows()
vs.stop()

打开cmd命令提示符，切换至detect_faces_video.py所在路径下，输入下述命令：