版权声明:本文为博主原创文章,未经博主允许不得转载。 https://blog.csdn.net/zhongqianli/article/details/85691361
项目地址:https://github.com/zhongqianli/cifar10_classification.git
opencv3.4的dnn模块已经支持caffe、tensorflow、pytorch等主流深度学习框架训练的模型。
本文用caffe预先在cifar10数据集上训练了resnet56模型。下面讲述如何使用opencv的dnn模块进行图像识别。
dnn模块使用caffe模型
1、通过readNet()函数加载caffe模型
2、读取图像,并调用blobFromImage,将图像转换为4维的blob,其中mean= cv::Scalar(125, 123, 124)。
3、设置网络的输入数据net.setInput(blob)
4、执行网络推断Mat prob = net.forward()
5、获取推断结果列表中概率最大的索引,得到图像的标签,将标签转换为图像识别结果。对于32x32的rgb图像,在没有GPU的情况下,resnet56推断耗时5ms左右。
代码
#include <iostream>
#include <fstream>
#include <sstream>
#include <opencv2/dnn.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/highgui.hpp>
using namespace cv;
using namespace dnn;
std::vector<std::string> classes;
int main(int argc, char** argv)
{
float scale = 1.0;
Scalar mean = cv::Scalar(125, 123, 124);
bool swapRB = true;
int inpWidth = 32;
int inpHeight = 32;
String model = "../models/cifar10_resnet56_iter_64000.caffemodel";
String config = "../models/cifar10_resnet56_deploy.prototxt";
String framework = "caffe";
// Open file with classes names.
std::string file = "../samples/synset_words.txt";
std::ifstream ifs(file.c_str());
if (!ifs.is_open())
CV_Error(Error::StsError, "File " + file + " not found");
std::string line;
while (std::getline(ifs, line))
{
classes.push_back(line);
}
//! [Read and initialize network]
Net net = readNet(model, config, framework);
net.setPreferableBackend(0);
net.setPreferableTarget(0);
//! [Read and initialize network]
// Create a window
static const std::string kWinName = "Deep learning image classification in OpenCV";
namedWindow(kWinName, WINDOW_NORMAL);
std::vector<cv::String> filename_vec;
cv::String pattern = "../samples/*.jpg";
cv::glob(pattern, filename_vec, true);
// Process frames.
for(int i = 0; i < filename_vec.size(); i++)
{
cv::String filename = filename_vec[i];
Mat frame = cv::imread(filename, cv::IMREAD_COLOR);
if (frame.empty())
{
continue;
}
// cv::resize(frame, frame, cv::Size(32,32));
Mat blob;
//! [Create a 4D blob from a frame]
blobFromImage(frame, blob, scale, Size(inpWidth, inpHeight), mean, swapRB, false);
// blobFromImage(frame, blob, scale, Size(inpWidth, inpHeight));
//! [Create a 4D blob from a frame]
//! [Set input blob]
net.setInput(blob);
//! [Set input blob]
//! [Make forward pass]
Mat prob = net.forward();
//! [Make forward pass]
//! [Get a class with a highest score]
Point classIdPoint;
double confidence;
minMaxLoc(prob.reshape(1, 1), 0, &confidence, 0, &classIdPoint);
int classId = classIdPoint.x;
//! [Get a class with a highest score]
// Put efficiency information.
std::vector<double> layersTimes;
double freq = getTickFrequency() / 1000;
double t = net.getPerfProfile(layersTimes) / freq;
std::string label = format("Inference time: %.2f ms", t);
// putText(frame, label, Point(0, 15), FONT_HERSHEY_SIMPLEX, 0.5, Scalar(0, 255, 0));
std::cout << label << std::endl;
// Print predicted class.
label = format("%s: %.4f", (classes.empty() ? format("Class #%d", classId).c_str() :
classes[classId].c_str()),
confidence);
// putText(frame, label, Point(0, 40), FONT_HERSHEY_SIMPLEX, 0.5, Scalar(0, 255, 0));
std::cout << label << std::endl;
imshow(kWinName, frame);
cv::waitKey(0);
}
return 0;
}