官方示例
#include <opencv2/core/core.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <cstdio>
#include "net.h"

// Minimal ncnn inference example:
// load a grayscale image, resize + normalize it, run the net, read one output.
int main()
{
    // cv::IMREAD_GRAYSCALE replaces the legacy CV_LOAD_IMAGE_GRAYSCALE macro,
    // which was removed in OpenCV 4.
    cv::Mat img = cv::imread("image.ppm", cv::IMREAD_GRAYSCALE);
    if (img.empty())
    {
        // bail out instead of dereferencing img.data of a failed load
        fprintf(stderr, "cv::imread image.ppm failed\n");
        return -1;
    }
    int w = img.cols;
    int h = img.rows;

    // bilinear-resize the packed gray pixels to the 60x60 network input
    ncnn::Mat in = ncnn::Mat::from_pixels_resize(img.data, ncnn::Mat::PIXEL_GRAY, w, h, 60, 60);

    // subtract 128, norm to -1 ~ 1
    float mean[1] = {
        128.f };
    float norm[1] = {
        1 / 128.f };
    in.substract_mean_normalize(mean, norm);

    ncnn::Net net;
    net.load_param("model.param");
    net.load_model("model.bin");

    ncnn::Extractor ex = net.create_extractor();
    ex.set_light_mode(true);
    ex.set_num_threads(4);

    ex.input("data", in);

    ncnn::Mat feat;
    ex.extract("output", feat);
    return 0;
}
加载和卸载模型
ncnn::Net net;
net.load_param("alexnet.param");
net.load_model("alexnet.bin");
卸载网络结构和权重数据
net.clear();
从参数文件路径加载网络结构,如果成功返回 0(另有重载可从内存指针加载)
int load_param(const char *protopath)
从模型文件加载网络权重数据,如果成功返回 0
int load_model(const char *modelpath)
数据预处理
Function from_pixels_resize
#include <src/mat.h>
(1) static Mat from_pixels_resize(const unsigned char *pixels, int type, int w, int h, int target_width, int target_height, Allocator *allocator=0)
(2) static Mat from_pixels_resize(const unsigned char *pixels, int type, int w, int h, int stride, int target_width, int target_height, Allocator *allocator=0)
重载函数1:方便地从像素数据构造并调整到特定大小
static Mat from_pixels_resize(const unsigned char *pixels, int type, int w, int h, int target_width, int target_height, Allocator *allocator=0)
// Convenience overload: construct a Mat from tightly packed pixel data and
// scale it to target_width x target_height.  Derives the row stride from the
// pixel format and defers to the stride-aware overload.
Mat Mat::from_pixels_resize(const unsigned char* pixels, int type, int w, int h, int target_width, int target_height, Allocator* allocator)
{
    const int type_from = type & PIXEL_FORMAT_MASK;

    // bytes per pixel for the packed source layout
    int channels;
    switch (type_from)
    {
    case PIXEL_GRAY:
        channels = 1;
        break;
    case PIXEL_RGB:
    case PIXEL_BGR:
        channels = 3;
        break;
    case PIXEL_RGBA:
    case PIXEL_BGRA:
        channels = 4;
        break;
    default:
        // unknown convert type
        NCNN_LOGE("unknown convert type %d", type);
        return Mat();
    }

    return Mat::from_pixels_resize(pixels, type, w, h, w * channels, target_width, target_height, allocator);
}
重载函数2:方便地从像素数据构造并使用 stride(bytes-per-row) 参数调整到特定大小
static Mat from_pixels_resize(const unsigned char *pixels, int type, int w, int h, int stride, int target_width, int target_height, Allocator *allocator=0)
// Construct a Mat from pixel data with an explicit row stride (bytes per row)
// and scale it to target_width x target_height before conversion.
Mat Mat::from_pixels_resize(const unsigned char* pixels, int type, int w, int h, int stride, int target_width, int target_height, Allocator* allocator)
{
    // already the requested size — convert directly, no resampling
    if (w == target_width && h == target_height)
        return Mat::from_pixels(pixels, type, w, h, stride, allocator);

    const int type_from = type & PIXEL_FORMAT_MASK;

    switch (type_from)
    {
    case PIXEL_RGB:
    case PIXEL_BGR:
    {
        // bilinear-resize the packed 3-channel image, then convert
        Mat resized(target_width, target_height, (size_t)3u, 3);
        resize_bilinear_c3(pixels, w, h, stride, resized, target_width, target_height, target_width * 3);
        return Mat::from_pixels(resized, type, target_width, target_height, allocator);
    }
    case PIXEL_GRAY:
    {
        Mat resized(target_width, target_height, (size_t)1u, 1);
        resize_bilinear_c1(pixels, w, h, stride, resized, target_width, target_height, target_width * 1);
        return Mat::from_pixels(resized, type, target_width, target_height, allocator);
    }
    case PIXEL_RGBA:
    case PIXEL_BGRA:
    {
        Mat resized(target_width, target_height, (size_t)4u, 4);
        resize_bilinear_c4(pixels, w, h, stride, resized, target_width, target_height, target_width * 4);
        return Mat::from_pixels(resized, type, target_width, target_height, allocator);
    }
    default:
        // unknown convert type
        NCNN_LOGE("unknown convert type %d", type);
        return Mat();
    }
}
Function copy_make_border
图像pad
操作
void copy_make_border(const Mat &src, Mat &dst, int top, int bottom, int left, int right, int type, float v, const Option &opt)
void copy_make_border(const Mat& src, Mat& dst, int top, int bottom, int left, int right, int type, float v, const Option& opt)
{
Layer* padding = create_layer(LayerType::Padding);
ParamDict pd;
pd.set(0, top);
pd.set(1, bottom);
pd.set(2, left);
pd.set(3, right);
pd.set(4, type);
pd.set(5, v);
padding->load_param(pd);
padding->create_pipeline(opt);
padding->forward(src, dst, opt);
padding->destroy_pipeline(opt);
delete padding;
}
归一化操作
void substract_mean_normalize(const float* mean_vals, const float* norm_vals);
// In-place per-channel normalization: optionally subtract mean_vals and/or
// multiply by norm_vals.  Either pointer may be null to skip that step.
// Implemented by building and running a throwaway Bias or Scale layer.
void Mat::substract_mean_normalize(const float* mean_vals, const float* norm_vals)
{
    // neither requested — leave the data untouched
    if (!mean_vals && !norm_vals)
        return;

    Layer* op;

    if (mean_vals && norm_vals)
    {
        // fused subtract-and-scale: y = (x - mean) * norm == x * norm + (-mean * norm)
        op = create_layer(LayerType::Scale);

        ParamDict pd;
        pd.set(0, c);
        pd.set(1, 1); // enable the bias term
        op->load_param(pd);

        Mat weights[2];
        weights[0] = Mat(c); // per-channel scale
        weights[1] = Mat(c); // per-channel bias
        for (int q = 0; q < c; q++)
        {
            weights[0][q] = norm_vals[q];
            weights[1][q] = -mean_vals[q] * norm_vals[q];
        }

        op->load_model(ModelBinFromMatArray(weights));
    }
    else if (mean_vals)
    {
        // subtract mean only: y = x + (-mean)
        op = create_layer(LayerType::Bias);

        ParamDict pd;
        pd.set(0, c);
        op->load_param(pd);

        Mat weights[1];
        weights[0] = Mat(c);
        for (int q = 0; q < c; q++)
        {
            weights[0][q] = -mean_vals[q];
        }

        op->load_model(ModelBinFromMatArray(weights));
    }
    else
    {
        // normalize only: y = x * norm
        op = create_layer(LayerType::Scale);

        ParamDict pd;
        pd.set(0, c);
        op->load_param(pd);

        Mat weights[1];
        weights[0] = Mat(c);
        for (int q = 0; q < c; q++)
        {
            weights[0][q] = norm_vals[q];
        }

        op->load_model(ModelBinFromMatArray(weights));
    }

    Option opt;
    opt.num_threads = 1; // TODO

    op->create_pipeline(opt);
    op->forward_inplace(*this, opt);
    op->destroy_pipeline(opt);

    delete op;
}
前向推理
从网络构造一个提取器
// Build an extractor bound to this network, with one result slot per blob.
Extractor Net::create_extractor() const
{
    const size_t blob_count = d->blobs.size();
    return Extractor(this, blob_count);
}
// Construct an extractor over _net with blob_count cached-result slots.
// Copies the network's Option so per-extractor setters can override it.
Extractor::Extractor(const Net* _net, size_t blob_count)
: d(new ExtractorPrivate(_net))
{
d->blob_mats.resize(blob_count);
// start from the network-wide options; set_num_threads() etc. adjust this copy
d->opt = d->net->opt;
#if NCNN_VULKAN
if (d->net->opt.use_vulkan_compute)
{
// null now; acquired lazily in extract() if no allocator was supplied
d->local_blob_vkallocator = 0;
d->local_staging_vkallocator = 0;
// separate gpu-side caches for buffer and image storage
d->blob_mats_gpu.resize(blob_count);
d->blob_mats_gpu_image.resize(blob_count);
}
#endif // NCNN_VULKAN
}
设置多线程
// Set the worker-thread count for this extractor only,
// overriding the value copied from the Net's options.
void Extractor::set_num_threads(int num_threads)
{
d->opt.num_threads = num_threads;
}
按blob名称设置输入,如果成功则返回0
int input(const char* blob_name, const Mat& in);
源码src/net.cpp会调用find_blob_index_by_name
获得blob_index
,然后根据blob_index
去设置网络输入
// Set the input blob by name.  Returns 0 on success, -1 if the name is
// unknown (in which case the network's declared input names are logged).
int Extractor::input(const char* blob_name, const Mat& in)
{
    const int blob_index = d->net->find_blob_index_by_name(blob_name);
    if (blob_index != -1)
        return input(blob_index, in);

    // name not found — suggest the valid input calls
    NCNN_LOGE("Try");
    const std::vector<const char*>& input_names = d->net->input_names();
    for (size_t i = 0; i < input_names.size(); i++)
    {
        NCNN_LOGE(" ex.input(\"%s\", in%d);", input_names[i], (int)i);
    }
    return -1;
}
通过blob索引设置输入,如果成功则返回0
int input(int blob_index, const Mat &in)
// Set the input blob by index.  Returns 0 on success, -1 if out of range.
int Extractor::input(int blob_index, const Mat& in)
{
    const int blob_count = (int)d->blob_mats.size();
    if (blob_index < 0 || blob_index >= blob_count)
        return -1;

    // stash the input in the blob cache; forward runs lazily on extract()
    d->blob_mats[blob_index] = in;
    return 0;
}
通过blob名称获取结果,如果成功则返回0
// type = 0, default
// type = 1, do not convert fp16/bf16 or / and packing
int extract(const char* blob_name, Mat& feat, int type = 0);
源码src/net.cpp里会调用find_blob_index_by_name
获得blob_index
,然后根据blob_index
去获得网络输出
// Extract a result blob by name.  Returns 0 on success, -1 if the name is
// unknown (in which case the network's declared output names are logged).
// type = 0: default fp32 conversion; type = 1: raw fp16/bf16/packed storage.
int Extractor::extract(const char* blob_name, Mat& feat, int type)
{
    const int blob_index = d->net->find_blob_index_by_name(blob_name);
    if (blob_index != -1)
        return extract(blob_index, feat, type);

    // name not found — suggest the valid extract calls
    NCNN_LOGE("Try");
    const std::vector<const char*>& output_names = d->net->output_names();
    for (size_t i = 0; i < output_names.size(); i++)
    {
        NCNN_LOGE(" ex.extract(\"%s\", out%d);", output_names[i], (int)i);
    }
    return -1;
}
通过blob索引获取结果,如果成功则返回0
// type = 0, default
// type = 1, do not convert fp16/bf16 or / and packing
int extract(int blob_index, Mat &feat, int type=0)
// Extract one blob by index: if it has not been computed yet, run the network
// up to the blob's producer layer (cpu or vulkan path), then post-process the
// cached result into feat.
// type = 0: convert fp16/bf16/int8 storage to fp32 and unpack elempack
// type = 1: return the raw internal storage as-is
int Extractor::extract(int blob_index, Mat& feat, int type)
{
if (blob_index < 0 || blob_index >= (int)d->blob_mats.size())
return -1;
// temporarily apply this extractor's openmp-blocktime / denormal settings;
// restored before returning
int old_blocktime = get_kmp_blocktime();
set_kmp_blocktime(d->opt.openmp_blocktime);
int old_flush_denormals = get_flush_denormals();
set_flush_denormals(d->opt.flush_denormals);
int ret = 0;
// dims == 0 means this blob has not been produced yet — run forward
if (d->blob_mats[blob_index].dims == 0)
{
int layer_index = d->net->blobs()[blob_index].producer;
// use local allocator
if (d->opt.use_local_pool_allocator)
{
if (!d->opt.blob_allocator)
{
d->opt.blob_allocator = d->net->d->local_blob_allocator;
}
if (!d->opt.workspace_allocator)
{
d->opt.workspace_allocator = d->net->d->local_workspace_allocator;
}
}
#if NCNN_VULKAN
if (d->opt.use_vulkan_compute)
{
// use local allocator
if (!d->opt.blob_vkallocator)
{
d->local_blob_vkallocator = d->net->vulkan_device()->acquire_blob_allocator();
d->opt.blob_vkallocator = d->local_blob_vkallocator;
}
if (!d->opt.workspace_vkallocator)
{
// workspace shares the blob allocator unless the caller supplied one
d->opt.workspace_vkallocator = d->opt.blob_vkallocator;
}
if (!d->opt.staging_vkallocator)
{
d->local_staging_vkallocator = d->net->vulkan_device()->acquire_staging_allocator();
d->opt.staging_vkallocator = d->local_staging_vkallocator;
}
ncnn::VkCompute cmd(d->net->vulkan_device());
#if NCNN_BENCHMARK
// two timestamps (begin/end) per layer
cmd.create_query_pool(d->net->layers().size() * 2);
#endif // NCNN_BENCHMARK
// TODO vkimagemat for adreno
if (d->opt.use_image_storage)
{
VkImageMat feat_gpu;
ret = extract(blob_index, feat_gpu, cmd);
// download the gpu result into the cpu-side blob cache
if (d->blob_mats[blob_index].dims == 0 && feat_gpu.dims != 0)
{
cmd.record_download(feat_gpu, d->blob_mats[blob_index], d->opt);
cmd.submit_and_wait();
#if NCNN_BENCHMARK
// read back per-layer gpu timestamps and log layer durations
std::vector<uint64_t> results(d->net->layers().size() * 2);
cmd.get_query_pool_results(0, d->net->layers().size() * 2, results);
for (size_t i = 0; i < d->net->layers().size(); i++)
{
uint64_t start = results[i * 2];
uint64_t end = results[i * 2 + 1];
if (start == 0 || end == 0)
continue;
double duration_us = (end - start) * d->net->vulkan_device()->info.timestamp_period() / 1000;
NCNN_LOGE("%-24s %-30s %8.2lfus |", d->net->layers()[i]->type.c_str(), d->net->layers()[i]->name.c_str(), duration_us);
}
#endif // NCNN_BENCHMARK
}
}
else
{
// buffer storage path
VkMat feat_gpu;
ret = extract(blob_index, feat_gpu, cmd);
if (d->blob_mats[blob_index].dims == 0 && feat_gpu.dims != 0)
{
cmd.record_download(feat_gpu, d->blob_mats[blob_index], d->opt);
cmd.submit_and_wait();
#if NCNN_BENCHMARK
std::vector<uint64_t> results(d->net->layers().size() * 2);
cmd.get_query_pool_results(0, d->net->layers().size() * 2, results);
for (size_t i = 0; i < d->net->layers().size(); i++)
{
uint64_t start = results[i * 2];
uint64_t end = results[i * 2 + 1];
if (start == 0 || end == 0)
continue;
double duration_us = (end - start) * d->net->vulkan_device()->info.timestamp_period() / 1000;
NCNN_LOGE("%-24s %-30s %8.2lfus |", d->net->layers()[i]->type.c_str(), d->net->layers()[i]->name.c_str(), duration_us);
}
#endif // NCNN_BENCHMARK
}
}
}
else
{
// cpu path: run layers up to this blob's producer
ret = d->net->d->forward_layer(layer_index, d->blob_mats, d->opt);
}
#else
ret = d->net->d->forward_layer(layer_index, d->blob_mats, d->opt);
#endif // NCNN_VULKAN
}
feat = d->blob_mats[blob_index];
// undo packed element layout unless the caller asked for raw storage (type 1)
if (d->opt.use_packing_layout && (type == 0) && feat.elempack != 1)
{
Mat bottom_blob_unpacked;
convert_packing(feat, bottom_blob_unpacked, 1, d->opt);
feat = bottom_blob_unpacked;
}
// clang-format off
// *INDENT-OFF*
#if NCNN_ARM82
// fp16 storage -> fp32 for the caller
if (d->opt.use_fp16_storage && cpu_support_arm_asimdhp() && (type == 0))
{
if (feat.elembits() == 16)
{
Mat feat_fp32;
cast_float16_to_float32(feat, feat_fp32, d->opt);
feat = feat_fp32;
}
}
else
#endif // NCNN_ARM82
#if NCNN_BF16
// bf16 storage -> fp32 for the caller
if (d->opt.use_bf16_storage && (type == 0))
{
if (feat.elembits() == 16)
{
Mat feat_fp32;
cast_bfloat16_to_float32(feat, feat_fp32, d->opt);
feat = feat_fp32;
}
}
else
#endif // NCNN_BF16
// int8 storage -> fp32 for the caller
if (feat.elembits() == 8 && (type == 0))
{
Mat feat_fp32;
cast_int8_to_float32(feat, feat_fp32, d->opt);
feat = feat_fp32;
}
// *INDENT-ON*
// clang-format on
if (d->opt.use_local_pool_allocator && feat.allocator == d->net->d->local_blob_allocator)
{
// detach the returned mat from local pool allocator
// so we could destroy net instance much earlier
feat = feat.clone();
}
// restore the thread/denormal settings saved on entry
set_kmp_blocktime(old_blocktime);
set_flush_denormals(old_flush_denormals);
return ret;
}
其余常见操作
分配图像
// allocate image
void create(int w, int h, size_t elemsize = 4u, Allocator* allocator = 0);
// Allocate (or reuse) storage for a 2-D image of _w x _h elements of
// _elemsize bytes each, optionally via a custom allocator.
void Mat::create(int _w, int _h, size_t _elemsize, Allocator* _allocator)
{
// already exactly this shape/layout — keep the current buffer
if (dims == 2 && w == _w && h == _h && elemsize == _elemsize && elempack == 1 && allocator == _allocator)
return;
// drop any previously held (refcounted) buffer
release();
elemsize = _elemsize;
elempack = 1;
allocator = _allocator;
dims = 2;
w = _w;
h = _h;
d = 1;
c = 1;
cstep = (size_t)w * h;
if (total() > 0)
{
// round the payload up to a 4-byte boundary; the refcount int is stored
// immediately after the payload in the same allocation
size_t totalsize = alignSize(total() * elemsize, 4);
if (allocator)
data = allocator->fastMalloc(totalsize + (int)sizeof(*refcount));
else
data = fastMalloc(totalsize + (int)sizeof(*refcount));
refcount = (int*)(((unsigned char*)data) + totalsize);
*refcount = 1;
}
}
裁剪图像
void copy_cut_border(const Mat &src, Mat &dst, int top, int bottom, int left, int right, const Option &opt)
void copy_cut_border(const Mat& src, Mat& dst, int top, int bottom, int left, int right, const Option& opt)
{
if (left + right > src.w || top + bottom > src.h)
{
NCNN_LOGE("copy_cut_border parameter error, top: %d, bottom: %d, left: %d, right: %d, src.w: %d, src.h: %d", top, bottom, left, right, src.w, src.h);
return;
}
Layer* crop = create_layer(LayerType::Crop);
ParamDict pd;
pd.set(0, left);
pd.set(1, top);
pd.set(2, 0);
pd.set(3, src.w - left - right);
pd.set(4, src.h - top - bottom);
pd.set(5, -233);
crop->load_param(pd);
crop->create_pipeline(opt);
crop->forward(src, dst, opt);
crop->destroy_pipeline(opt);
delete crop;
}
图像缩放
void resize_bilinear(const Mat &src, Mat &dst, int w, int h, const Option &opt)
void resize_bilinear(const Mat& src, Mat& dst, int w, int h, const Option& opt)
{
Layer* interp = create_layer(LayerType::Interp);
ParamDict pd;
pd.set(0, 2);
pd.set(3, h);
pd.set(4, w);
interp->load_param(pd);
interp->create_pipeline(opt);
interp->forward(src, dst, opt);
interp->destroy_pipeline(opt);
delete interp;
}