版权声明:如果感觉写的不错,转载标明出处链接哦~blog.csdn.net/wyg1997 https://blog.csdn.net/wyg1997/article/details/82226502
一直不是很理解检测结果是怎么出来的,学习最快的方法就是看源码啦,今天先从prior box层开始。
这一层的作用是:为特征图上每个位置的特征点生成若干个大小、形状各不相同的 default box(先验框);这些框的大小和长宽比由 prototxt 中的 prior_box_param 控制。下面随便拿一个模型的 prior box 层来举例:
# Example PriorBox layer definition.
layer {
name: "base_11_priorbox"
type: "PriorBox"
# bottom[0]: feature map; its height/width define the grid of prior centres.
bottom: "ConvNdBackward91"
# bottom[1]: image blob; its height/width are used as the image size because
# img_size / img_h / img_w are not set in prior_box_param.
bottom: "data"
top: "base_11_priorbox"
prior_box_param {
# Each min_size yields one square prior (side = min_size) per grid cell.
min_size: 32
min_size: 48
# Extra non-square prior per min_size: width = min_size * sqrt(ar),
# height = min_size / sqrt(ar). Aspect ratio 1 is always added implicitly, so
# this layer produces 2 ratios * 2 min sizes = 4 priors per grid cell.
aspect_ratio: 0.37
# flip: false -> the reciprocal ratio (1 / 0.37) is NOT added.
flip: false
# clip: false -> prior coordinates are not clamped to [0, 1].
clip: false
# Exactly four variances; they are copied for every prior into the second
# channel of the top blob.
variance: 0.1
variance: 0.1
variance: 0.2
variance: 0.2
# step: used for both step_h and step_w (image pixels per feature-map cell).
step: 8
# offset: prior centre = (cell index + offset) * step, so 0.5 puts the centre
# in the middle of the cell.
offset: 0.5
}
}
然后就来看一下源码(caffe_root/src/caffe/layers/prior_box_layer.cpp):
#include <algorithm>
#include <functional>
#include <utility>
#include <vector>
#include "caffe/layers/prior_box_layer.hpp"
namespace caffe {
// Parses prior_box_param once and caches everything Forward_cpu needs to
// generate priors: min/max sizes, aspect ratios, flip/clip flags, variances,
// image size, step and offset. Any invalid setting aborts through a CHECK.
//
// bottom / top are not used here; the blob shapes are handled in Reshape.
template <typename Dtype>
void PriorBoxLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top) {
  const PriorBoxParameter& prior_box_param =
      this->layer_param_.prior_box_param();

  // At least one min_size is required, and every min_size must be positive.
  // The cache is cleared first so a repeated setup starts from a clean state
  // (the same way aspect_ratios_ is handled below).
  CHECK_GT(prior_box_param.min_size_size(), 0) << "must provide min_size.";
  min_sizes_.clear();
  for (int i = 0; i < prior_box_param.min_size_size(); ++i) {
    min_sizes_.push_back(prior_box_param.min_size(i));
    CHECK_GT(min_sizes_.back(), 0) << "min_size must be positive.";
  }

  // Aspect ratio 1 (the square prior) is always present.
  aspect_ratios_.clear();
  aspect_ratios_.push_back(1.);
  flip_ = prior_box_param.flip();
  for (int i = 0; i < prior_box_param.aspect_ratio_size(); ++i) {
    const float ar = prior_box_param.aspect_ratio(i);
    // A zero ratio would divide by zero in the flip below and a negative one
    // would feed sqrt() a negative value in Forward_cpu, so reject both here.
    CHECK_GT(ar, 0) << "aspect_ratio must be positive.";
    // Keep every ratio only once (compared with a 1e-6 tolerance).
    bool already_exist = false;
    for (size_t j = 0; j < aspect_ratios_.size(); ++j) {
      if (fabs(ar - aspect_ratios_[j]) < 1e-6) {
        already_exist = true;
        break;
      }
    }
    if (!already_exist) {
      aspect_ratios_.push_back(ar);
      // flip also adds the reciprocal ratio, i.e. the same box with width and
      // height swapped.
      if (flip_) {
        aspect_ratios_.push_back(1./ar);
      }
    }
  }

  // Priors per feature-map cell: one per (aspect ratio, min_size) pair ...
  num_priors_ = aspect_ratios_.size() * min_sizes_.size();
  // ... plus one extra square prior of side sqrt(min_size * max_size) for
  // every max_size. When max_size is given there must be exactly one per
  // min_size, and each must exceed its min_size.
  max_sizes_.clear();
  if (prior_box_param.max_size_size() > 0) {
    CHECK_EQ(prior_box_param.min_size_size(), prior_box_param.max_size_size());
    for (int i = 0; i < prior_box_param.max_size_size(); ++i) {
      max_sizes_.push_back(prior_box_param.max_size(i));
      CHECK_GT(max_sizes_[i], min_sizes_[i])
          << "max_size must be greater than min_size.";
      num_priors_ += 1;
    }
  }

  clip_ = prior_box_param.clip();

  // Variances: either exactly four values, a single value, or none (in which
  // case a single 0.1 is used). Every provided value must be positive.
  variance_.clear();
  if (prior_box_param.variance_size() > 1) {
    // Must and only provide 4 variance.
    CHECK_EQ(prior_box_param.variance_size(), 4);
    for (int i = 0; i < prior_box_param.variance_size(); ++i) {
      CHECK_GT(prior_box_param.variance(i), 0);
      variance_.push_back(prior_box_param.variance(i));
    }
  } else if (prior_box_param.variance_size() == 1) {
    // A single variance must be positive as well.
    CHECK_GT(prior_box_param.variance(0), 0);
    variance_.push_back(prior_box_param.variance(0));
  } else {
    // Set default to 0.1.
    variance_.push_back(0.1);
  }

  // Image size: either img_h/img_w (each must be > 0) or img_size, never
  // both. When neither is given both members stay 0, which makes Forward_cpu
  // read the size from bottom[1] instead.
  if (prior_box_param.has_img_h() || prior_box_param.has_img_w()) {
    CHECK(!prior_box_param.has_img_size())
        << "Either img_size or img_h/img_w should be specified; not both.";
    img_h_ = prior_box_param.img_h();
    CHECK_GT(img_h_, 0) << "img_h should be larger than 0.";
    img_w_ = prior_box_param.img_w();
    CHECK_GT(img_w_, 0) << "img_w should be larger than 0.";
  } else if (prior_box_param.has_img_size()) {
    const int img_size = prior_box_param.img_size();
    CHECK_GT(img_size, 0) << "img_size should be larger than 0.";
    img_h_ = img_size;
    img_w_ = img_size;
  } else {
    img_h_ = 0;
    img_w_ = 0;
  }

  // Step: either step_h/step_w (each must be > 0) or step, never both. When
  // neither is given both members stay 0, which makes Forward_cpu derive the
  // step as image size / feature-map size.
  if (prior_box_param.has_step_h() || prior_box_param.has_step_w()) {
    CHECK(!prior_box_param.has_step())
        << "Either step or step_h/step_w should be specified; not both.";
    step_h_ = prior_box_param.step_h();
    CHECK_GT(step_h_, 0.) << "step_h should be larger than 0.";
    step_w_ = prior_box_param.step_w();
    CHECK_GT(step_w_, 0.) << "step_w should be larger than 0.";
  } else if (prior_box_param.has_step()) {
    const float step = prior_box_param.step();
    CHECK_GT(step, 0) << "step should be larger than 0.";
    step_h_ = step;
    step_w_ = step;
  } else {
    step_h_ = 0;
    step_w_ = 0;
  }

  // Offset added to the cell index before it is scaled by the step in
  // Forward_cpu; 0.5 places the prior centre in the middle of the cell.
  offset_ = prior_box_param.offset();
}
// Sizes the top blob that will receive the priors and their variances.
//
// The result has shape (1, 2, H * W * num_priors_ * 4), where H and W are the
// height and width of bottom[0]:
//   axis 0: a single set of priors; all images in a batch have the same
//           height and width, so one set is shared across the batch.
//   axis 1: channel 0 stores the mean of each prior coordinate, channel 1
//           stores the variance of each prior coordinate.
//   axis 2: four coordinates for every prior at every feature-map cell.
template <typename Dtype>
void PriorBoxLayer<Dtype>::Reshape(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top) {
  const int map_width = bottom[0]->width();
  const int map_height = bottom[0]->height();

  vector<int> shape;
  shape.push_back(1);
  shape.push_back(2);
  shape.push_back(map_width * map_height * num_priors_ * 4);
  // An empty feature map or zero priors would give an empty blob.
  CHECK_GT(shape[2], 0);

  top[0]->Reshape(shape);
}
namespace {

// Writes one prior box centred on (center_x, center_y) as
// (xmin, ymin, xmax, ymax), each divided by the image width/height, into
// out[idx .. idx + 3] and returns the index following the last value written.
template <typename Dtype>
inline int AppendPrior(Dtype* out, int idx, float center_x, float center_y,
                       float box_width, float box_height,
                       int img_width, int img_height) {
  // xmin
  out[idx++] = (center_x - box_width / 2.) / img_width;
  // ymin
  out[idx++] = (center_y - box_height / 2.) / img_height;
  // xmax
  out[idx++] = (center_x + box_width / 2.) / img_width;
  // ymax
  out[idx++] = (center_y + box_height / 2.) / img_height;
  return idx;
}

}  // namespace

// Fills the top blob with the prior boxes and their variances.
//
// bottom[0] supplies the feature-map height/width (the grid of prior
// centres). bottom[1] supplies the image height/width unless an explicit
// image size was configured in LayerSetUp.
//
// For every feature-map cell and every min size the priors are written in
// this order: the square of side min_size; if max sizes are configured, the
// square of side sqrt(min_size * max_size); then one box per aspect ratio
// other than 1. The variances are written after the coordinates, starting at
// index top[0]->offset(0, 1).
template <typename Dtype>
void PriorBoxLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  const int layer_width = bottom[0]->width();
  const int layer_height = bottom[0]->height();

  // Image size: taken from bottom[1] when none was configured (members are 0).
  int img_width, img_height;
  if (img_h_ == 0 || img_w_ == 0) {
    img_width = bottom[1]->width();
    img_height = bottom[1]->height();
  } else {
    img_width = img_w_;
    img_height = img_h_;
  }

  // Step: image pixels per feature-map cell, derived from the two sizes when
  // none was configured (members are 0).
  float step_w, step_h;
  if (step_w_ == 0 || step_h_ == 0) {
    step_w = static_cast<float>(img_width) / layer_width;
    step_h = static_cast<float>(img_height) / layer_height;
  } else {
    step_w = step_w_;
    step_h = step_h_;
  }

  // The min/max size pairing does not change inside the loops, so it is
  // verified once up front instead of once per cell and size.
  const bool has_max_sizes = max_sizes_.size() > 0;
  if (has_max_sizes) {
    CHECK_EQ(min_sizes_.size(), max_sizes_.size());
  }

  Dtype* top_data = top[0]->mutable_cpu_data();
  // Number of coordinate values (and, equally, of variance values).
  const int dim = layer_height * layer_width * num_priors_ * 4;
  int idx = 0;
  for (int h = 0; h < layer_height; ++h) {
    for (int w = 0; w < layer_width; ++w) {
      // Map the feature-map cell to its centre in image coordinates.
      const float center_x = (w + offset_) * step_w;
      const float center_y = (h + offset_) * step_h;
      float box_width, box_height;
      for (size_t s = 0; s < min_sizes_.size(); ++s) {
        // NOTE(review): the size is held in an int, so a fractional min_size
        // would be truncated here if min_sizes_ stores floats (its type is
        // declared in the header, not visible in this file). Kept as-is so
        // the generated priors do not change; confirm before altering.
        const int min_size = min_sizes_[s];
        // first prior: aspect_ratio = 1, size = min_size
        box_width = box_height = min_size;
        idx = AppendPrior(top_data, idx, center_x, center_y,
                          box_width, box_height, img_width, img_height);

        if (has_max_sizes) {
          const int max_size = max_sizes_[s];
          // second prior: aspect_ratio = 1, size = sqrt(min_size * max_size)
          box_width = box_height = sqrt(min_size * max_size);
          idx = AppendPrior(top_data, idx, center_x, center_y,
                            box_width, box_height, img_width, img_height);
        }

        // rest of priors: one box per aspect ratio other than 1 (the square
        // was already written above).
        for (size_t r = 0; r < aspect_ratios_.size(); ++r) {
          const float ar = aspect_ratios_[r];
          if (fabs(ar - 1.) < 1e-6) {
            continue;
          }
          box_width = min_size * sqrt(ar);
          box_height = min_size / sqrt(ar);
          idx = AppendPrior(top_data, idx, center_x, center_y,
                            box_width, box_height, img_width, img_height);
        }
      }
    }
  }

  // clip the prior's coordinate such that it is within [0, 1]
  if (clip_) {
    for (int d = 0; d < dim; ++d) {
      top_data[d] = std::min<Dtype>(std::max<Dtype>(top_data[d], 0.), 1.);
    }
  }

  // set the variance.
  // offset(0, 1) is the index at which the second channel of the top blob
  // starts; the variances are stored there, one per prior coordinate.
  Dtype* variance_data = top_data + top[0]->offset(0, 1);
  if (variance_.size() == 1) {
    // One shared variance for every coordinate of every prior.
    caffe_set<Dtype>(dim, Dtype(variance_[0]), variance_data);
  } else {
    // Four variances, repeated for every prior at every cell. LayerSetUp
    // guarantees the vector holds exactly four values on this path.
    for (int count = 0; count < dim; ++count) {
      variance_data[count] = variance_[count % 4];
    }
  }
}
// NOTE(review): both macros are defined elsewhere in Caffe, not in this file.
// Presumably the first explicitly instantiates PriorBoxLayer for the
// supported Dtype types and the second registers it under the layer type
// name "PriorBox" used in prototxt — confirm against their definitions.
INSTANTIATE_CLASS(PriorBoxLayer);
REGISTER_LAYER_CLASS(PriorBox);
} // namespace caffe