MTCNN训练不收敛原因:
地址: https://github.com/dlunion/mtcnn
我们的训练数据标签格式:
wider face:
pos/001.jpg 1 x1 y1 x2 y2 (x1 y1 x2 y2) -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
part/001.jpg -1 x1 y1 x2 y2 (x1 y1 x2 y2) -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
neg/001.jpg 0 -1 -1 -1 -1 (x1 y1 x2 y2) -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
celebA:
landmark/001.jpg -1 -1 -1 -1 -1 pst1_x pst1_y pst2_x pst2_y pst3_x pst3_y pst4_x pst4_y pst5_x pst5_y
作者要求的训练数据标签格式:
pos/001.jpg 1 x1 y1 x2 y2 (x1 y1 x2 y2) pst1_x pst1_y pst2_x pst2_y pst3_x pst3_y pst4_x pst4_y pst5_x pst5_y
part/001.jpg -1 x1 y1 x2 y2 (x1 y1 x2 y2) pst1_x pst1_y pst2_x pst2_y pst3_x pst3_y pst4_x pst4_y pst5_x pst5_y
neg/001.jpg 0 -1 -1 -1 -1 (x1 y1 x2 y2) pst1_x pst1_y pst2_x pst2_y pst3_x pst3_y pst4_x pst4_y pst5_x pst5_y
在“pts_loss”层(type: "MTCNNEuclideanLoss")中,以"label"(分类的标签)来判断是否ignore。对于我们的训练数据标签格式:
class: ignore_label=-1, 可以正常分类;
bbox regression: ignore_label=0, 有landmark中-1参加计算,导致loss无法收敛;
landmark: ignore_label=0, 有part中-1参加计算,导致loss无法收敛;
解决思路:
在做class,bbox regression,landmark任务时,判断标签值是否全部为-1,来作为ignore条件(注意:下面的实现中,只要某个样本的任一回归标签值等于ignore_label,该样本就会被忽略)。
修改后"MTCNNEuclideanLoss.cpp"如下:
-
#include <vector>
-
#include "caffe/layers/mtcnn_euclidean_loss_layer.hpp"
-
#include "caffe/util/math_functions.hpp"
-
#include <iostream>
-
using namespace std;
-
namespace caffe {
-
template <typename Dtype>
-
void MTCNNEuclideanLossLayer<Dtype>::Reshape(
-
const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
-
LossLayer<Dtype>::Reshape(bottom, top);
-
CHECK_EQ(bottom[0]->count(1), bottom[1]->count(1))
-
<< "Inputs must have the same dimension.";
-
int has_ignore_label = this->layer_param().loss_param().has_ignore_label();
-
if (has_ignore_label)
-
CHECK_EQ(bottom.size(), 3) << "has_ignore_label=true but not input label";
-
if (!has_ignore_label)
-
CHECK_EQ(bottom.size(), 2) << "has_ignore_label=false but input mismatch";
-
diff_.ReshapeLike(*bottom[0]);
-
}
-
template <typename Dtype>
-
void MTCNNEuclideanLossLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
-
const vector<Blob<Dtype>*>& top) {
-
int count = bottom[0]->count();
-
int has_ignore_label = this->layer_param().loss_param().has_ignore_label();
-
int ignore_label = has_ignore_label ? this->layer_param().loss_param().ignore_label() : -1;
-
if (has_ignore_label){
-
const Dtype* label = bottom[2]->cpu_data();
-
int countLabel = bottom[2]->num();
-
//label
-
Dtype* diff = diff_.mutable_cpu_data();
-
int channel = bottom[0]->channels();
-
//cout << "countLabel_forward: " << countLabel << endl;
-
//cout << "channel_forward: " << channel << endl;
-
//cout << "ignore_label_forward: " << ignore_label << endl;
-
memset(diff, 0, sizeof(Dtype)*count);
-
const Dtype* b0 = bottom[0]->cpu_data();
-
const Dtype* b1 = bottom[1]->cpu_data();
-
Dtype loss = 0;
-
// bbox regression
-
if (channel == 4)
-
{
-
for (int i = 0; i < countLabel; ++i)
-
{
-
//cout << "forware_b1_4: " << b1[i*channel + 0] << " " << b1[i*channel + 1] << " " << b1[i*channel + 2] << " " << b1[i*channel + 3] << endl;
-
int dec = (b1[i*channel + 0] != ignore_label) && (b1[i*channel + 1] != ignore_label) && (b1[i*channel + 2] != ignore_label) && (b1[i*channel + 3] != ignore_label);
-
if ( dec==1 )
-
{
-
caffe_sub(
-
channel,
-
b0 + i * channel,
-
b1 + i * channel,
-
diff + i * channel);
-
Dtype dot = caffe_cpu_dot(channel, diff + i * channel, diff + i * channel);
-
loss += dot / Dtype(2);
-
//cout << "forware_b1_4: " << b1[i*channel + 0] << " " << b1[i*channel + 1] << " " << b1[i*channel + 2] << " " << b1[i*channel + 3] << endl;
-
}
-
}
-
}
-
// landmark
-
else if (channel == 10)
-
{
-
for (int i = 0; i < countLabel; ++i)
-
{
-
//cout << "forward_b1_10: " << b1[i*channel + 0] << " " << b1[i*channel + 1] << " " << b1[i*channel + 2] << " " << b1[i*channel + 3] << " " << b1[i*channel + 4] << " ";
-
//cout << b1[i*channel + 5] << " " << b1[i*channel + 6] << " " << b1[i*channel + 7] << " " << b1[i*channel + 8] << " " << b1[i*channel + 9] << endl;
-
int dec1 = (b1[i*channel + 0] != ignore_label) && (b1[i*channel + 1] != ignore_label) && (b1[i*channel + 2] != ignore_label) && (b1[i*channel + 3] != ignore_label) && (b1[i*channel + 4] != ignore_label);
-
int dec2 = (b1[i*channel + 5] != ignore_label) && (b1[i*channel + 6] != ignore_label) && (b1[i*channel + 7] != ignore_label) && (b1[i*channel + 8] != ignore_label) && (b1[i*channel + 9] != ignore_label);
-
if (dec1==1 && dec2==1)
-
{
-
caffe_sub(
-
channel,
-
b0 + i * channel,
-
b1 + i * channel,
-
diff + i * channel);
-
Dtype dot = caffe_cpu_dot(channel, diff + i * channel, diff + i * channel);
-
loss += dot / Dtype(2);
-
//cout << "forward_b1_10: " << b1[i*channel + 0] << " " << b1[i*channel + 1] << " " << b1[i*channel + 2] << " " << b1[i*channel + 3] << " " << b1[i*channel + 4] << " ";
-
//cout << b1[i*channel + 5] << " " << b1[i*channel + 6] << " " << b1[i*channel + 7] << " " << b1[i*channel + 8] << " " << b1[i*channel + 9] << endl;
-
}
-
}
-
}
-
// ****************org data ********************
-
//for (int i = 0; i < countLabel; ++i){
-
// if (label[i] != ignore_label){
-
// caffe_sub(
-
// channel,
-
// b0 + i * channel,
-
// b1 + i * channel,
-
// diff + i * channel);
-
// Dtype dot = caffe_cpu_dot(channel, diff + i * channel, diff + i * channel);
-
// loss += dot / Dtype(2);
-
// }
-
//}
-
// ***************** ********************
-
top[0]->mutable_cpu_data()[0] = loss;
-
}
-
else{
-
caffe_sub(
-
count,
-
bottom[0]->cpu_data(),
-
bottom[1]->cpu_data(),
-
diff_.mutable_cpu_data());
-
Dtype dot = caffe_cpu_dot(count, diff_.cpu_data(), diff_.cpu_data());
-
Dtype loss = dot / bottom[0]->num() / Dtype(2);
-
top[0]->mutable_cpu_data()[0] = loss;
-
}
-
}
-
template <typename Dtype>
-
void MTCNNEuclideanLossLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
-
const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
-
int has_ignore_label = this->layer_param().loss_param().has_ignore_label();
-
int ignore_label = has_ignore_label ? this->layer_param().loss_param().ignore_label() : -1;
-
if (has_ignore_label){
-
const Dtype* b1 = bottom[1]->cpu_data();
-
const Dtype* label = bottom[2]->cpu_data();
-
int countLabel = bottom[2]->num();
-
int channels = bottom[0]->channels();
-
//cout << "countLabel_backword: " << countLabel << endl;
-
//cout << "channels_backword: " << channels << endl;
-
//cout << "ignore_label_backword: " << ignore_label << endl;
-
for (int i = 0; i < 2; ++i) {
-
if (propagate_down[i]) {
-
memset(bottom[i]->mutable_cpu_diff(), 0, sizeof(Dtype)*bottom[i]->count());
-
const Dtype sign = (i == 0) ? 1 : -1;
-
const Dtype alpha = sign * top[0]->cpu_diff()[0] / bottom[i]->num();
-
// bbox regression
-
if (channels == 4)
-
{
-
for (int j = 0; j < countLabel; ++j)
-
{
-
int dec = (b1[j*channels + 0] != ignore_label) && (b1[j*channels + 1] != ignore_label) && (b1[j*channels + 2] != ignore_label) && (b1[j*channels + 3] != ignore_label);
-
if (dec==1)
-
{
-
caffe_cpu_axpby(
-
channels, // count
-
alpha, // alpha
-
diff_.cpu_data() + channels * j, // a
-
Dtype(0), // beta
-
bottom[i]->mutable_cpu_diff() + channels * j); // b
-
}
-
}
-
}
-
// landmark
-
else if (channels == 10)
-
{
-
for (int j = 0; j < countLabel; ++j)
-
{
-
int dec1 = (b1[j*channels + 0] != ignore_label) && (b1[j*channels + 1] != ignore_label) && (b1[j*channels + 2] != ignore_label) && (b1[j*channels + 3] != ignore_label) && (b1[j*channels + 4] != ignore_label);
-
int dec2 = (b1[j*channels + 5] != ignore_label) && (b1[j*channels + 6] != ignore_label) && (b1[j*channels + 7] != ignore_label) && (b1[j*channels + 8] != ignore_label) && (b1[j*channels + 9] != ignore_label);
-
if (dec1 == 1 && dec2 == 1)
-
{
-
caffe_cpu_axpby(
-
channels, // count
-
alpha, // alpha
-
diff_.cpu_data() + channels * j, // a
-
Dtype(0), // beta
-
bottom[i]->mutable_cpu_diff() + channels * j); // b
-
}
-
}
-
}
-
// ***********************org data********************
-
//for (int j = 0; j < countLabel; ++j){
-
// if (label[j] != ignore_label){
-
// caffe_cpu_axpby(
-
// channels, // count
-
// alpha, // alpha
-
// diff_.cpu_data() + channels * j, // a
-
// Dtype(0), // beta
-
// bottom[i]->mutable_cpu_diff() + channels * j); // b
-
// }
-
//}
-
}
-
}
-
}
-
else{
-
for (int i = 0; i < 2; ++i) {
-
if (propagate_down[i]) {
-
const Dtype sign = (i == 0) ? 1 : -1;
-
const Dtype alpha = sign * top[0]->cpu_diff()[0] / bottom[i]->num();
-
caffe_cpu_axpby(
-
bottom[i]->count(), // count
-
alpha, // alpha
-
diff_.cpu_data(), // a
-
Dtype(0), // beta
-
bottom[i]->mutable_cpu_diff()); // b
-
}
-
}
-
}
-
}
-
// Caffe helper macros; their definitions are outside this listing.
// STUB_GPU is only expanded when the build defines CPU_ONLY.
#ifdef CPU_ONLY
-
STUB_GPU(MTCNNEuclideanLossLayer);
-
#endif
-
// NOTE(review): presumably instantiates the class template for the supported
// Dtype values - confirm against the Caffe headers.
INSTANTIATE_CLASS(MTCNNEuclideanLossLayer);
-
// NOTE(review): presumably registers the layer under the type name
// "MTCNNEuclideanLoss" (the type string quoted in the notes at the top of
// this document) - confirm against the Caffe layer factory.
REGISTER_LAYER_CLASS(MTCNNEuclideanLoss);
-
} // namespace caffe
相应的"MTCNNEuclideanLoss.cu"如下:
-
#include <vector>
-
#include "caffe/layers/mtcnn_euclidean_loss_layer.hpp"
-
#include "caffe/util/math_functions.hpp"
-
#include <iostream>
-
using namespace std;
-
namespace caffe {
-
template <typename Dtype>
-
void MTCNNEuclideanLossLayer<Dtype>::Reshape(
-
const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
-
LossLayer<Dtype>::Reshape(bottom, top);
-
CHECK_EQ(bottom[0]->count(1), bottom[1]->count(1))
-
<< "Inputs must have the same dimension.";
-
int has_ignore_label = this->layer_param().loss_param().has_ignore_label();
-
if (has_ignore_label)
-
CHECK_EQ(bottom.size(), 3) << "has_ignore_label=true but not input label";
-
if (!has_ignore_label)
-
CHECK_EQ(bottom.size(), 2) << "has_ignore_label=false but input mismatch";
-
diff_.ReshapeLike(*bottom[0]);
-
}
-
template <typename Dtype>
-
void MTCNNEuclideanLossLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom,
-
const vector<Blob<Dtype>*>& top) {
-
int count = bottom[0]->count();
-
int has_ignore_label = this->layer_param().loss_param().has_ignore_label();
-
int ignore_label = has_ignore_label ? this->layer_param().loss_param().ignore_label() : -1;
-
if (has_ignore_label){
-
//label
-
const Dtype* label = bottom[2]->cpu_data();
-
Dtype* diff = diff_.mutable_gpu_data();
-
int countLabel = bottom[2]->num();
-
int channel = bottom[0]->channels();
-
//cout << "ignore_label_forward: " << ignore_label << endl; //
-
caffe_gpu_memset(sizeof(Dtype)*count, 0, diff);
-
const Dtype* b0 = bottom[0]->gpu_data();
-
const Dtype* b1 = bottom[1]->gpu_data();
-
const Dtype* b1_cpu = bottom[1]->cpu_data();
-
Dtype loss = 0;
-
//cout << "channel_forward " << channel << endl;
-
// bbox regression
-
if (channel == 4)
-
{
-
for (int i = 0; i < countLabel; ++i)
-
{
-
//cout << "forware_b1_4: " << b1_cpu[i*channel + 0] << " " << b1_cpu[i*channel + 1] << " " << b1_cpu[i*channel + 2] << " " << b1_cpu[i*channel + 3] << endl;
-
int dec = (b1_cpu[i*channel + 0] != ignore_label) && (b1_cpu[i*channel + 1] != ignore_label) && (b1_cpu[i*channel + 2] != ignore_label) && (b1_cpu[i*channel + 3] != ignore_label);
-
if (dec == 1)
-
{
-
caffe_gpu_sub(
-
channel,
-
b0 + i * channel,
-
b1 + i * channel,
-
diff + i * channel);
-
Dtype dot;
-
caffe_gpu_dot(channel, diff + i * channel, diff + i * channel, &dot);
-
loss += dot / Dtype(2);
-
}
-
}
-
}
-
// landmark
-
else if (channel == 10)
-
{
-
for (int i = 0; i < countLabel; ++i)
-
{
-
int dec1 = (b1_cpu[i*channel + 0] != ignore_label) && (b1_cpu[i*channel + 1] != ignore_label) && (b1_cpu[i*channel + 2] != ignore_label) && (b1_cpu[i*channel + 3] != ignore_label) && (b1_cpu[i*channel + 4] != ignore_label);
-
int dec2 = (b1_cpu[i*channel + 5] != ignore_label) && (b1_cpu[i*channel + 6] != ignore_label) && (b1_cpu[i*channel + 7] != ignore_label) && (b1_cpu[i*channel + 8] != ignore_label) && (b1_cpu[i*channel + 9] != ignore_label);
-
if (dec1 == 1 && dec2 == 1)
-
{
-
caffe_gpu_sub(
-
channel,
-
b0 + i * channel,
-
b1 + i * channel,
-
diff + i * channel);
-
Dtype dot;
-
caffe_gpu_dot(channel, diff + i * channel, diff + i * channel, &dot);
-
loss += dot / Dtype(2);
-
}
-
}
-
}
-
// ***********************org data ********************
-
//for (int i = 0; i < countLabel; ++i){
-
// if (label[i] != ignore_label){
-
// caffe_gpu_sub(
-
// channel,
-
// b0 + i * channel,
-
// b1 + i * channel,
-
// diff + i * channel);
-
// Dtype dot;
-
// caffe_gpu_dot(channel, diff + i * channel, diff + i * channel, &dot);
-
// loss += dot / Dtype(2);
-
// }
-
//}
-
// **************************** **********************
-
top[0]->mutable_cpu_data()[0] = loss;
-
}
-
else{
-
int count = bottom[0]->count();
-
caffe_gpu_sub(
-
count,
-
bottom[0]->gpu_data(),
-
bottom[1]->gpu_data(),
-
diff_.mutable_gpu_data());
-
Dtype dot;
-
caffe_gpu_dot(count, diff_.gpu_data(), diff_.gpu_data(), &dot);
-
Dtype loss = dot / bottom[0]->num() / Dtype(2);
-
top[0]->mutable_cpu_data()[0] = loss;
-
}
-
}
-
template <typename Dtype>
-
void MTCNNEuclideanLossLayer<Dtype>::Backward_gpu(const vector<Blob<Dtype>*>& top,
-
const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
-
int has_ignore_label = this->layer_param().loss_param().has_ignore_label();
-
int ignore_label = has_ignore_label ? this->layer_param().loss_param().ignore_label() : -1;
-
if (has_ignore_label){
-
const Dtype* b1 = bottom[1]->cpu_data();
-
const Dtype* label = bottom[2]->cpu_data();
-
int countLabel = bottom[2]->num();
-
int channels = bottom[0]->channels();
-
for (int i = 0; i < 2; ++i) {
-
if (propagate_down[i]) {
-
caffe_gpu_memset(sizeof(Dtype)*bottom[i]->count(), 0, bottom[i]->mutable_gpu_diff());
-
const Dtype sign = (i == 0) ? 1 : -1;
-
// bbox regression
-
if (channels == 4)
-
{
-
for (int j = 0; j < countLabel; ++j)
-
{
-
const Dtype alpha = sign * top[0]->cpu_diff()[0];
-
int dec = (b1[j*channels + 0] != ignore_label) && (b1[j*channels + 1] != ignore_label) && (b1[j*channels + 2] != ignore_label) && (b1[j*channels + 3] != ignore_label);
-
if (dec == 1)
-
{
-
caffe_gpu_axpby(
-
channels, // count
-
alpha, // alpha
-
diff_.gpu_data() + channels * j, // a
-
Dtype(0), // beta
-
bottom[i]->mutable_gpu_diff() + channels * j); // b
-
}
-
}
-
}
-
// landmark
-
else if (channels == 10)
-
{
-
for (int j = 0; j < countLabel; ++j)
-
{
-
const Dtype alpha = sign * top[0]->cpu_diff()[0];
-
int dec1 = (b1[j*channels + 0] != ignore_label) && (b1[j*channels + 1] != ignore_label) && (b1[j*channels + 2] != ignore_label) && (b1[j*channels + 3] != ignore_label) && (b1[j*channels + 4] != ignore_label);
-
int dec2 = (b1[j*channels + 5] != ignore_label) && (b1[j*channels + 6] != ignore_label) && (b1[j*channels + 7] != ignore_label) && (b1[j*channels + 8] != ignore_label) && (b1[j*channels + 9] != ignore_label);
-
if (dec1 == 1 && dec2 == 1)
-
{
-
caffe_gpu_axpby(
-
channels, // count
-
alpha, // alpha
-
diff_.gpu_data() + channels * j, // a
-
Dtype(0), // beta
-
bottom[i]->mutable_gpu_diff() + channels * j); // b
-
}
-
}
-
}
-
// ******************* org data**********************
-
//for (int j = 0; j < countLabel; ++j){
-
// const Dtype alpha = sign * top[0]->cpu_diff()[0];
-
// if (label[j] != ignore_label){
-
// caffe_gpu_axpby(
-
// channels, // count
-
// alpha, // alpha
-
// diff_.gpu_data() + channels * j, // a
-
// Dtype(0), // beta
-
// bottom[i]->mutable_gpu_diff() + channels * j); // b
-
// }
-
//}
-
}
-
}
-
}
-
else{
-
for (int i = 0; i < 2; ++i) {
-
if (propagate_down[i]) {
-
const Dtype sign = (i == 0) ? 1 : -1;
-
const Dtype alpha = sign * top[0]->cpu_diff()[0] / bottom[i]->num();
-
caffe_gpu_axpby(
-
bottom[i]->count(), // count
-
alpha, // alpha
-
diff_.gpu_data(), // a
-
Dtype(0), // beta
-
bottom[i]->mutable_gpu_diff()); // b
-
}
-
}
-
}
-
}
-
// Caffe helper macro; its definition is outside this listing.
// NOTE(review): presumably instantiates Forward_gpu/Backward_gpu for the
// supported Dtype values - confirm against the Caffe headers.
INSTANTIATE_LAYER_GPU_FUNCS(MTCNNEuclideanLossLayer);
-
} // namespace caffe
小记:在完成mtcnn_euclidean_loss_layer.cu的修改时发现,const Dtype* b1 = bottom[1]->gpu_data(); 返回的是显存(device)指针,不能在主机端直接解引用、打印或取值;需要在CPU端读取标签值时,改用 const Dtype* b1_cpu = bottom[1]->cpu_data(); 就行了。