MTCNN训练不收敛原因:
地址: https://github.com/dlunion/mtcnn
我们的训练数据标签格式:
wider face:
pos/001.jpg 1 x1 y1 x2 y2 (x1 y1 x2 y2) -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
part/001.jpg -1 x1 y1 x2 y2 (x1 y1 x2 y2) -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
neg/001.jpg 0 -1 -1 -1 -1 (x1 y1 x2 y2) -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
celebA:
landmark/001.jpg -1 -1 -1 -1 -1 pst1_x pst1_y pst2_x pst2_y pst3_x pst3_y pst4_x pst4_y pst5_x pst5_y
作者要求的
训练数据标签格式:
pos/001.jpg 1 x1 y1 x2 y2 (x1 y1 x2 y2)
pst1_x pst1_y
pst2_x pst2_y
pst3_x pst3_y
pst4_x pst4_y
pst5_x pst5_y
part/001.jpg -1 x1 y1 x2 y2 (x1 y1 x2 y2)
pst1_x pst1_y
pst2_x pst2_y
pst3_x pst3_y
pst4_x pst4_y
pst5_x pst5_y
neg/001.jpg 0 -1 -1 -1 -1 (x1 y1 x2 y2)
pst1_x pst1_y
pst2_x pst2_y
pst3_x pst3_y
pst4_x pst4_y
pst5_x pst5_y
在
“
pts_loss
”层(
type: "MTCNNEuclideanLoss"
)中,以
"label"(分类的标签)来判断是否ignore。对于我们的训练数据标签格式:
class: ignore_label=-1, 可以正常分类;
bbox regression:
ignore_label=0, 但landmark样本的bbox标签里的-1也参加计算,导致loss无法收敛;
landmark:
ignore_label=0, 但pos/part样本的landmark标签里的-1也参加计算,导致loss无法收敛;
解决思路:
在做
class,
bbox regression,
landmark任务时,判断标签值是否全部为-1,来作为ignore条件。
修改后"MTCNNEuclideanLoss.cpp"如下:
#include <vector> #include "caffe/layers/mtcnn_euclidean_loss_layer.hpp" #include "caffe/util/math_functions.hpp" #include <iostream> using namespace std; namespace caffe { template <typename Dtype> void MTCNNEuclideanLossLayer<Dtype>::Reshape( const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) { LossLayer<Dtype>::Reshape(bottom, top); CHECK_EQ(bottom[0]->count(1), bottom[1]->count(1)) << "Inputs must have the same dimension."; int has_ignore_label = this->layer_param().loss_param().has_ignore_label(); if (has_ignore_label) CHECK_EQ(bottom.size(), 3) << "has_ignore_label=true but not input label"; if (!has_ignore_label) CHECK_EQ(bottom.size(), 2) << "has_ignore_label=false but input mismatch"; diff_.ReshapeLike(*bottom[0]); } template <typename Dtype> void MTCNNEuclideanLossLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) { int count = bottom[0]->count(); int has_ignore_label = this->layer_param().loss_param().has_ignore_label(); int ignore_label = has_ignore_label ? 
this->layer_param().loss_param().ignore_label() : -1; if (has_ignore_label){ const Dtype* label = bottom[2]->cpu_data(); int countLabel = bottom[2]->num(); //label Dtype* diff = diff_.mutable_cpu_data(); int channel = bottom[0]->channels(); //cout << "countLabel_forward: " << countLabel << endl; //cout << "channel_forward: " << channel << endl; //cout << "ignore_label_forward: " << ignore_label << endl; memset(diff, 0, sizeof(Dtype)*count); const Dtype* b0 = bottom[0]->cpu_data(); const Dtype* b1 = bottom[1]->cpu_data(); Dtype loss = 0; // bbox regression if (channel == 4) { for (int i = 0; i < countLabel; ++i) { //cout << "forware_b1_4: " << b1[i*channel + 0] << " " << b1[i*channel + 1] << " " << b1[i*channel + 2] << " " << b1[i*channel + 3] << endl; int dec = (b1[i*channel + 0] != ignore_label) && (b1[i*channel + 1] != ignore_label) && (b1[i*channel + 2] != ignore_label) && (b1[i*channel + 3] != ignore_label); if ( dec==1 ) { caffe_sub( channel, b0 + i * channel, b1 + i * channel, diff + i * channel); Dtype dot = caffe_cpu_dot(channel, diff + i * channel, diff + i * channel); loss += dot / Dtype(2); //cout << "forware_b1_4: " << b1[i*channel + 0] << " " << b1[i*channel + 1] << " " << b1[i*channel + 2] << " " << b1[i*channel + 3] << endl; } } } // landmark else if (channel == 10) { for (int i = 0; i < countLabel; ++i) { //cout << "forward_b1_10: " << b1[i*channel + 0] << " " << b1[i*channel + 1] << " " << b1[i*channel + 2] << " " << b1[i*channel + 3] << " " << b1[i*channel + 4] << " "; //cout << b1[i*channel + 5] << " " << b1[i*channel + 6] << " " << b1[i*channel + 7] << " " << b1[i*channel + 8] << " " << b1[i*channel + 9] << endl; int dec1 = (b1[i*channel + 0] != ignore_label) && (b1[i*channel + 1] != ignore_label) && (b1[i*channel + 2] != ignore_label) && (b1[i*channel + 3] != ignore_label) && (b1[i*channel + 4] != ignore_label); int dec2 = (b1[i*channel + 5] != ignore_label) && (b1[i*channel + 6] != ignore_label) && (b1[i*channel + 7] != ignore_label) && 
(b1[i*channel + 8] != ignore_label) && (b1[i*channel + 9] != ignore_label); if (dec1==1 && dec2==1) { caffe_sub( channel, b0 + i * channel, b1 + i * channel, diff + i * channel); Dtype dot = caffe_cpu_dot(channel, diff + i * channel, diff + i * channel); loss += dot / Dtype(2); //cout << "forward_b1_10: " << b1[i*channel + 0] << " " << b1[i*channel + 1] << " " << b1[i*channel + 2] << " " << b1[i*channel + 3] << " " << b1[i*channel + 4] << " "; //cout << b1[i*channel + 5] << " " << b1[i*channel + 6] << " " << b1[i*channel + 7] << " " << b1[i*channel + 8] << " " << b1[i*channel + 9] << endl; } } } // ****************org data ******************** //for (int i = 0; i < countLabel; ++i){ // if (label[i] != ignore_label){ // caffe_sub( // channel, // b0 + i * channel, // b1 + i * channel, // diff + i * channel); // Dtype dot = caffe_cpu_dot(channel, diff + i * channel, diff + i * channel); // loss += dot / Dtype(2); // } //} // ***************** ******************** top[0]->mutable_cpu_data()[0] = loss; } else{ caffe_sub( count, bottom[0]->cpu_data(), bottom[1]->cpu_data(), diff_.mutable_cpu_data()); Dtype dot = caffe_cpu_dot(count, diff_.cpu_data(), diff_.cpu_data()); Dtype loss = dot / bottom[0]->num() / Dtype(2); top[0]->mutable_cpu_data()[0] = loss; } } template <typename Dtype> void MTCNNEuclideanLossLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top, const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) { int has_ignore_label = this->layer_param().loss_param().has_ignore_label(); int ignore_label = has_ignore_label ? 
this->layer_param().loss_param().ignore_label() : -1; if (has_ignore_label){ const Dtype* b1 = bottom[1]->cpu_data(); const Dtype* label = bottom[2]->cpu_data(); int countLabel = bottom[2]->num(); int channels = bottom[0]->channels(); //cout << "countLabel_backword: " << countLabel << endl; //cout << "channels_backword: " << channels << endl; //cout << "ignore_label_backword: " << ignore_label << endl; for (int i = 0; i < 2; ++i) { if (propagate_down[i]) { memset(bottom[i]->mutable_cpu_diff(), 0, sizeof(Dtype)*bottom[i]->count()); const Dtype sign = (i == 0) ? 1 : -1; const Dtype alpha = sign * top[0]->cpu_diff()[0] / bottom[i]->num(); // bbox regression if (channels == 4) { for (int j = 0; j < countLabel; ++j) { int dec = (b1[j*channels + 0] != ignore_label) && (b1[j*channels + 1] != ignore_label) && (b1[j*channels + 2] != ignore_label) && (b1[j*channels + 3] != ignore_label); if (dec==1) { caffe_cpu_axpby( channels, // count alpha, // alpha diff_.cpu_data() + channels * j, // a Dtype(0), // beta bottom[i]->mutable_cpu_diff() + channels * j); // b } } } // landmark else if (channels == 10) { for (int j = 0; j < countLabel; ++j) { int dec1 = (b1[j*channels + 0] != ignore_label) && (b1[j*channels + 1] != ignore_label) && (b1[j*channels + 2] != ignore_label) && (b1[j*channels + 3] != ignore_label) && (b1[j*channels + 4] != ignore_label); int dec2 = (b1[j*channels + 5] != ignore_label) && (b1[j*channels + 6] != ignore_label) && (b1[j*channels + 7] != ignore_label) && (b1[j*channels + 8] != ignore_label) && (b1[j*channels + 9] != ignore_label); if (dec1 == 1 && dec2 == 1) { caffe_cpu_axpby( channels, // count alpha, // alpha diff_.cpu_data() + channels * j, // a Dtype(0), // beta bottom[i]->mutable_cpu_diff() + channels * j); // b } } } // ***********************org data******************** //for (int j = 0; j < countLabel; ++j){ // if (label[j] != ignore_label){ // caffe_cpu_axpby( // channels, // count // alpha, // alpha // diff_.cpu_data() + channels * j, // a // 
Dtype(0), // beta // bottom[i]->mutable_cpu_diff() + channels * j); // b // } //} } } } else{ for (int i = 0; i < 2; ++i) { if (propagate_down[i]) { const Dtype sign = (i == 0) ? 1 : -1; const Dtype alpha = sign * top[0]->cpu_diff()[0] / bottom[i]->num(); caffe_cpu_axpby( bottom[i]->count(), // count alpha, // alpha diff_.cpu_data(), // a Dtype(0), // beta bottom[i]->mutable_cpu_diff()); // b } } } } #ifdef CPU_ONLY STUB_GPU(MTCNNEuclideanLossLayer); #endif INSTANTIATE_CLASS(MTCNNEuclideanLossLayer); REGISTER_LAYER_CLASS(MTCNNEuclideanLoss); } // namespace caffe
相应的"MTCNNEuclideanLoss.cu"修改如下:
#include <vector> #include "caffe/layers/mtcnn_euclidean_loss_layer.hpp" #include "caffe/util/math_functions.hpp" #include <iostream> using namespace std; namespace caffe { template <typename Dtype> void MTCNNEuclideanLossLayer<Dtype>::Reshape( const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) { LossLayer<Dtype>::Reshape(bottom, top); CHECK_EQ(bottom[0]->count(1), bottom[1]->count(1)) << "Inputs must have the same dimension."; int has_ignore_label = this->layer_param().loss_param().has_ignore_label(); if (has_ignore_label) CHECK_EQ(bottom.size(), 3) << "has_ignore_label=true but not input label"; if (!has_ignore_label) CHECK_EQ(bottom.size(), 2) << "has_ignore_label=false but input mismatch"; diff_.ReshapeLike(*bottom[0]); } template <typename Dtype> void MTCNNEuclideanLossLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) { int count = bottom[0]->count(); int has_ignore_label = this->layer_param().loss_param().has_ignore_label(); int ignore_label = has_ignore_label ? 
this->layer_param().loss_param().ignore_label() : -1; if (has_ignore_label){ //label const Dtype* label = bottom[2]->cpu_data(); Dtype* diff = diff_.mutable_gpu_data(); int countLabel = bottom[2]->num(); int channel = bottom[0]->channels(); //cout << "ignore_label_forward: " << ignore_label << endl; // caffe_gpu_memset(sizeof(Dtype)*count, 0, diff); const Dtype* b0 = bottom[0]->gpu_data(); const Dtype* b1 = bottom[1]->gpu_data(); const Dtype* b1_cpu = bottom[1]->cpu_data(); Dtype loss = 0; //cout << "channel_forward " << channel << endl; // bbox regression if (channel == 4) { for (int i = 0; i < countLabel; ++i) { //cout << "forware_b1_4: " << b1_cpu[i*channel + 0] << " " << b1_cpu[i*channel + 1] << " " << b1_cpu[i*channel + 2] << " " << b1_cpu[i*channel + 3] << endl; int dec = (b1_cpu[i*channel + 0] != ignore_label) && (b1_cpu[i*channel + 1] != ignore_label) && (b1_cpu[i*channel + 2] != ignore_label) && (b1_cpu[i*channel + 3] != ignore_label); if (dec == 1) { caffe_gpu_sub( channel, b0 + i * channel, b1 + i * channel, diff + i * channel); Dtype dot; caffe_gpu_dot(channel, diff + i * channel, diff + i * channel, &dot); loss += dot / Dtype(2); } } } // landmark else if (channel == 10) { for (int i = 0; i < countLabel; ++i) { int dec1 = (b1_cpu[i*channel + 0] != ignore_label) && (b1_cpu[i*channel + 1] != ignore_label) && (b1_cpu[i*channel + 2] != ignore_label) && (b1_cpu[i*channel + 3] != ignore_label) && (b1_cpu[i*channel + 4] != ignore_label); int dec2 = (b1_cpu[i*channel + 5] != ignore_label) && (b1_cpu[i*channel + 6] != ignore_label) && (b1_cpu[i*channel + 7] != ignore_label) && (b1_cpu[i*channel + 8] != ignore_label) && (b1_cpu[i*channel + 9] != ignore_label); if (dec1 == 1 && dec2 == 1) { caffe_gpu_sub( channel, b0 + i * channel, b1 + i * channel, diff + i * channel); Dtype dot; caffe_gpu_dot(channel, diff + i * channel, diff + i * channel, &dot); loss += dot / Dtype(2); } } } // ***********************org data ******************** //for (int i = 0; i < 
countLabel; ++i){ // if (label[i] != ignore_label){ // caffe_gpu_sub( // channel, // b0 + i * channel, // b1 + i * channel, // diff + i * channel); // Dtype dot; // caffe_gpu_dot(channel, diff + i * channel, diff + i * channel, &dot); // loss += dot / Dtype(2); // } //} // **************************** ********************** top[0]->mutable_cpu_data()[0] = loss; } else{ int count = bottom[0]->count(); caffe_gpu_sub( count, bottom[0]->gpu_data(), bottom[1]->gpu_data(), diff_.mutable_gpu_data()); Dtype dot; caffe_gpu_dot(count, diff_.gpu_data(), diff_.gpu_data(), &dot); Dtype loss = dot / bottom[0]->num() / Dtype(2); top[0]->mutable_cpu_data()[0] = loss; } } template <typename Dtype> void MTCNNEuclideanLossLayer<Dtype>::Backward_gpu(const vector<Blob<Dtype>*>& top, const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) { int has_ignore_label = this->layer_param().loss_param().has_ignore_label(); int ignore_label = has_ignore_label ? this->layer_param().loss_param().ignore_label() : -1; if (has_ignore_label){ const Dtype* b1 = bottom[1]->cpu_data(); const Dtype* label = bottom[2]->cpu_data(); int countLabel = bottom[2]->num(); int channels = bottom[0]->channels(); for (int i = 0; i < 2; ++i) { if (propagate_down[i]) { caffe_gpu_memset(sizeof(Dtype)*bottom[i]->count(), 0, bottom[i]->mutable_gpu_diff()); const Dtype sign = (i == 0) ? 
1 : -1; // bbox regression if (channels == 4) { for (int j = 0; j < countLabel; ++j) { const Dtype alpha = sign * top[0]->cpu_diff()[0]; int dec = (b1[j*channels + 0] != ignore_label) && (b1[j*channels + 1] != ignore_label) && (b1[j*channels + 2] != ignore_label) && (b1[j*channels + 3] != ignore_label); if (dec == 1) { caffe_gpu_axpby( channels, // count alpha, // alpha diff_.gpu_data() + channels * j, // a Dtype(0), // beta bottom[i]->mutable_gpu_diff() + channels * j); // b } } } // landmark else if (channels == 10) { for (int j = 0; j < countLabel; ++j) { const Dtype alpha = sign * top[0]->cpu_diff()[0]; int dec1 = (b1[j*channels + 0] != ignore_label) && (b1[j*channels + 1] != ignore_label) && (b1[j*channels + 2] != ignore_label) && (b1[j*channels + 3] != ignore_label) && (b1[j*channels + 4] != ignore_label); int dec2 = (b1[j*channels + 5] != ignore_label) && (b1[j*channels + 6] != ignore_label) && (b1[j*channels + 7] != ignore_label) && (b1[j*channels + 8] != ignore_label) && (b1[j*channels + 9] != ignore_label); if (dec1 == 1 && dec2 == 1) { caffe_gpu_axpby( channels, // count alpha, // alpha diff_.gpu_data() + channels * j, // a Dtype(0), // beta bottom[i]->mutable_gpu_diff() + channels * j); // b } } } // ******************* org data********************** //for (int j = 0; j < countLabel; ++j){ // const Dtype alpha = sign * top[0]->cpu_diff()[0]; // if (label[j] != ignore_label){ // caffe_gpu_axpby( // channels, // count // alpha, // alpha // diff_.gpu_data() + channels * j, // a // Dtype(0), // beta // bottom[i]->mutable_gpu_diff() + channels * j); // b // } //} } } } else{ for (int i = 0; i < 2; ++i) { if (propagate_down[i]) { const Dtype sign = (i == 0) ? 1 : -1; const Dtype alpha = sign * top[0]->cpu_diff()[0] / bottom[i]->num(); caffe_gpu_axpby( bottom[i]->count(), // count alpha, // alpha diff_.gpu_data(), // a Dtype(0), // beta bottom[i]->mutable_gpu_diff()); // b } } } } INSTANTIATE_LAYER_GPU_FUNCS(MTCNNEuclideanLossLayer); } // namespace caffe
小结:完成mtcnn_euclidean_loss_layer.cu的修改后发现:const Dtype* b1 = bottom[1]->gpu_data(); 取到的是GPU显存指针,主机端代码不能直接解引用或打印,改成cpu模式:const Dtype* b1_cpu = bottom[1]->cpu_data();就行了。