MTCNN训练不收敛原因:
地址: https://github.com/dlunion/mtcnn
我们的训练数据标签格式:
wider face:
pos/001.jpg 1 x1 y1 x2 y2 (x1 y1 x2 y2) -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
part/001.jpg -1 x1 y1 x2 y2 (x1 y1 x2 y2) -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
neg/001.jpg 0 -1 -1 -1 -1 (x1 y1 x2 y2) -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
celebA:
landmark/001.jpg -1 -1 -1 -1 -1 pst1_x pst1_y pst2_x pst2_y pst3_x pst3_y pst4_x pst4_y pst5_x pst5_y
作者要求的
训练数据标签格式:
pos/001.jpg 1 x1 y1 x2 y2 (x1 y1 x2 y2)
pst1_x pst1_y
pst2_x pst2_y
pst3_x pst3_y
pst4_x pst4_y
pst5_x pst5_y
part/001.jpg -1 x1 y1 x2 y2 (x1 y1 x2 y2)
pst1_x pst1_y
pst2_x pst2_y
pst3_x pst3_y
pst4_x pst4_y
pst5_x pst5_y
neg/001.jpg 0 -1 -1 -1 -1 (x1 y1 x2 y2)
pst1_x pst1_y
pst2_x pst2_y
pst3_x pst3_y
pst4_x pst4_y
pst5_x pst5_y
在
“
pts_loss
”层(
type: "MTCNNEuclideanLoss"
)中,以
"label"(分类的标签)来判断是否ignore。对于我们的训练数据标签格式:
class: ignore_label=-1, 可以正常分类;
bbox regression:
ignore_label=0, 但landmark样本的bbox标签里的-1也参加计算,导致loss无法收敛;
landmark:
ignore_label=0, 但pos/part样本的landmark标签里的-1也参加计算,导致loss无法收敛;
解决思路:
在做
class,
bbox regression,
landmark任务时,判断标签值是否全部为-1,来作为ignore条件。
修改后"MTCNNEuclideanLoss.cpp"如下:
#include <vector> #include "caffe/layers/mtcnn_euclidean_loss_layer.hpp" #include "caffe/util/math_functions.hpp" #include <iostream> using namespace std; namespace caffe { template <typename Dtype> void MTCNNEuclideanLossLayer<Dtype>::Reshape( const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) { LossLayer<Dtype>::Reshape(bottom, top); CHECK_EQ(bottom[0]->count(1), bottom[1]->count(1)) << "Inputs must have the same dimension."; int has_ignore_label = this->layer_param().loss_param().has_ignore_label(); if (has_ignore_label) CHECK_EQ(bottom.size(), 3) << "has_ignore_label=true but not input label"; if (!has_ignore_label) CHECK_EQ(bottom.size(), 2) << "has_ignore_label=false but input mismatch"; diff_.ReshapeLike(*bottom[0]); } template <typename Dtype> void MTCNNEuclideanLossLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) { int count = bottom[0]->count(); int has_ignore_label = this->layer_param().loss_param().has_ignore_label(); int ignore_label = has_ignore_label ? 
this->layer_param().loss_param().ignore_label() : -1; if (has_ignore_label){ const Dtype* label = bottom[2]->cpu_data(); int countLabel = bottom[2]->num(); //label Dtype* diff = diff_.mutable_cpu_data(); int channel = bottom[0]->channels(); //cout << "countLabel_forward: " << countLabel << endl; //cout << "channel_forward: " << channel << endl; //cout << "ignore_label_forward: " << ignore_label << endl; memset(diff, 0, sizeof(Dtype)*count); const Dtype* b0 = bottom[0]->cpu_data(); const Dtype* b1 = bottom[1]->cpu_data(); Dtype loss = 0; // bbox regression if (channel == 4) { for (int i = 0; i < countLabel; ++i) { //cout << "forware_b1_4: " << b1[i*channel + 0] << " " << b1[i*channel + 1] << " " << b1[i*channel + 2] << " " << b1[i*channel + 3] << endl; int dec = (b1[i*channel + 0] != ignore_label) && (b1[i*channel + 1] != ignore_label) && (b1[i*channel + 2] != ignore_label) && (b1[i*channel + 3] != ignore_label); if ( dec==1 ) { caffe_sub( channel, b0 + i * channel, b1 + i * channel, diff + i * channel); Dtype dot = caffe_cpu_dot(channel, diff + i * channel, diff + i * channel); loss += dot / Dtype(2); //cout << "forware_b1_4: " << b1[i*channel + 0] << " " << b1[i*channel + 1] << " " << b1[i*channel + 2] << " " << b1[i*channel + 3] << endl; } } } // landmark else if (channel == 10) { for (int i = 0; i < countLabel; ++i) { //cout << "forward_b1_10: " << b1[i*channel + 0] << " " << b1[i*channel + 1] << " " << b1[i*channel + 2] << " " << b1[i*channel + 3] << " " << b1[i*channel + 4] << " "; //cout << b1[i*channel + 5] << " " << b1[i*channel + 6] << " " << b1[i*channel + 7] << " " << b1[i*channel + 8] << " " << b1[i*channel + 9] << endl; int dec1 = (b1[i*channel + 0] != ignore_label) && (b1[i*channel + 1] != ignore_label) && (b1[i*channel + 2] != ignore_label) && (b1[i*channel + 3] != ignore_label) && (b1[i*channel + 4] != ignore_label); int dec2 = (b1[i*channel + 5] != ignore_label) && (b1[i*channel + 6] != ignore_label) && (b1[i*channel + 7] != ignore_label) && 
(b1[i*channel + 8] != ignore_label) && (b1[i*channel + 9] != ignore_label); if (dec1==1 && dec2==1) { caffe_sub( channel, b0 + i * channel, b1 + i * channel, diff + i * channel); Dtype dot = caffe_cpu_dot(channel, diff + i * channel, diff + i * channel); loss += dot / Dtype(2); //cout << "forward_b1_10: " << b1[i*channel + 0] << " " << b1[i*channel + 1] << " " << b1[i*channel + 2] << " " << b1[i*channel + 3] << " " << b1[i*channel + 4] << " "; //cout << b1[i*channel + 5] << " " << b1[i*channel + 6] << " " << b1[i*channel + 7] << " " << b1[i*channel + 8] << " " << b1[i*channel + 9] << endl; } } } // ****************org data ******************** //for (int i = 0; i < countLabel; ++i){ // if (label[i] != ignore_label){ // caffe_sub( // channel, // b0 + i * channel, // b1 + i * channel, // diff + i * channel); // Dtype dot = caffe_cpu_dot(channel, diff + i * channel, diff + i * channel); // loss += dot / Dtype(2); // } //} // ***************** ******************** top[0]->mutable_cpu_data()[0] = loss; } else{ caffe_sub( count, bottom[0]->cpu_data(), bottom[1]->cpu_data(), diff_.mutable_cpu_data()); Dtype dot = caffe_cpu_dot(count, diff_.cpu_data(), diff_.cpu_data()); Dtype loss = dot / bottom[0]->num() / Dtype(2); top[0]->mutable_cpu_data()[0] = loss; } } template <typename Dtype> void MTCNNEuclideanLossLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top, const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) { int has_ignore_label = this->layer_param().loss_param().has_ignore_label(); int ignore_label = has_ignore_label ? 
this->layer_param().loss_param().ignore_label() : -1; if (has_ignore_label){ const Dtype* b1 = bottom[1]->cpu_data(); const Dtype* label = bottom[2]->cpu_data(); int countLabel = bottom[2]->num(); int channels = bottom[0]->channels(); //cout << "countLabel_backword: " << countLabel << endl; //cout << "channels_backword: " << channels << endl; //cout << "ignore_label_backword: " << ignore_label << endl; for (int i = 0; i < 2; ++i) { if (propagate_down[i]) { memset(bottom[i]->mutable_cpu_diff(), 0, sizeof(Dtype)*bottom[i]->count()); const Dtype sign = (i == 0) ? 1 : -1; const Dtype alpha = sign * top[0]->cpu_diff()[0] / bottom[i]->num(); // bbox regression if (channels == 4) { for (int j = 0; j < countLabel; ++j) { int dec = (b1[j*channels + 0] != ignore_label) && (b1[j*channels + 1] != ignore_label) && (b1[j*channels + 2] != ignore_label) && (b1[j*channels + 3] != ignore_label); if (dec==1) { caffe_cpu_axpby( channels, // count alpha, // alpha diff_.cpu_data() + channels * j, // a Dtype(0), // beta bottom[i]->mutable_cpu_diff() + channels * j); // b } } } // landmark else if (channels == 10) { for (int j = 0; j < countLabel; ++j) { int dec1 = (b1[j*channels + 0] != ignore_label) && (b1[j*channels + 1] != ignore_label) && (b1[j*channels + 2] != ignore_label) && (b1[j*channels + 3] != ignore_label) && (b1[j*channels + 4] != ignore_label); int dec2 = (b1[j*channels + 5] != ignore_label) && (b1[j*channels + 6] != ignore_label) && (b1[j*channels + 7] != ignore_label) && (b1[j*channels + 8] != ignore_label) && (b1[j*channels + 9] != ignore_label); if (dec1 == 1 && dec2 == 1) { caffe_cpu_axpby( channels, // count alpha, // alpha diff_.cpu_data() + channels * j, // a Dtype(0), // beta bottom[i]->mutable_cpu_diff() + channels * j); // b } } } // ***********************org data******************** //for (int j = 0; j < countLabel; ++j){ // if (label[j] != ignore_label){ // caffe_cpu_axpby( // channels, // count // alpha, // alpha // diff_.cpu_data() + channels * j, // a // 
Dtype(0), // beta // bottom[i]->mutable_cpu_diff() + channels * j); // b // } //} } } } else{ for (int i = 0; i < 2; ++i) { if (propagate_down[i]) { const Dtype sign = (i == 0) ? 1 : -1; const Dtype alpha = sign * top[0]->cpu_diff()[0] / bottom[i]->num(); caffe_cpu_axpby( bottom[i]->count(), // count alpha, // alpha diff_.cpu_data(), // a Dtype(0), // beta bottom[i]->mutable_cpu_diff()); // b } } } } #ifdef CPU_ONLY STUB_GPU(MTCNNEuclideanLossLayer); #endif INSTANTIATE_CLASS(MTCNNEuclideanLossLayer); REGISTER_LAYER_CLASS(MTCNNEuclideanLoss); } // namespace caffe
相应的"MTCNNEuclideanLoss.cu"修改如下:
#include <vector> #include "caffe/layers/mtcnn_euclidean_loss_layer.hpp" #include "caffe/util/math_functions.hpp" #include <iostream> using namespace std; namespace caffe { template <typename Dtype> void MTCNNEuclideanLossLayer<Dtype>::Reshape( const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) { LossLayer<Dtype>::Reshape(bottom, top); CHECK_EQ(bottom[0]->count(1), bottom[1]->count(1)) << "Inputs must have the same dimension."; int has_ignore_label = this->layer_param().loss_param().has_ignore_label(); if (has_ignore_label) CHECK_EQ(bottom.size(), 3) << "has_ignore_label=true but not input label"; if (!has_ignore_label) CHECK_EQ(bottom.size(), 2) << "has_ignore_label=false but input mismatch"; diff_.ReshapeLike(*bottom[0]); } template <typename Dtype> void MTCNNEuclideanLossLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) { int count = bottom[0]->count(); int has_ignore_label = this->layer_param().loss_param().has_ignore_label(); int ignore_label = has_ignore_label ? 
this->layer_param().loss_param().ignore_label() : -1; if (has_ignore_label){ //label const Dtype* label = bottom[2]->cpu_data(); Dtype* diff = diff_.mutable_gpu_data(); int countLabel = bottom[2]->num(); int channel = bottom[0]->channels(); //cout << "ignore_label_forward: " << ignore_label << endl; // caffe_gpu_memset(sizeof(Dtype)*count, 0, diff); const Dtype* b0 = bottom[0]->gpu_data(); const Dtype* b1 = bottom[1]->gpu_data(); const Dtype* b1_cpu = bottom[1]->cpu_data(); Dtype loss = 0; //cout << "channel_forward " << channel << endl; // bbox regression if (channel == 4) { for (int i = 0; i < countLabel; ++i) { //cout << "forware_b1_4: " << b1_cpu[i*channel + 0] << " " << b1_cpu[i*channel + 1] << " " << b1_cpu[i*channel + 2] << " " << b1_cpu[i*channel + 3] << endl; int dec = (b1_cpu[i*channel + 0] != ignore_label) && (b1_cpu[i*channel + 1] != ignore_label) && (b1_cpu[i*channel + 2] != ignore_label) && (b1_cpu[i*channel + 3] != ignore_label); if (dec == 1) { caffe_gpu_sub( channel, b0 + i * channel, b1 + i * channel, diff + i * channel); Dtype dot; caffe_gpu_dot(channel, diff + i * channel, diff + i * channel, &dot); loss += dot / Dtype(2); } } } // landmark else if (channel == 10) { for (int i = 0; i < countLabel; ++i) { int dec1 = (b1_cpu[i*channel + 0] != ignore_label) && (b1_cpu[i*channel + 1] != ignore_label) && (b1_cpu[i*channel + 2] != ignore_label) && (b1_cpu[i*channel + 3] != ignore_label) && (b1_cpu[i*channel + 4] != ignore_label); int dec2 = (b1_cpu[i*channel + 5] != ignore_label) && (b1_cpu[i*channel + 6] != ignore_label) && (b1_cpu[i*channel + 7] != ignore_label) && (b1_cpu[i*channel + 8] != ignore_label) && (b1_cpu[i*channel + 9] != ignore_label); if (dec1 == 1 && dec2 == 1) { caffe_gpu_sub( channel, b0 + i * channel, b1 + i * channel, diff + i * channel); Dtype dot; caffe_gpu_dot(channel, diff + i * channel, diff + i * channel, &dot); loss += dot / Dtype(2); } } } // ***********************org data ******************** //for (int i = 0; i < 
countLabel; ++i){ // if (label[i] != ignore_label){ // caffe_gpu_sub( // channel, // b0 + i * channel, // b1 + i * channel, // diff + i * channel); // Dtype dot; // caffe_gpu_dot(channel, diff + i * channel, diff + i * channel, &dot); // loss += dot / Dtype(2); // } //} // **************************** ********************** top[0]->mutable_cpu_data()[0] = loss; } else{ int count = bottom[0]->count(); caffe_gpu_sub( count, bottom[0]->gpu_data(), bottom[1]->gpu_data(), diff_.mutable_gpu_data()); Dtype dot; caffe_gpu_dot(count, diff_.gpu_data(), diff_.gpu_data(), &dot); Dtype loss = dot / bottom[0]->num() / Dtype(2); top[0]->mutable_cpu_data()[0] = loss; } } template <typename Dtype> void MTCNNEuclideanLossLayer<Dtype>::Backward_gpu(const vector<Blob<Dtype>*>& top, const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) { int has_ignore_label = this->layer_param().loss_param().has_ignore_label(); int ignore_label = has_ignore_label ? this->layer_param().loss_param().ignore_label() : -1; if (has_ignore_label){ const Dtype* b1 = bottom[1]->cpu_data(); const Dtype* label = bottom[2]->cpu_data(); int countLabel = bottom[2]->num(); int channels = bottom[0]->channels(); for (int i = 0; i < 2; ++i) { if (propagate_down[i]) { caffe_gpu_memset(sizeof(Dtype)*bottom[i]->count(), 0, bottom[i]->mutable_gpu_diff()); const Dtype sign = (i == 0) ? 
1 : -1; // bbox regression if (channels == 4) { for (int j = 0; j < countLabel; ++j) { const Dtype alpha = sign * top[0]->cpu_diff()[0]; int dec = (b1[j*channels + 0] != ignore_label) && (b1[j*channels + 1] != ignore_label) && (b1[j*channels + 2] != ignore_label) && (b1[j*channels + 3] != ignore_label); if (dec == 1) { caffe_gpu_axpby( channels, // count alpha, // alpha diff_.gpu_data() + channels * j, // a Dtype(0), // beta bottom[i]->mutable_gpu_diff() + channels * j); // b } } } // landmark else if (channels == 10) { for (int j = 0; j < countLabel; ++j) { const Dtype alpha = sign * top[0]->cpu_diff()[0]; int dec1 = (b1[j*channels + 0] != ignore_label) && (b1[j*channels + 1] != ignore_label) && (b1[j*channels + 2] != ignore_label) && (b1[j*channels + 3] != ignore_label) && (b1[j*channels + 4] != ignore_label); int dec2 = (b1[j*channels + 5] != ignore_label) && (b1[j*channels + 6] != ignore_label) && (b1[j*channels + 7] != ignore_label) && (b1[j*channels + 8] != ignore_label) && (b1[j*channels + 9] != ignore_label); if (dec1 == 1 && dec2 == 1) { caffe_gpu_axpby( channels, // count alpha, // alpha diff_.gpu_data() + channels * j, // a Dtype(0), // beta bottom[i]->mutable_gpu_diff() + channels * j); // b } } } // ******************* org data********************** //for (int j = 0; j < countLabel; ++j){ // const Dtype alpha = sign * top[0]->cpu_diff()[0]; // if (label[j] != ignore_label){ // caffe_gpu_axpby( // channels, // count // alpha, // alpha // diff_.gpu_data() + channels * j, // a // Dtype(0), // beta // bottom[i]->mutable_gpu_diff() + channels * j); // b // } //} } } } else{ for (int i = 0; i < 2; ++i) { if (propagate_down[i]) { const Dtype sign = (i == 0) ? 1 : -1; const Dtype alpha = sign * top[0]->cpu_diff()[0] / bottom[i]->num(); caffe_gpu_axpby( bottom[i]->count(), // count alpha, // alpha diff_.gpu_data(), // a Dtype(0), // beta bottom[i]->mutable_gpu_diff()); // b } } } } INSTANTIATE_LAYER_GPU_FUNCS(MTCNNEuclideanLossLayer); } // namespace caffe
小结:完成mtcnn_euclidean_loss_layer.cu的修改后发现:const Dtype* b1 = bottom[1]->gpu_data(); 取到的是GPU显存指针,主机端代码不能直接解引用或打印,改成cpu模式:const Dtype* b1_cpu = bottom[1]->cpu_data();就行了。