My current task is quantizing a network model with Caffe's Ristretto, so this post walks through the source code so that I can use it flexibly later on.
First, let's look at the script used to quantize a model:
#!/usr/bin/env sh
folder=examples/ristretto/carperson_wyg
echo ${folder}
export LD_LIBRARY_PATH=/usr/local/lib/
./build/tools/ristretto quantize \
--model=${folder}/train.prototxt \
--weights=${folder}/carperson.caffemodel \
--model_quantized=${folder}/quantize/train_quantized.prototxt \
--trimming_mode=dynamic_fixed_point --gpu=0 --iterations=50 \
--error_margin=60
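One flag worth noting: error_margin=60 is very permissive. As the code below shows, a quantized configuration is accepted as long as accuracy + error_margin/100 >= baseline, so this run tolerates a score drop of up to 0.6; the flag's default is 2 (i.e. 2%).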
So we start from tools/ristretto.cpp, reading from the main() function:
#include <glog/logging.h>
#include <cstring>
#include <map>
#include <string>
#include <vector>
#include "boost/algorithm/string.hpp"
#include "caffe/caffe.hpp"
#include "caffe/proto/caffe.pb.h"
#include "caffe/util/upgrade_proto.hpp"
#include "ristretto/quantization.hpp"
using caffe::Blob;
using caffe::Caffe;
using caffe::Net;
using caffe::Layer;
using caffe::Solver;
using caffe::shared_ptr;
using caffe::string;
using caffe::Timer;
using caffe::vector;
using std::ostringstream;
//Debug-wyg
using namespace std;
DEFINE_string(model, "",
"The model definition protocol buffer text file..");
DEFINE_string(weights, "",
"The trained weights.");
DEFINE_string(trimming_mode, "",
"Available options: dynamic_fixed_point, minifloat or "
"integer_power_of_2_weights.");
DEFINE_string(model_quantized, "",
"The output path of the quantized net");
DEFINE_string(gpu, "",
"Optional: Run in GPU mode on given device ID.");
DEFINE_int32(iterations, 50,
"Optional: The number of iterations to run.");
DEFINE_double(error_margin, 2,
"Optional: the allowed accuracy drop in %");
// A simple registry for caffe commands.
typedef int (*BrewFunction)();
typedef std::map<caffe::string, BrewFunction> BrewMap;
BrewMap g_brew_map;
#define RegisterBrewFunction(func) \
namespace { \
class __Registerer_##func { \
public: /* NOLINT */ \
__Registerer_##func() { \
g_brew_map[#func] = &func; \
} \
}; \
__Registerer_##func g_registerer_##func; \
}
//wyg: look up the function pointer by its name so the command can be invoked
static BrewFunction GetBrewFunction(const caffe::string& name) {
if (g_brew_map.count(name)) {
return g_brew_map[name];
} else {
LOG(ERROR) << "Available ristretto actions:";
for (BrewMap::iterator it = g_brew_map.begin();
it != g_brew_map.end(); ++it) {
LOG(ERROR) << "\t" << it->first;
}
LOG(FATAL) << "Unknown action: " << name;
return NULL; // not reachable, just to suppress old compiler warnings.
}
}
// ristretto commands to call by
// ristretto <command> <args>
//
// To add a command, define a function "int command()" and register it with
// RegisterBrewFunction(action);
// Quantize a 32-bit FP network to smaller word width.
int quantize(){
CHECK_GT(FLAGS_model.size(), 0) << "Need a model definition to score.";
CHECK_GT(FLAGS_weights.size(), 0) << "Need model weights to score.";
CHECK_GT(FLAGS_model_quantized.size(), 0) << "Need network description "
"output path.";
CHECK_GT(FLAGS_trimming_mode.size(), 0) << "Need trimming mode.";
//wyg: create a pointer to a Quantization object and quantize the network with its QuantizeNet() method
Quantization* q = new Quantization(FLAGS_model, FLAGS_weights,
FLAGS_model_quantized, FLAGS_iterations, FLAGS_trimming_mode,
FLAGS_error_margin, FLAGS_gpu);
q->QuantizeNet();
delete q;
return 0;
}
RegisterBrewFunction(quantize);
int main(int argc, char** argv) {
// Print output to stderr (while still logging).
FLAGS_alsologtostderr = 1;
// Set version
gflags::SetVersionString(AS_STRING(CAFFE_VERSION));
// Usage message.
gflags::SetUsageMessage("command line brew\n"
"usage: ristretto <command> <args>\n\n"
"commands:\n"
" quantize Trim 32bit floating point net\n");
// Run tool or show usage.
// wyg: the printouts show that the flags are parsed in GlobalInit() and read back via FLAGS_<name>; after parsing, only the command argument (quantize) is left in argv
cout << "argc: " << argc << endl;
caffe::GlobalInit(&argc, &argv);
cout << "argc: " << argc << endl;
if (argc == 2) {
//wyg: GetBrewFunction returns a pointer to the requested function, which is called immediately
return GetBrewFunction(caffe::string(argv[1]))();
} else {
gflags::ShowUsageWithFlagsRestrict(argv[0], "tools/ristretto");
}
}
To summarize, this file does three things: parse the command-line arguments, create a Quantization object, and call its QuantizeNet() method to carry out the quantization.
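Before moving on, the RegisterBrewFunction macro deserves a note: it is the classic static-registration pattern, where a file-local object's constructor runs before main() and adds the command to a global map. A minimal, self-contained sketch of the same idea (all names here are illustrative, not from the Ristretto source):

#include <iostream>
#include <map>
#include <string>

typedef int (*Command)();

// Meyers-singleton registry: constructed on first use, which avoids the
// static-initialization-order problem a plain global map could have.
static std::map<std::string, Command>& Registry() {
  static std::map<std::string, Command> m;
  return m;
}

// A file-local object whose constructor runs before main() and registers func.
#define REGISTER_COMMAND(func) \
  namespace { \
  struct Registerer_##func { \
    Registerer_##func() { Registry()[#func] = &func; } \
  } g_registerer_##func; \
  }

int quantize() { std::cout << "quantize called\n"; return 0; }
REGISTER_COMMAND(quantize);

int main(int argc, char** argv) {
  if (argc == 2 && Registry().count(argv[1])) {
    return Registry()[argv[1]]();  // dispatch by name, like GetBrewFunction
  }
  std::cout << "usage: tool <command>\n";
  return 1;
}

The upshot is that adding a new subcommand only requires defining an int command() and registering it; main() stays untouched. ristretto.cpp itself uses a plain global map (g_brew_map), which works because everything lives in one translation unit.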
ristretto.cpp is only the skeleton of the process; the code that actually performs the quantization lives in src/caffe/quantization.cpp. Let's look at it, starting with the constructor and then the QuantizeNet() method:
#include "boost/algorithm/string.hpp"
#include "caffe/caffe.hpp"
#include "ristretto/quantization.hpp"
using caffe::Caffe;
using caffe::Net;
using caffe::string;
using caffe::vector;
using caffe::Blob;
using caffe::LayerParameter;
using caffe::NetParameter;
//Debug-wyg
using namespace std;
//wyg: the constructor of the Quantization class; its main job is to initialize the member variables
Quantization::Quantization(string model, string weights, string model_quantized,
int iterations, string trimming_mode, double error_margin, string gpus) {
this->model_ = model;
this->weights_ = weights;
this->model_quantized_ = model_quantized;
this->iterations_ = iterations;
this->trimming_mode_ = trimming_mode;
this->error_margin_ = error_margin;
this->gpus_ = gpus;
this->max_bit = 8;
this->min_bit = 8;
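// Note: with max_bit == min_bit == 8, every bit-width search loop below
// ("for (bitwidth = max_bit; bitwidth >= min_bit; bitwidth /= 2)") runs
// exactly once, at 8 bits -- the search is pinned to 8-bit quantization.
// Compare Quantize2MiniFloat(), whose search still starts at 16 bits.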
// Could possibly improve choice of exponent. Experiments show LeNet needs
// 4 bits, but the saturation border is at 3 bits (when assuming an infinitely
// long mantissa).
//wyg: this corresponds to step 2 in the paper: reserve enough exponent bits up front for the activations
this->exp_bits_ = 4;
}
//wyg: this method is the entry point of the quantization process
void Quantization::QuantizeNet() {
//wyg: model_quantized_ is the path where the quantized model will be written; check write permissions first
CheckWritePermissions(model_quantized_);
SetGpu();
// Run the reference floating point network on validation set to find baseline
// accuracy.
//wyg: first run the validation set for iterations_ batches to measure the baseline accuracy
//wyg: construct the Caffe net and load the model definition and trained weights
Net<float>* net_val = new Net<float>(model_, caffe::TEST);
net_val->CopyTrainedLayersFrom(weights_);
float accuracy;
//wyg: this is the Quantization class's forward-propagation method; you may want to skip ahead and read it first
RunForwardBatches(this->iterations_, net_val, &accuracy);
test_score_baseline_ = accuracy;
delete net_val;
// Run the reference floating point network on the training set to find the
// maximum values. Statistics are collected for iterations_ batches (the stock
// code used 10; see the commented-out line below).
Net<float>* net_test = new Net<float>(model_, caffe::TRAIN);
net_test->CopyTrainedLayersFrom(weights_);
// RunForwardBatches(10, net_test, &accuracy, true);
RunForwardBatches(this->iterations_, net_test, &accuracy, true);
delete net_test;
// Do network quantization and scoring.
if (trimming_mode_ == "dynamic_fixed_point") {
Quantize2DynamicFixedPoint();
} else if (trimming_mode_ == "minifloat") {
Quantize2MiniFloat();
} else if (trimming_mode_ == "integer_power_of_2_weights") {
Quantize2IntegerPowerOf2Weights();
} else {
LOG(FATAL) << "Unknown trimming mode: " << trimming_mode_;
}
}
void Quantization::CheckWritePermissions(const string path) {
std::ofstream probe_ofs(path.c_str());
if (probe_ofs.good()) {
probe_ofs.close();
std::remove(path.c_str());
} else {
LOG(FATAL) << "Missing write permissions";
}
}
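// Note: the probe above opens the path with std::ofstream (which truncates an
// existing file) and then deletes it with std::remove() -- so any file already
// sitting at model_quantized_ is destroyed by this permission check.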
void Quantization::SetGpu() {
// Parse GPU ids or use all available devices
vector<int> gpus;
if (gpus_ == "all") {
int count = 0;
#ifndef CPU_ONLY
CUDA_CHECK(cudaGetDeviceCount(&count));
#else
NO_GPU;
#endif
for (int i = 0; i < count; ++i) {
gpus.push_back(i);
}
} else if (gpus_.size()) {
vector<string> strings;
boost::split(strings, gpus_, boost::is_any_of(","));
for (int i = 0; i < strings.size(); ++i) {
gpus.push_back(boost::lexical_cast<int>(strings[i]));
}
} else {
CHECK_EQ(gpus.size(), 0);
}
// Set device id and mode
if (gpus.size() != 0) {
LOG(INFO) << "Use GPU with device ID " << gpus[0];
Caffe::SetDevice(gpus[0]);
Caffe::set_mode(Caffe::GPU);
} else {
LOG(INFO) << "Use CPU.";
Caffe::set_mode(Caffe::CPU);
}
}
//wyg: run forward propagation for the given number of iterations; accuracy is returned through a pointer.
//wyg: parameters: number of iterations, the Caffe net, the output pointer for accuracy, whether to collect the value ranges of the Conv and InnerProduct layers (default false), and score_number, the index of the output value used as the accuracy score (default 0)
void Quantization::RunForwardBatches(const int iterations,
Net<float>* caffe_net, float* accuracy, const bool do_stats,
const int score_number) {
LOG(INFO) << "Running for " << iterations << " iterations.";
vector<Blob<float>* > bottom_vec;
vector<int> test_score_output_id;
vector<float> test_score;
float loss = 0;
for (int i = 0; i < iterations; ++i) {
float iter_loss;
// Do forward propagation.
//wyg: see net.cpp for details: this propagates the net forward once, computing the loss of one batch; result holds the network's output blobs
//wyg: note: the score here is loss-based; it is negated at the end of this function so that larger is better
const vector<Blob<float>*>& result =
caffe_net->Forward(bottom_vec, &iter_loss);
// Find maximal values in network.
//wyg: collect the maxima of the Conv and InnerProduct layers
if (do_stats) {
//wyg: a change here: the stock code does not track the range of the biases, but we want to quantize the biases too, so we collect it as well (a small modification to the original code)
caffe_net->RangeInLayers(&layer_names_, &max_in_, &max_out_,
&max_params_, &max_params_bias_);
}
// Keep track of network score over multiple batches.
loss += iter_loss;
int idx = 0;
//wyg: iterate over the outputs of the forward pass
//Debug-wyg
//cout << "result.size(): " << result.size() << endl;
//cout << "result[0].count(): " << result[0]->count() << endl;
for (int j = 0; j < result.size(); ++j) {
//wyg: under the hood, Blob calls SyncedMemory's cpu_data() method; just read this as "fetch the blob's data"
const float* result_vec = result[j]->cpu_data();
//wyg: then process the data of each output blob
for (int k = 0; k < result[j]->count(); ++k, ++idx) {
//wyg: i -> iteration index
//wyg: j -> index of the output blob
//wyg: k -> index of the value within that blob
//wyg: idx -> running index of the values read in one iteration
printf("i,j,k,idx=%d,%d,%d,%d\n",i,j,k,idx);
const float score = result_vec[k];
printf("score = result_vec[%d] = %f\n",k,score);
//wyg: accumulate every network output across iterations
if (i == 0) {
test_score.push_back(score);
test_score_output_id.push_back(j);
} else {
test_score[idx] += score;
}
//wyg: print the output value of each batch together with its blob name, e.g. mbox_loss
const std::string& output_name = caffe_net->blob_names()[
caffe_net->output_blob_indices()[j]];
LOG(INFO) << "Batch " << i << ", " << output_name << " = " << score;
//Debug-wyg
printf("content in test_score\n");
for(int ii=0 ; ii<test_score.size() ; ii++){
printf("%d %f, ",ii,test_score[ii]);
}
printf("\n");
}
}
}
// Output the result.
//wyg: compute and print the average loss
loss /= iterations;
LOG(INFO) << "Loss: " << loss;
//wyg: below we compute the mean score of each output over all batches; the result is positive, so at the end we negate it to make it comparable with the loss handling above
for (int i = 0; i < test_score.size(); ++i) {
const std::string& output_name = caffe_net->blob_names()[
caffe_net->output_blob_indices()[test_score_output_id[i]]];
const float loss_weight = caffe_net->blob_loss_weights()[
caffe_net->output_blob_indices()[test_score_output_id[i]]];
std::ostringstream loss_msg_stream;
const float mean_score = test_score[i] / iterations;
if (loss_weight) {
loss_msg_stream << " (* " << loss_weight
<< " = " << loss_weight * mean_score << " loss)";
}
LOG(INFO) << output_name << " = " << mean_score
<< " score_number= " << score_number
<< " set accuracy= " << test_score[score_number] / iterations
<< " " << loss_msg_stream.str();
}
*accuracy = test_score[score_number] / iterations;
//added by starimpact
*accuracy *= -1;
}
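// Net effect: *accuracy is the mean of the score_number-th output value over
// all iterations, negated. For a loss-type output this makes "larger is
// better", which is what the "accuracy + error_margin_ / 100 >= baseline"
// comparisons in the quantization routines below assume.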
void Quantization::Quantize2DynamicFixedPoint() {
// Find the integer length for dynamic fixed point numbers.
// The integer length is chosen such that no saturation occurs.
// This approximation assumes an infinitely long fractional part.
// For layer activations, we reduce the integer length by one bit.
for(int i=0;i<40;i++){
printf("step1 \n");
}
for (int i = 0; i < layer_names_.size(); ++i) {
il_in_.push_back((int)ceil(log2(max_in_[i])));
il_out_.push_back((int)ceil(log2(max_out_[i])));
il_params_.push_back((int)ceil(log2(max_params_[i])+1));
il_params_bias_.push_back((int)ceil(log2(max_params_bias_[i])+1));
}
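// Worked example (illustrative numbers): if a layer's largest weight magnitude
// is 5.3, then il_params = ceil(log2(5.3) + 1) = 4 bits cover the sign plus
// integer part; with an 8-bit word the fractional length becomes
// fl = 8 - 4 = 4, i.e. a quantization step of 2^-4 = 0.0625 (see
// EditNetDescriptionDynamicFixedPoint below, which sets fl_params = bw - il).
// The +1 is applied to the signed parameters but not to the activations.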
// Debug
for (int k = 0; k < layer_names_.size(); ++k) {
LOG(INFO) << "Layer " << layer_names_[k] <<
", max value input=" << max_in_[k] <<
", max value output=" << max_out_[k] <<
", max value parameters=" << max_params_[k] <<
", max value parameters bias=" << max_params_bias_[k];
}
// Debug
for (int k = 0; k < layer_names_.size(); ++k) {
LOG(INFO) << "Layer " << layer_names_[k] <<
", integer length input=" << il_in_[k] <<
", integer length output=" << il_out_[k] <<
", integer length parameters=" << il_params_[k] <<
", integer length parameters bias=" << il_params_bias_[k];
}
//return;
NetParameter param;
float accuracy;
Net<float>* net_test;
// Score net with dynamic fixed point layer activations.
// The rest of the net remains in high precision format.
for(int i=0;i<40;i++){
printf("step4 Score net with dynamic fixed point layer activations. \n");
}
caffe::ReadNetParamsFromTextFileOrDie(model_, ¶m);
param.mutable_state()->set_phase(caffe::TEST);
vector<int> test_bw_layer_activations;
vector<float> test_scores_layer_activations;
for (int bitwidth = max_bit; bitwidth >= min_bit; bitwidth /= 2) {
for(int h=0;h<60;h++){
printf("step4 ------now the bitwidth is %d\n",bitwidth);
}
EditNetDescriptionDynamicFixedPoint(¶m, "Convolution_and_InnerProduct",
"Activations", -1, -1, bitwidth, bitwidth, -1, -1);
net_test = new Net<float>(param);
net_test->CopyTrainedLayersFrom(weights_);
RunForwardBatches(iterations_, net_test, &accuracy);
test_bw_layer_activations.push_back(bitwidth);
test_scores_layer_activations.push_back(accuracy);
delete net_test;
if ( accuracy + error_margin_ / 100 < test_score_baseline_ ) {
EditNetDescriptionDynamicFixedPoint(¶m, "Convolution_and_InnerProduct","Activations", -1, -1, max_bit, min_bit, -1, -1);
break;
}
}
// Score net with dynamic fixed point convolution parameters.
// The rest of the net remains in high precision format.
for(int i=0;i<40;i++){
printf("step2 Score net with dynamic fixed point convolution parameters \n");
}
caffe::ReadNetParamsFromTextFileOrDie(model_, ¶m);
param.mutable_state()->set_phase(caffe::TEST);
vector<int> test_bw_conv_params;
vector<float> test_scores_conv_params;
for (int bitwidth = max_bit; bitwidth >= min_bit; bitwidth /= 2) {
for(int h=0;h<60;h++){
printf("step2 ------now the bitwidth is %d\n",bitwidth);
}
EditNetDescriptionDynamicFixedPoint(¶m, "Convolution", "Parameters",
bitwidth, -1, -1, -1, -1, -1);
net_test = new Net<float>(param);
net_test->CopyTrainedLayersFrom(weights_);
RunForwardBatches(iterations_, net_test, &accuracy);
test_bw_conv_params.push_back(bitwidth);
test_scores_conv_params.push_back(accuracy);
delete net_test;
if ( accuracy + error_margin_ / 100 < test_score_baseline_ ) {
EditNetDescriptionDynamicFixedPoint(¶m, "Convolution", "Parameters", max_bit, -1, -1, -1, -1, -1);
break;
}
}
// Score net with dynamic fixed point convolution parameters bias.
// The rest of the net remains in high precision format.
for(int i=0;i<40;i++){
printf("step2_bias Score net with dynamic fixed point convolution parameters bias\n");
}
caffe::ReadNetParamsFromTextFileOrDie(model_, ¶m);
param.mutable_state()->set_phase(caffe::TEST);
vector<int> test_bw_conv_params_bias;
vector<float> test_scores_conv_params_bias;
for (int bitwidth = max_bit; bitwidth >= min_bit; bitwidth /= 2) {
for(int h=0;h<60;h++){
printf("step2_bias ------now the bitwidth is %d\n",bitwidth);
}
EditNetDescriptionDynamicFixedPoint(¶m, "Convolution", "Bias",
-1, -1, -1, -1, bitwidth, -1);
net_test = new Net<float>(param);
net_test->CopyTrainedLayersFrom(weights_);
RunForwardBatches(iterations_, net_test, &accuracy);
test_bw_conv_params_bias.push_back(bitwidth);
test_scores_conv_params_bias.push_back(accuracy);
delete net_test;
if ( accuracy + error_margin_ / 100 < test_score_baseline_ ) {
EditNetDescriptionDynamicFixedPoint(¶m, "Convolution", "Bias", -1, -1, -1, -1, max_bit, -1);
break;
}
}
// Score net with dynamic fixed point inner product parameters.
// The rest of the net remains in high precision format.
for(int i=0;i<40;i++){
printf("step3 Score net with dynamic fixed point inner product parameters\n");
}
caffe::ReadNetParamsFromTextFileOrDie(model_, ¶m);
param.mutable_state()->set_phase(caffe::TEST);
vector<int> test_bw_fc_params;
vector<float> test_scores_fc_params;
for (int bitwidth = max_bit; bitwidth >= min_bit; bitwidth /= 2) {
for(int h=0;h<60;h++){
printf("step3 ------now the bitwidth is %d\n",bitwidth);
}
EditNetDescriptionDynamicFixedPoint(¶m, "InnerProduct", "Parameters",
-1, bitwidth, -1, -1, -1, -1);
net_test = new Net<float>(param);
net_test->CopyTrainedLayersFrom(weights_);
RunForwardBatches(iterations_, net_test, &accuracy);
test_bw_fc_params.push_back(bitwidth);
test_scores_fc_params.push_back(accuracy);
delete net_test;
if ( accuracy + error_margin_ / 100 < test_score_baseline_ ) {
EditNetDescriptionDynamicFixedPoint(¶m, "InnerProduct", "Parameters", -1, max_bit, -1, -1, -1, -1);
break;
}
}
// Score net with dynamic fixed point inner product parameters bias.
// The rest of the net remains in high precision format.
for(int i=0;i<40;i++){
printf("step3_bias Score net with dynamic fixed point inner product parameters bias\n");
}
caffe::ReadNetParamsFromTextFileOrDie(model_, ¶m);
param.mutable_state()->set_phase(caffe::TEST);
vector<int> test_bw_fc_params_bias;
vector<float> test_scores_fc_params_bias;
for (int bitwidth = max_bit; bitwidth >= min_bit; bitwidth /= 2) {
for(int h=0;h<60;h++){
printf("step3_bias ------now the bitwidth is %d\n",bitwidth);
}
EditNetDescriptionDynamicFixedPoint(¶m, "InnerProduct", "Bias",
-1, -1, -1, -1, -1, bitwidth);
net_test = new Net<float>(param);
net_test->CopyTrainedLayersFrom(weights_);
RunForwardBatches(iterations_, net_test, &accuracy);
test_bw_fc_params_bias.push_back(bitwidth);
test_scores_fc_params_bias.push_back(accuracy);
delete net_test;
if ( accuracy + error_margin_ / 100 < test_score_baseline_ ) {
EditNetDescriptionDynamicFixedPoint(¶m, "InnerProduct", "Bias",-1, -1, -1, -1, -1, max_bit);
break;
}
}
// Choose bit-width for different network parts
for(int i=0;i<40;i++){
printf("step5 bit-width for different network parts \n");
}
bw_conv_params_ = max_bit;
bw_conv_params_bias_ = max_bit;
bw_fc_params_ = max_bit;
bw_fc_params_bias_ = max_bit;
bw_out_ = max_bit;
for (int i = 0; i < test_scores_conv_params.size(); ++i) {
if (test_scores_conv_params[i] + error_margin_ / 100 >=
test_score_baseline_)
bw_conv_params_ = test_bw_conv_params[i];
else
break;
}
for (int i = 0; i < test_scores_conv_params_bias.size(); ++i) {
if (test_scores_conv_params_bias[i] + error_margin_ / 100 >=
test_score_baseline_)
bw_conv_params_bias_ = test_bw_conv_params_bias[i];
else
break;
}
for (int i = 0; i < test_scores_fc_params.size(); ++i) {
if (test_scores_fc_params[i] + error_margin_ / 100 >=
test_score_baseline_)
bw_fc_params_ = test_bw_fc_params[i];
else
break;
}
for (int i = 0; i < test_scores_fc_params_bias.size(); ++i) {
if (test_scores_fc_params_bias[i] + error_margin_ / 100 >=
test_score_baseline_)
bw_fc_params_bias_ = test_bw_fc_params_bias[i];
else
break;
}
for (int i = 0; i < test_scores_layer_activations.size(); ++i) {
if (test_scores_layer_activations[i] + error_margin_ / 100 >=
test_score_baseline_)
bw_out_ = test_bw_layer_activations[i];
else
break;
}
bw_in_ = bw_out_;
// Score dynamic fixed point network.
// This network combines dynamic fixed point parameters in convolutional and
// inner product layers, as well as dynamic fixed point activations.
for(int i=0;i<40;i++){
printf("step6 Score dynamic fixed point network. \n");
}
caffe::ReadNetParamsFromTextFileOrDie(model_, ¶m);
param.mutable_state()->set_phase(caffe::TEST);
EditNetDescriptionDynamicFixedPoint(¶m, "Convolution_and_InnerProduct",
"Parameters_and_Activations_and_Bias", bw_conv_params_, bw_fc_params_, bw_in_,
bw_out_, bw_conv_params_bias_, bw_fc_params_bias_);
net_test = new Net<float>(param);
net_test->CopyTrainedLayersFrom(weights_);
RunForwardBatches(iterations_, net_test, &accuracy);
delete net_test;
param.release_state();
WriteProtoToTextFile(param, model_quantized_);
for(int i=0;i<40;i++){
printf("step7 result \n");
}
// Write summary of dynamic fixed point analysis to log
LOG(INFO) << "------------------------------";
LOG(INFO) << "Network accuracy analysis for";
LOG(INFO) << "Convolutional (CONV) and fully";
LOG(INFO) << "connected (FC) layers.";
LOG(INFO) << "Baseline 32bit float: " << test_score_baseline_;
LOG(INFO) << "Dynamic fixed point CONV";
LOG(INFO) << "weights: ";
for (int j = 0; j < test_scores_conv_params.size(); ++j) {
LOG(INFO) << test_bw_conv_params[j] << "bit: \t" <<
test_scores_conv_params[j];
}
LOG(INFO) << "bias: ";
for (int j = 0; j < test_scores_conv_params_bias.size(); ++j) {
LOG(INFO) << test_bw_conv_params_bias[j] << "bit: \t" <<
test_scores_conv_params_bias[j];
}
LOG(INFO) << "Dynamic fixed point FC";
LOG(INFO) << "weights: ";
for (int j = 0; j < test_scores_fc_params.size(); ++j) {
LOG(INFO) << test_bw_fc_params[j] << "bit: \t" << test_scores_fc_params[j];
}
LOG(INFO) << "bias: ";
for (int j = 0; j < test_scores_fc_params_bias.size(); ++j) {
LOG(INFO) << test_bw_fc_params_bias[j] << "bit: \t" << test_scores_fc_params_bias[j];
}
LOG(INFO) << "Dynamic fixed point layer";
LOG(INFO) << "activations:";
for (int j = 0; j < test_scores_layer_activations.size(); ++j) {
LOG(INFO) << test_bw_layer_activations[j] << "bit: \t" <<
test_scores_layer_activations[j];
}
LOG(INFO) << "Dynamic fixed point net:";
LOG(INFO) << bw_conv_params_ << "bit CONV weights,";
LOG(INFO) << bw_conv_params_bias_ << "bit CONV bias,";
LOG(INFO) << bw_fc_params_ << "bit FC weights,";
LOG(INFO) << bw_fc_params_bias_ << "bit FC bias,";
LOG(INFO) << bw_out_ << "bit layer activations:";
LOG(INFO) << "Accuracy: " << accuracy;
LOG(INFO) << "Please fine-tune.";
}
void Quantization::Quantize2MiniFloat() {
// Find the necessary amount of exponent bits.
// The exponent bits are chosen such that no saturation occurs.
// This approximation assumes an infinitely long mantissa.
// Parameters are ignored, since they are normally smaller than layer
// activations.
for ( int i = 0; i < layer_names_.size(); ++i ) {
int exp_in = ceil(log2(log2(max_in_[i]) - 1) + 1);
int exp_out = ceil(log2(log2(max_out_[i]) - 1) + 1);
exp_bits_ = std::max( std::max( exp_bits_, exp_in ), exp_out);
}
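// Worked example (illustrative numbers): for a layer maximum of 40,
// log2(40) ~= 5.32, so exp = ceil(log2(5.32 - 1) + 1) = ceil(3.11) = 4
// exponent bits; exp_bits_ keeps the largest requirement over all layers and
// never drops below the 4 bits preset in the constructor.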
// Score net with minifloat parameters and activations.
NetParameter param;
caffe::ReadNetParamsFromTextFileOrDie(model_, ¶m);
param.mutable_state()->set_phase(caffe::TEST);
vector<int> test_bitwidth;
vector<float> test_scores;
float accuracy;
Net<float>* net_test;
// Test the net with different bit-widths
for (int bitwidth = 16; bitwidth - 1 - exp_bits_ > 0; bitwidth /= 2) {
EditNetDescriptionMiniFloat(¶m, bitwidth);
net_test = new Net<float>(param);
net_test->CopyTrainedLayersFrom(weights_);
RunForwardBatches(iterations_, net_test, &accuracy);
test_bitwidth.push_back(bitwidth);
test_scores.push_back(accuracy);
delete net_test;
if ( accuracy + error_margin_ / 100 < test_score_baseline_ ) break;
}
// Choose bitwidth for network
int best_bitwidth = 32;
for(int i = 0; i < test_scores.size(); ++i) {
if (test_scores[i] + error_margin_ / 100 >= test_score_baseline_)
best_bitwidth = test_bitwidth[i];
else
break;
}
// Write prototxt file of net with best bitwidth
caffe::ReadNetParamsFromTextFileOrDie(model_, ¶m);
EditNetDescriptionMiniFloat(¶m, best_bitwidth);
WriteProtoToTextFile(param, model_quantized_);
// Write summary of minifloat analysis to log
LOG(INFO) << "------------------------------";
LOG(INFO) << "Network accuracy analysis for";
LOG(INFO) << "Convolutional (CONV) and fully";
LOG(INFO) << "connected (FC) layers.";
LOG(INFO) << "Baseline 32bit float: " << test_score_baseline_;
LOG(INFO) << "Minifloat net:";
for(int j = 0; j < test_scores.size(); ++j) {
LOG(INFO) << test_bitwidth[j] << "bit: \t" << test_scores[j];
}
LOG(INFO) << "Please fine-tune.";
}
void Quantization::Quantize2IntegerPowerOf2Weights() {
// Find the integer length for dynamic fixed point numbers.
// The integer length is chosen such that no saturation occurs.
// This approximation assumes an infinitely long fractional part.
// For layer activations, we reduce the integer length by one bit.
for (int i = 0; i < layer_names_.size(); ++i) {
il_in_.push_back((int)ceil(log2(max_in_[i])));
il_out_.push_back((int)ceil(log2(max_out_[i])));
}
// Score net with integer-power-of-two weights and dynamic fixed point
// activations.
NetParameter param;
caffe::ReadNetParamsFromTextFileOrDie(model_, ¶m);
param.mutable_state()->set_phase(caffe::TEST);
float accuracy;
Net<float>* net_test;
EditNetDescriptionIntegerPowerOf2Weights(¶m);
// Bit-width of layer activations is hard-coded to 8-bit.
EditNetDescriptionDynamicFixedPoint(¶m, "Convolution_and_InnerProduct",
"Activations", -1, -1, 8, 8, -1, -1);
net_test = new Net<float>(param);
net_test->CopyTrainedLayersFrom(weights_);
RunForwardBatches(iterations_, net_test, &accuracy);
delete net_test;
// Write prototxt file of quantized net
param.release_state();
WriteProtoToTextFile(param, model_quantized_);
// Write summary of integer-power-of-2-weights analysis to log
LOG(INFO) << "------------------------------";
LOG(INFO) << "Network accuracy analysis for";
LOG(INFO) << "Integer-power-of-two weights";
LOG(INFO) << "in Convolutional (CONV) and";
LOG(INFO) << "fully connected (FC) layers.";
LOG(INFO) << "Baseline 32bit float: " << test_score_baseline_;
LOG(INFO) << "Quantized net:";
LOG(INFO) << "4bit: \t" << accuracy;
LOG(INFO) << "Please fine-tune.";
}
void Quantization::EditNetDescriptionDynamicFixedPoint(NetParameter* param,
const string layers_2_quantize, const string net_part, const int bw_conv,
const int bw_fc, const int bw_in, const int bw_out, const int bw_conv_bias, const int bw_fc_bias) {
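// Note: a bit-width argument of -1 means "not used in this pass": each call
// site only fills in the widths selected by layers_2_quantize and net_part,
// and the branches below never read the remaining arguments.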
for (int i = 0; i < param->layer_size(); ++i) {
// if this is a convolutional layer which should be quantized ...
if (layers_2_quantize.find("Convolution") != string::npos &&
param->layer(i).type().find("Convolution") != string::npos) {
// quantize parameters
if (net_part.find("Parameters") != string::npos) {
LayerParameter* param_layer = param->mutable_layer(i);
param_layer->set_type("ConvolutionRistretto");
param_layer->mutable_quantization_param()->set_fl_params(bw_conv -
GetIntegerLengthParams(param->layer(i).name()));
param_layer->mutable_quantization_param()->set_bw_params(bw_conv);
}
// quantize parameters bias
if (net_part.find("Bias") != string::npos) {
LayerParameter* param_layer = param->mutable_layer(i);
param_layer->set_type("ConvolutionRistretto");
param_layer->mutable_quantization_param()->set_fl_params_bias(bw_conv_bias -
GetIntegerLengthParamsBias(param->layer(i).name()));
param_layer->mutable_quantization_param()->set_bw_params_bias(bw_conv_bias);
}
// quantize activations
if (net_part.find("Activations") != string::npos) {
LayerParameter* param_layer = param->mutable_layer(i);
param_layer->set_type("ConvolutionRistretto");
param_layer->mutable_quantization_param()->set_fl_layer_in(bw_in -
GetIntegerLengthIn(param->layer(i).name()));
param_layer->mutable_quantization_param()->set_bw_layer_in(bw_in);
param_layer->mutable_quantization_param()->set_fl_layer_out(bw_out -
GetIntegerLengthOut(param->layer(i).name()));
param_layer->mutable_quantization_param()->set_bw_layer_out(bw_out);
}
}
// if this is an inner product layer which should be quantized ...
if (layers_2_quantize.find("InnerProduct") != string::npos &&
(param->layer(i).type().find("InnerProduct") != string::npos ||
param->layer(i).type().find("FcRistretto") != string::npos)) {
// quantize parameters
if (net_part.find("Parameters") != string::npos) {
LayerParameter* param_layer = param->mutable_layer(i);
param_layer->set_type("FcRistretto");
param_layer->mutable_quantization_param()->set_fl_params(bw_fc -
GetIntegerLengthParams(param->layer(i).name()));
param_layer->mutable_quantization_param()->set_bw_params(bw_fc);
}
// quantize parameters bias
if (net_part.find("Bias") != string::npos) {
LayerParameter* param_layer = param->mutable_layer(i);
param_layer->set_type("FcRistretto");
param_layer->mutable_quantization_param()->set_fl_params_bias(bw_fc_bias -
GetIntegerLengthParamsBias(param->layer(i).name()));
param_layer->mutable_quantization_param()->set_bw_params_bias(bw_fc_bias);
}
// quantize activations
if (net_part.find("Activations") != string::npos) {
LayerParameter* param_layer = param->mutable_layer(i);
param_layer->set_type("FcRistretto");
param_layer->mutable_quantization_param()->set_fl_layer_in(bw_in -
GetIntegerLengthIn(param->layer(i).name()) );
param_layer->mutable_quantization_param()->set_bw_layer_in(bw_in);
param_layer->mutable_quantization_param()->set_fl_layer_out(bw_out -
GetIntegerLengthOut(param->layer(i).name()) );
param_layer->mutable_quantization_param()->set_bw_layer_out(bw_out);
}
}
}
}
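// For reference, a convolution layer rewritten by this function comes out of
// WriteProtoToTextFile() looking roughly like this (field values are
// illustrative; the exact field set mirrors the setters used above, including
// the bias fields added in this modified version):
//
//   layer {
//     name: "conv1"
//     type: "ConvolutionRistretto"
//     ...
//     quantization_param {
//       bw_layer_in: 8
//       bw_layer_out: 8
//       bw_params: 8
//       fl_layer_in: 0
//       fl_layer_out: -1
//       fl_params: 4
//     }
//   }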
void Quantization::EditNetDescriptionMiniFloat(NetParameter* param,
const int bitwidth) {
caffe::QuantizationParameter_Precision precision =
caffe::QuantizationParameter_Precision_MINIFLOAT;
for (int i = 0; i < param->layer_size(); ++i) {
if ( param->layer(i).type() == "Convolution" ||
param->layer(i).type() == "ConvolutionRistretto") {
LayerParameter* param_layer = param->mutable_layer(i);
param_layer->set_type("ConvolutionRistretto");
param_layer->mutable_quantization_param()->set_precision(precision);
param_layer->mutable_quantization_param()->set_mant_bits(bitwidth
- exp_bits_ - 1);
param_layer->mutable_quantization_param()->set_exp_bits(exp_bits_);
} else if ( param->layer(i).type() == "InnerProduct" ||
param->layer(i).type() == "FcRistretto") {
LayerParameter* param_layer = param->mutable_layer(i);
param_layer->set_type("FcRistretto");
param_layer->mutable_quantization_param()->set_precision(precision);
param_layer->mutable_quantization_param()->set_mant_bits(bitwidth
- exp_bits_ - 1);
param_layer->mutable_quantization_param()->set_exp_bits(exp_bits_);
}
}
}
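// Sanity check on the split: bitwidth = 1 sign bit + exp_bits_ + mant_bits.
// E.g. a 16-bit minifloat with exp_bits_ = 5 gives 10 mantissa bits -- exactly
// the IEEE 754 half-precision layout.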
void Quantization::EditNetDescriptionIntegerPowerOf2Weights(
NetParameter* param) {
caffe::QuantizationParameter_Precision precision =
caffe::QuantizationParameter_Precision_INTEGER_POWER_OF_2_WEIGHTS;
for (int i = 0; i < param->layer_size(); ++i) {
if ( param->layer(i).type() == "Convolution" ||
param->layer(i).type() == "ConvolutionRistretto") {
LayerParameter* param_layer = param->mutable_layer(i);
param_layer->set_type("ConvolutionRistretto");
param_layer->mutable_quantization_param()->set_precision(precision);
// Weights are represented as 2^e where e in [-8,...,-1].
// This choice of exponents works well for AlexNet.
param_layer->mutable_quantization_param()->set_exp_min(-8);
param_layer->mutable_quantization_param()->set_exp_max(-1);
} else if ( param->layer(i).type() == "InnerProduct" ||
param->layer(i).type() == "FcRistretto") {
LayerParameter* param_layer = param->mutable_layer(i);
param_layer->set_type("FcRistretto");
param_layer->mutable_quantization_param()->set_precision(precision);
// Weights are represented as 2^e where e in [-8,...,-1].
// This choice of exponents works well for AlexNet.
param_layer->mutable_quantization_param()->set_exp_min(-8);
param_layer->mutable_quantization_param()->set_exp_max(-1);
}
}
}
int Quantization::GetIntegerLengthParams(const string layer_name) {
int pos = find(layer_names_.begin(), layer_names_.end(), layer_name)
- layer_names_.begin();
return il_params_[pos];
}
int Quantization::GetIntegerLengthParamsBias(const string layer_name) {
int pos = find(layer_names_.begin(), layer_names_.end(), layer_name)
- layer_names_.begin();
return il_params_bias_[pos];
}
int Quantization::GetIntegerLengthIn(const string layer_name) {
int pos = find(layer_names_.begin(), layer_names_.end(), layer_name)
- layer_names_.begin();
return il_in_[pos];
}
int Quantization::GetIntegerLengthOut(const string layer_name) {
int pos = find(layer_names_.begin(), layer_names_.end(), layer_name)
- layer_names_.begin();
return il_out_[pos];
}
To be continued...