http://www.infocool.net/kb/WWW/201703/317548.html
-
[
net]
-
batch=
64 每batch个样本更新一次参数。
-
subdivisions=
8 如果内存不够大,将batch分割为subdivisions个子batch,每个子batch的大小为batch/subdivisions。
-
在darknet代码中,会将batch/subdivisions命名为batch。
-
height=
416 input图像的高
-
width=
416 Input图像的宽
-
channels=
3 Input图像的通道数
-
momentum=
0.9 动量
-
decay=
0.0005 权重衰减正则项,防止过拟合
-
angle=
0 通过旋转角度来生成更多训练样本
-
saturation =
1.5 通过调整饱和度来生成更多训练样本
-
exposure =
1.5 通过调整曝光量来生成更多训练样本
-
hue=
.1 通过调整色调来生成更多训练样本
-
-
learning_rate=
0.0001 初始学习率
-
max_batches =
45000 训练达到max_batches后停止学习
-
policy=steps 调整学习率的policy,有如下policy:CONSTANT, STEP, EXP, POLY, STEPS, SIG, RANDOM
-
steps=
100,
25000,
35000 根据batch_num调整学习率
-
scales=
10,
.1,
.1 学习率变化的比例,累计相乘
-
-
[
convolutional]
-
batch_normalize=
1 是否做BN
-
filters=
32 输出多少个特征图
-
size=
3 卷积核的尺寸
-
stride=
1 做卷积运算的步长
-
pad=
1 如果pad为0,padding由padding参数指定;如果pad为1,padding大小为size/2
-
activation=leaky 激活函数:
-
logistic,loggy,relu,elu,relie,plse,hardtan,lhtan,linear,ramp,leaky,tanh,stair
-
-
[
maxpool]
-
size=
2 池化层尺寸
-
stride=
2 池化步进
-
-
[
convolutional]
-
batch_normalize=
1
-
filters=
64
-
size=
3
-
stride=
1
-
pad=
1
-
activation=leaky
-
-
[
maxpool]
-
size=
2
-
stride=
2
-
-
......
-
......
-
-
-
#######
-
-
[
convolutional]
-
batch_normalize=
1
-
size=
3
-
stride=
1
-
pad=
1
-
filters=
1024
-
activation=leaky
-
-
[
convolutional]
-
batch_normalize=
1
-
size=
3
-
stride=
1
-
pad=
1
-
filters=
1024
-
activation=leaky
-
-
[
route] the route layer brings finer-grained features in from an earlier layer in the network
-
layers=
-9
-
-
[
reorg] the reorg layer
is to make these features match the feature map size at the later layer.
-
The end feature map
is
13x13, the feature map
from earlier
is
26x26x512.
-
The reorg layer maps the
26x26x512 feature map onto a
13x13x2048 feature map
-
so that it can be concatenated with the feature maps at
13x13 resolution.
-
stride=
2
-
-
[
route]
-
layers=
-1,
-3
-
-
[
convolutional]
-
batch_normalize=
1
-
size=
3
-
stride=
1
-
pad=
1
-
filters=
1024
-
activation=leaky
-
-
[
convolutional]
-
size=
1
-
stride=
1
-
pad=
1
-
filters=
125 region前最后一个卷积层的filters数是特定的,计算公式为filter=num*(classes+
5)
-
5的意义是4个坐标加1个置信度,即论文中的tx,ty,tw,th,to(to为预测框中存在物体的置信度)
-
activation=linear
-
-
[
region]
-
anchors =
1.08,
1.19,
3.42,
4.41,
6.63,
11.38,
9.42,
5.11,
16.62,
10.52 预选框,可以手工挑选,
-
也可以通过k-means聚类从训练样本中学出
-
bias_match=
1
-
classes=
20 网络需要识别的物体种类数
-
coords=
4 每个box的
4个坐标tx,ty,tw,th
-
num=
5 每个grid cell预测几个box
-
softmax=
1 使用softmax做激活函数
-
jitter=
.2 通过抖动增加噪声来抑制过拟合
-
rescore=
1 暂理解为一个开关,非0时通过重打分来调整l.delta(预测值与真实值的差)
-
-
object_scale=
5 暂理解为计算损失时预测框中有物体时的权重
-
noobject_scale=
1 暂理解为计算损失时预测框中无物体时的权重
-
class_scale=
1 暂理解为计算类别损失时的权重
-
coord_scale=
1 暂理解为计算损失时坐标偏差的权重
-
-
absolute=
1
-
thresh =
.6
-
random=
0 是否开启多尺度训练:非0时训练过程中每隔若干个batch随机调整网络输入图像的尺寸,为0时输入尺寸固定不变
darknet对应代码
找到cfg文件解析的代码,选择detector demo 作为入口
darknet.c文件 main 函数开始
-
}
else
if (
0 ==
strcmp(argv[
1],
"detector")){
-
run_detector(argc, argv);
Detector.c文件 run_detector函数
-
char *prefix = find_char_arg(argc, argv,
"-prefix",
0);
-
float thresh = find_float_arg(argc, argv,
"-thresh",
.24);
-
float hier_thresh = find_float_arg(argc, argv,
"-hier",
.5);
-
int cam_index = find_int_arg(argc, argv,
"-c",
0);
-
int frame_skip = find_int_arg(argc, argv,
"-s",
0);
-
if(argc <
4){
-
fprintf(
stderr,
"usage: %s %s [train/test/valid] [cfg] [weights (optional)]\n", argv[
0], argv[
1]);
-
return;
-
}
-
char *gpu_list = find_char_arg(argc, argv,
"-gpus",
0);
-
char *outfile = find_char_arg(argc, argv,
"-out",
0);
-
-
......
-
......
-
-
else
if(
0==
strcmp(argv[
2],
"demo")) {
-
list *options = read_data_cfg(datacfg);
-
int classes = option_find_int(options,
"classes",
20);
-
char *name_list = option_find_str(options,
"names",
"data/names.list");
-
char **names = get_labels(name_list);
-
demo(cfg, weights, thresh, cam_index, filename, names, classes, frame_skip, prefix, hier_thresh);
-
}
read_data_cfg函数解析配置文件,保存到options指针。
classes
int classes = option_find_int(options, "classes", 20);
classes为YOLO可识别的种类数
batch、learning_rate、momentum、decay和 subdivisions
demo.c文件demo函数
net = parse_network_cfg(cfgfile);
Parser.c文件 parse_network_cfg函数
-
list *sections = read_cfg(filename);
-
node *n = sections->front;
-
if(!n) error(
"Config file has no sections");
-
network net = make_network(sections->size -
1);
-
net.gpu_index = gpu_index;
-
size_params params;
-
-
section *s = (section *)n->val;
-
list *options = s->options;
-
if(!is_network(s)) error(
"First section must be [net] or [network]");
-
parse_net_options(options, &net);
parse_net_options函数
-
net->batch = option_find_int(options,
"batch",
1);
-
net->learning_rate = option_find_float(options,
"learning_rate",
.001);
-
net->momentum = option_find_float(options,
"momentum",
.9);
-
net->decay = option_find_float(options,
"decay",
.0001);
-
int subdivs = option_find_int(options,
"subdivisions",
1);
-
net->time_steps = option_find_int_quiet(options,
"time_steps",
1);
-
net->batch /= subdivs;
-
net->batch *= net->time_steps;
-
net->subdivisions = subdivs;
learning_rate为初始学习率,训练时的真正学习率和学习率的策略及初始学习率有关。
momentum为动量,在训练时加入动量可以帮助走出local minima 以及saddle point。
decay是权重衰减正则项,用来防止过拟合。
batch的值等于cfg文件中的batch/subdivisions 再乘以time_steps。
time_steps在yolo默认的cfg中是没有配置的,所以是默认值1。
因此batch可以认为就是cfg文件中的batch/subdivisions。
前面有提到batch的意义是每batch个样本更新一次参数。
而subdivisions的意义在于降低对GPU memory的要求。
darknet将batch分割为subdivisions个子batch,每个子batch的大小为batch/subdivisions,并将子batch命名为batch。
我们看下训练时和batch有关的代码
Detector.c文件的train_detector函数
-
#ifdef GPU
-
if(ngpus ==
1){
-
loss = train_network(net, train);
-
}
else {
-
loss = train_networks(nets, ngpus, train,
4);
-
}
-
#else
-
loss = train_network(net, train);
-
#endif
Network.c文件的train_network函数
-
int batch = net.batch;
-
int n = d.X.rows / batch;
-
float *X =
calloc(batch*d.X.cols,
sizeof(
float));
-
float *y =
calloc(batch*d.y.cols,
sizeof(
float));
-
-
int i;
-
float sum =
0;
-
for(i =
0; i < n; ++i){
-
get_next_batch(d, batch, i*batch, X, y);
-
float err = train_network_datum(net, X, y);
-
sum += err;
-
}
train_network_datum函数
-
*net.seen += net.batch;
-
......
-
......
-
forward_network(net,
state);
-
backward_network(net,
state);
-
float error = get_network_cost(net);
-
if(((*net.seen)/net.batch)%net.subdivisions ==
0) update_network(net);
我们看到,只有((*net.seen)/net.batch)%net.subdivisions == 0时才会更新网络参数。
*net.seen是已经训练过的样本数(每训练一个子batch就增加net.batch),因此(*net.seen)/net.batch是已经训练过的子batch数;((*net.seen)/net.batch)%net.subdivisions == 0 表示刚好训练完subdivisions个子batch,也就是完成了一个真正的batch,此时才更新一次参数。
policy、steps和scales
Parser.c文件 parse_net_options函数(由parse_network_cfg调用)
-
char *policy_s = option_find_str(options,
"policy",
"constant");
-
net->policy = get_policy(policy_s);
-
net->burn_in = option_find_int_quiet(options,
"burn_in",
0);
-
if(net->policy == STEP){
-
net->step = option_find_int(options,
"step",
1);
-
net->scale = option_find_float(options,
"scale",
1);
-
}
else
if (net->policy == STEPS){
-
char *l = option_find(options,
"steps");
-
char *p = option_find(options,
"scales");
-
if(!l || !p) error(
"STEPS policy must have steps and scales in cfg file");
-
-
int len =
strlen(l);
-
int n =
1;
-
int i;
-
for(i =
0; i < len; ++i){
-
if (l[i] ==
',') ++n;
-
}
-
int *steps =
calloc(n,
sizeof(
int));
-
float *scales =
calloc(n,
sizeof(
float));
-
for(i =
0; i < n; ++i){
-
int step = atoi(l);
-
float scale = atof(p);
-
l =
strchr(l,
',')+
1;
-
p =
strchr(p,
',')+
1;
-
steps[i] = step;
-
scales[i] = scale;
-
}
-
net->scales = scales;
-
net->steps = steps;
-
net->num_steps = n;
-
}
else
if (net->policy == EXP){
-
net->gamma = option_find_float(options,
"gamma",
1);
-
}
else
if (net->policy == SIG){
-
net->gamma = option_find_float(options,
"gamma",
1);
-
net->step = option_find_int(options,
"step",
1);
-
}
else
if (net->policy == POLY || net->policy == RANDOM){
-
net->power = option_find_float(options,
"power",
1);
-
}
get_policy函数
-
if (
strcmp(s,
"random")==
0)
return RANDOM;
-
if (
strcmp(s,
"poly")==
0)
return POLY;
-
if (
strcmp(s,
"constant")==
0)
return CONSTANT;
-
if (
strcmp(s,
"step")==
0)
return STEP;
-
if (
strcmp(s,
"exp")==
0)
return EXP;
-
if (
strcmp(s,
"sigmoid")==
0)
return SIG;
-
if (
strcmp(s,
"steps")==
0)
return STEPS;
-
fprintf(
stderr,
"Couldn't find policy %s, going with constant\n", s);
-
return CONSTANT;
学习率动态调整的策略有多种,YOLO默认使用的是steps。
yolo-voc.cfg文件:
steps=100,25000,35000
scales=10,.1,.1
Network.c文件get_current_rate函数
-
int batch_num = get_current_batch(net);
-
int i;
-
float rate;
-
switch (net.policy) {
-
case CONSTANT:
-
return net.learning_rate;
-
case STEP:
-
return net.learning_rate *
pow(net.scale, batch_num/net.step);
-
case STEPS:
-
rate = net.learning_rate;
-
for(i =
0; i < net.num_steps; ++i){
-
if(net.steps[i] > batch_num)
return rate;
-
rate *= net.scales[i];
-
//if(net.steps[i] > batch_num - 1 && net.scales[i] > 1) reset_momentum(net);
-
}
-
return rate;
get_current_batch获取的是(*net.seen)/(net.batch*net.subdivisions),即真正的batch。
steps的每个阶段是根据batch_num划分的,根据配置文件,学习率会在batch_num达到100、25000、35000时发生改变。
当前的学习率是初始学习率与当前阶段及之前所有阶段对应的scale的总乘积。
convolutional超参数加载
Parser.c文件parse_network_cfg函数
-
LAYER_TYPE
lt = string_to_layer_type(
s->type);
-
if(
lt == CONVOLUTIONAL){
-
l = parse_convolutional(options, params);
parse_convolutional函数
-
int n = option_find_int(options,
"filters",
1);
-
int size = option_find_int(options,
"size",
1);
-
int stride = option_find_int(options,
"stride",
1);
-
int pad = option_find_int_quiet(options,
"pad",
0);
-
int padding = option_find_int_quiet(options,
"padding",
0);
-
if(pad) padding = size/
2;
-
-
char *activation_s = option_find_str(options,
"activation",
"logistic");
-
ACTIVATION activation = get_activation(activation_s);
-
-
int batch,h,w,c;
-
h =
params.h;
-
w =
params.w;
-
c =
params.c;
-
batch=
params.batch;
-
if(!(h && w && c)) error(
"Layer before convolutional layer must output image.");
-
int batch_normalize = option_find_int_quiet(options,
"batch_normalize",
0);
需要注意的是如果enable了pad,cfg文件中的padding不会生效,实际的padding值为size/2。