detector.c 文件,这里仅分析 train_detector 函数。
void train_detector(char *datacfg, char *cfgfile, char *weightfile, int *gpus, int ngpus, int clear)
{
list *options = read_data_cfg(datacfg);
char *train_images = option_find_str(options, "train", "data/train.list");
char *backup_directory = option_find_str(options, "backup", "/backup/");
/*srand函数是随机数发生器的初始化函数。srand和rand()配合使用产生伪随机数序列。
rand函数在产生随机数前,需要系统提供的生成伪随机数序列的种子,rand根据这个种子的值产生一系列随机数。
如果系统提供的种子没有变化,每次调用rand函数生成的伪随机数序列都是一样的。*/
srand(time(0));
/*第三个参数是:`cfg/yolo.train.cfg`,`basecfg()`这个函数把`cfg/yolo.train.cfg`
变成了`yolo0train.cfg`,然后用base指针指向`yolo0train.cfg`*/
char *base = basecfg(cfgfile);
printf("%s\n", base); //打印"yolo"字样
float avg_loss = -1;
network *nets = calloc(ngpus, sizeof(network));
srand(time(0));
int seed = rand();
int i;
for(i = 0; i < ngpus; ++i)
{
srand(seed);
#ifdef GPU
cuda_set_device(gpus[i]);
#endif
nets[i] = parse_network_cfg(cfgfile);//解析网络构架,下面会仔细分析该函数
if(weightfile)
{
load_weights(&nets[i], weightfile);//加载预训练参数,下面会仔细分析该函数
}
if(clear) *nets[i].seen = 0;
nets[i].learning_rate *= ngpus;
}
srand(time(0));
network net = nets[0];
/*imgs是一次加载到内存的图像数量,如果占内存太大的话可以把subdivisions调大或者batch调小一点 */
int imgs = net.batch * net.subdivisions * ngpus;
printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay);
data train, buffer;
layer l = net.layers[net.n - 1];
int classes = l.classes;
float jitter = l.jitter; //jitter是什么意思呢?可以参考这篇博客:[非均衡数据集处理:利用抖动(jittering)生成额外数据]
list *plist = get_paths(train_images);
//int N = plist->size;
char **paths = (char **)list_to_array(plist);
load_args args = {0};
args.w = net.w;
args.h = net.h;
args.paths = paths;
args.n = imgs; //n就是一次加载到内存中的图片数量
args.m = plist->size;//m是待训练图片的总数量
args.classes = classes;
args.jitter = jitter;
args.num_boxes = l.max_boxes;
args.d = &buffer;
args.type = DETECTION_DATA;
args.threads = 8;
//调节图片旋转角度、曝光度、饱和度、色调等,来增加图片数量
args.angle = net.angle;
args.exposure = net.exposure;
args.saturation = net.saturation;
args.hue = net.hue;
pthread_t load_thread = load_data(args);
clock_t time;
int count = 0;
//while(i*imgs < N*120){
while(get_current_batch(net) < net.max_batches)
{
//进行10次迭代后,调整一次网络大小
if(l.random && count++%10 == 0)
{
printf("Resizing\n");
int dim = (rand() % 10 + 10) * 32;//dim为320,352,384,416。。。
if (get_current_batch(net)+100 > net.max_batches)
dim = 544;
//int dim = (rand() % 4 + 16) * 32;
printf("%d\n", dim);
//网络输入图片的宽高可调节,dim最小为320,最大为618,这样可以更好使用多尺度的目标
args.w = dim;
args.h = dim;
pthread_join(load_thread, 0);
train = buffer;
free_data(train);
load_thread = load_data(args);
for(i = 0; i < ngpus; ++i){
resize_network(nets + i, dim, dim);
}
net = nets[0];
}
time=clock();
pthread_join(load_thread, 0);
train = buffer;
load_thread = load_data(args);
printf("Loaded: %lf seconds\n", sec(clock()-time));
time=clock();
float loss = 0;
#ifdef GPU
if(ngpus == 1)
{
loss = train_network(net, train);
}
else
{
loss = train_networks(nets, ngpus, train, 4);//开始训练
}
#else
loss = train_network(net, train); //开始训练
#endif
if (avg_loss < 0) avg_loss = loss;
avg_loss = avg_loss*.9 + loss*.1;
i = get_current_batch(net);
printf("%d: %f, %f avg, %f rate, %lf seconds, %d images\n", get_current_batch(net), loss, avg_loss, get_current_rate(net), sec(clock()-time), i*imgs);
//每100次或者1000次保存一次权重
if(i%1000==0 || (i < 1000 && i%100 == 0))
{
#ifdef GPU
if(ngpus != 1) sync_nets(nets, ngpus, 0);
#endif
char buff[256];
sprintf(buff, "%s/%s_%d.weights", backup_directory, base, i);
save_weights(net, buff);
}
free_data(train);
}
#ifdef GPU
if(ngpus != 1) sync_nets(nets, ngpus, 0);
#endif
char buff[256];
sprintf(buff, "%s/%s_final.weights", backup_directory, base);
save_weights(net, buff);
}
注意
- resize网络是yolo v2版本新加的功能。即每进行10次迭代就会resize一次网络输入图片的宽和高,这样保证了网络可以适应各种不同尺度的目标;这样一来,即使没有dropout层,训练出来的网络也不容易过拟合。
- 在imgs = net.batch * net.subdivisions * ngpus中,net.batch并不是cfg文件中的batch值,而是cfg文件中的batch值除以net.subdivisions。这样一来,一次加载imgs张图片到内存,while循环每执行一次,就会处理完这些图片,完成一次迭代。比如,cfg文件中的batch为64,subdivisions为16,那么在计算imgs时,net.batch=64/16=4,net.subdivisions=16,因此imgs=4*16*ngpus,单GPU时为64。至于为什么net.batch不等于cfg文件中的batch值,请看下面的说明。
- net在初始化时调用了parse_network_cfg函数,该函数又调用了parse_net_options,后者修改了net->batch的值:
net->batch = option_find_int(options, "batch", 1);
int subdivs = option_find_int(options, "subdivisions", 1);
net->batch /= subdivs;
net->subdivisions = subdivs;