yolo v2物体分类工程的前期图像预处理关键代码解析

1、该物体分类工程的样本增强功能很强大，比 Caffe 的好很多。下面对训练工程的样本预处理代码进行解析。

代码入口在 data.c 文件中，代码如下：

/*
 * Load n images from `paths`, augment each one, and return the augmented
 * pixel buffers as the rows of a matrix.
 *
 * Per image, in order:
 *   1. random_augment_image() - rotation / crop driven by angle, aspect,
 *      min, max and size;
 *   2. flip_image() with probability 1/2 (a left-right flip according to the
 *      original author's note; it is not controlled by any parameter);
 *   3. random_distort_image() - hue / saturation / exposure jitter (the
 *      original author recommends keeping these values small).
 *
 * Returns a matrix with n rows. Row i stores the data pointer of the i-th
 * augmented image (the buffer is handed over to the matrix, not copied).
 * cols is h*w*c of the last augmented image, or 0 when n == 0.
 */
matrix load_image_augment_paths(char **paths, int n, int min, int max, int size, float angle, float aspect, float hue, float saturation, float exposure)
{
    matrix X;
    X.cols = 0;
    X.rows = n;
    X.vals = calloc(X.rows, sizeof(float*));

    int idx = 0;
    while (idx < n) {
        image original = load_image_color(paths[idx], 0, 0);

        /* Rotation and cropping happen here. */
        image augmented = random_augment_image(original, angle, aspect, min, max, size);

        /*
         * Debug preview of the source image and its crop.
         * NOTE(review): the original author reports errors when several
         * loader threads reach this display code at once, and works around
         * it by running with batch=1. cvWaitKey(0) presumably waits for a
         * key press on every image - confirm before using this in training.
         */
        show_image(original, "orig");
        show_image(augmented, "crop");
        cvWaitKey(0);

        /* Coin flip: mirror roughly half of the samples. */
        if (random_gen() % 2) {
            flip_image(augmented);
        }

        /* Colour jitter: hue, saturation and exposure. */
        random_distort_image(augmented, hue, saturation, exposure);

        /* The source image is no longer needed; the crop's buffer lives on in X. */
        free_image(original);
        X.vals[idx] = augmented.data;
        X.cols = augmented.h * augmented.w * augmented.c;

        ++idx;
    }
    return X;
}

其中random_augment_image()是进行样本增强的函数入口，其代码位置在image.c，代码如下：

/*
 * Build one randomly scaled, rotated and shifted size x size crop of `im`.
 *
 * im     - source image.
 * angle  - rotation is drawn from rand_uniform(-angle, angle), in degrees.
 * aspect - passed through rand_scale() to obtain the aspect factor used here.
 * low, high - bounds handed to rand_int() to pick the target length of the
 *             shorter (aspect-adjusted) image side.
 * size   - width and height of the returned crop.
 *
 * A single scale factor is applied to both axes. Per the original author's
 * note this effectively assumes roughly square samples; separate x/y scale
 * factors would be needed to treat w != h inputs differently.
 */
image random_augment_image(image im, float angle, float aspect, int low, int high, int size)
{
    aspect = rand_scale(aspect);

    /*
     * Pick the side length the image is resized to before cropping. Because
     * this length is random and only a size x size window is taken from the
     * resized image, the result is a random crop.
     */
    int target = rand_int(low, high);
    int shorter = (im.h < im.w*aspect) ? im.h : im.w*aspect;
    float scale = (float)target / shorter;

    /* Rotation angle for the affine transform, converted from degrees to radians. */
    float rad = rand_uniform(-angle, angle) * TWO_PI / 360.;

    /*
     * Largest shift, in scaled pixels, between the crop centre and the image
     * centre along each axis; clamped at zero when the scaled image is
     * smaller than the crop.
     */
    float max_dx = (im.w*scale/aspect - size) / 2.;
    float max_dy = (im.h*scale - size) / 2.;
    max_dx = (max_dx < 0) ? 0 : max_dx;
    max_dy = (max_dy < 0) ? 0 : max_dy;

    float dx = rand_uniform(-max_dx, max_dx);
    float dy = rand_uniform(-max_dy, max_dy);

    /* Rotation and cropping are done in a single resampling pass. */
    return rotate_crop_image(im, rad, scale, size, size, dx, dy, aspect);
}

其中的rotate_crop_image()的代码位置在image.c，其代码如下：

/*
 * Resample `im` into a new w x h image, applying rotation, scaling, an
 * aspect change and a translation in one pass.
 *
 * im     - source image.
 * rad    - rotation angle in radians.
 * s      - scale factor; output coordinates are divided by s to get source
 *          coordinates.
 * w, h   - size of the output image.
 * dx, dy - shift of the crop window, in scaled (output) pixels.
 * aspect - extra factor applied to the horizontal source coordinate.
 *
 * Returns a newly made image (via make_image) with im.c channels.
 *
 * For each output pixel (x, y) the loop computes the source position
 * (rx, ry) it comes from, then samples the source with
 * bilinear_interpolate(). rx/ry may fall outside the source image.
 * NOTE(review): per the original author's note, bilinear_interpolate()
 * restricts such coordinates to a valid range, which is what fills the
 * otherwise empty border regions - confirm against its definition.
 */
image rotate_crop_image(image im, float rad, float s, int w, int h, float dx, float dy, float aspect)
{
    int x, y, c;
    float cx = im.w/2.;
    float cy = im.h/2.;

    /* Loop-invariant terms, hoisted so they are not recomputed per pixel. */
    const double cos_r = cos(rad);
    const double sin_r = sin(rad);
    const float shift_x = dx/s*aspect;
    const float shift_y = dy/s;

    image rot = make_image(w, h, im.c);
    for(c = 0; c < im.c; ++c){
        for(y = 0; y < h; ++y){
            /* Vertical source offset from the centre; constant along a row. */
            const double v = (y - h/2.)/s + shift_y;
            for(x = 0; x < w; ++x){
                /*
                 * Horizontal source offset from the centre. (x - w/2.)/s
                 * maps the output pixel into source coordinates, so only the
                 * central w x h window of the scaled image is sampled.
                 */
                const double u = (x - w/2.)/s*aspect + shift_x;

                /* Rotate the offset about the source centre (cx, cy). */
                float rx = cos_r*u - sin_r*v + cx;
                float ry = sin_r*u + cos_r*v + cy;

                float val = bilinear_interpolate(im, rx, ry, c);
                set_pixel(rot, x, y, c, val);
            }
        }
    }
    return rot;
}

样本例子展示：

a、这个是进行双线性插值后，填补无像素值的效果

b、这是 w、h 两个轴分别使用不同缩放比例的结果，并且没有假设图像被放大，而是与网络输入等比例：

c、使用 1724x724 大小的样本，在没有任何裁剪和缩放的情况下，会出现图像显示不完整的情况，这是由于样本的 x、y 轴都采用了相同的 scale 比例：

yolo v2物体分类工程的前期图像预处理关键代码解析

猜你喜欢