大分辨率数据集切割

前言：航拍、遥感影像数据集通常单张图像分辨率很大，包含的目标较多且目标相对较小，直接进行目标检测往往效果不佳。此时对大分辨率图像做切割就是一个不错的选择，同时也扩充了数据量。

本文的切割思路是：先将xml文件转换为dota格式的txt文件，然后利用dota切割方法对图像进行切割，最后再将切割后的文件转换为yolo格式的txt文件。

1、xml文件转换为dota格式的txt文件

#  VOC标注格式为左上角与右下角坐标
#  这个文件直接将标注框左上角与右下角坐标转换为四个角的坐标（顺时针）
import xml.etree.ElementTree as ET
import pickle
import os
import os.path
from os import listdir, getcwd
from os.path import join
# Dataset splits; the driver loop reads datasets/mydata/<split>.txt for each.
sets = [
    'train',
    'test',
    'val',
]
# Class names to keep; convert_annotation skips objects with any other name.
classes = [
    'num0', 'num1', 'num2', 'num3', 'num4',
    'num5', 'num6', 'num7', 'num8', 'num9',
]


def convert_annotation(image_id):
    """Convert one Pascal VOC XML annotation into a DOTA-style text file.

    Reads ``datasets/mydata/Annotations/<image_id>.xml`` and writes
    ``datasets/mydata/labels/<image_id>.txt`` with one line per kept object::

        x0 y0 x1 y1 x2 y2 x3 y3 class difficult

    The four corners are derived from the VOC ``bndbox`` (top-left and
    bottom-right) and listed clockwise starting at the top-left corner.
    Objects whose class is not in the module-level ``classes`` list, or that
    are flagged ``difficult == 1``, are skipped. If the XML file does not
    exist nothing is written.

    Args:
        image_id: Annotation file name without the ``.xml`` extension.
    """
    xml_path = 'datasets/mydata/Annotations/%s.xml' % (image_id)
    if not os.path.isfile(xml_path):
        return

    # Pass the path so the parser opens the file in binary mode, honours the
    # encoding declared in the XML itself, and closes the handle afterwards.
    root = ET.parse(xml_path).getroot()

    with open('datasets/mydata/labels/%s.txt' % (image_id), 'w') as out_file:
        for obj in root.iter('object'):
            difficult_node = obj.find('difficult')
            # A missing <difficult> tag is treated as "not difficult".
            difficult = difficult_node.text if difficult_node is not None else '0'
            cls = obj.find('name').text
            if cls not in classes or int(difficult) == 1:
                continue

            xmlbox = obj.find('bndbox')
            xmin = int(float(xmlbox.find('xmin').text))
            ymin = int(float(xmlbox.find('ymin').text))
            xmax = int(float(xmlbox.find('xmax').text))
            ymax = int(float(xmlbox.find('ymax').text))

            # DOTA lists the corners clockwise: top-left, top-right,
            # bottom-right, bottom-left.
            out_file.write("{} {} {} {} {} {} {} {} {} {}\n".format(
                xmin, ymin, xmax, ymin, xmax, ymax, xmin, ymax, cls, difficult))
wd = getcwd()
print(wd)

# Create the label output directory once, before any split is processed.
os.makedirs('datasets/mydata/labels/', exist_ok=True)

# Each split list is rewritten in place as a list of image paths. These
# markers let a re-run recover the bare image id from such a path instead of
# nesting the prefix a second time and finding no annotation.
_path_prefix = 'datasets/mydata/images/'
_path_suffix = '.jpg'

for image_set in sets:
    list_path = 'datasets/mydata/%s.txt' % (image_set)
    # Read everything and close the handle before reopening the same path
    # for writing.
    with open(list_path) as id_file:
        image_ids = id_file.read().strip().split()

    with open(list_path, 'w') as list_file:
        for image_id in image_ids:
            if image_id.startswith(_path_prefix) and image_id.endswith(_path_suffix):
                image_id = image_id[len(_path_prefix):-len(_path_suffix)]
            list_file.write('datasets/mydata/images/%s.jpg\n' % (image_id))
            convert_annotation(image_id)


#数据存放格式
#|——根目录
#    |——datasets
#        |——dota_images
#        |——dota_labels
#        |——yolo_labels
#        |——mydata
#            |——Annotations
#            |——images
#            |——labels
#            |——test.txt
#            |——train.txt
#            |——val.txt

2、图像切割

import cv2
import os

def fill_right(img, size_w):
    """Pad ``img`` on its right edge so that it is ``size_w`` pixels wide.

    Used when the image is narrower than the crop width. The added columns
    are filled with the constant colour (107, 113, 115), which the original
    author describes as the dataset mean.

    Args:
        img: Image array as returned by ``cv2.imread``.
        size_w: Target width in pixels.

    Returns:
        The padded image.
    """
    missing_cols = size_w - img.shape[1]
    return cv2.copyMakeBorder(img, 0, 0, 0, missing_cols,
                              cv2.BORDER_CONSTANT, value=(107, 113, 115))

def fill_bottom(img, size_h):
    """Pad ``img`` on its bottom edge so that it is ``size_h`` pixels high.

    Used when the image is shorter than the crop height. The added rows are
    filled with the constant colour (107, 113, 115).

    Args:
        img: Image array as returned by ``cv2.imread``.
        size_h: Target height in pixels.

    Returns:
        The padded image.
    """
    missing_rows = size_h - img.shape[0]
    padded = cv2.copyMakeBorder(img, 0, missing_rows, 0, 0,
                                cv2.BORDER_CONSTANT, value=(107, 113, 115))
    return padded

def fill_right_bottom(img, size_w, size_h):
    """Pad ``img`` on the right and bottom up to ``size_w`` x ``size_h``.

    Used when the image is smaller than the crop window in both directions.
    The added pixels are filled with the constant colour (107, 113, 115).

    Args:
        img: Image array as returned by ``cv2.imread``.
        size_w: Target width in pixels.
        size_h: Target height in pixels.

    Returns:
        The padded image.
    """
    rows, cols = img.shape[0], img.shape[1]
    return cv2.copyMakeBorder(img, 0, size_h - rows, 0, size_w - cols,
                              cv2.BORDER_CONSTANT, value=(107, 113, 115))

def _tile_starts(length, tile, step):
    """Return the distinct crop start offsets along one image axis.

    Offsets advance by ``step``; a window that would run past the edge is
    shifted back so that it ends exactly at the edge. Offsets produced more
    than once by that shift are reported only once.
    """
    if length <= tile:
        # A single (possibly padded) window covers the whole axis.
        return [0]
    starts = []
    for offset in range(0, length - 1, step):
        offset = min(offset, length - tile)
        if offset not in starts:
            starts.append(offset)
    return starts


def image_split(img_floder, out_img_floder, size_w, size_h, step):
    """Cut every image in a folder into fixed-size, overlapping tiles.

    Each tile is saved as ``<name>_<start_h>_<start_w>.bmp`` in
    ``out_img_floder`` (created if missing), where ``start_h``/``start_w``
    are the tile's top-left corner in the source image. The label-splitting
    step relies on this naming to locate the matching annotations. Images
    smaller than the tile in either direction are padded on the right and/or
    bottom before cropping. Tiles are always written as .bmp regardless of
    the input format.

    Args:
        img_floder: Folder containing the source images.
        out_img_floder: Folder that receives the tiles.
        size_w: Tile width in pixels.
        size_h: Tile height in pixels.
        step: Stride between tile origins; the overlap is ``size - step``.
    """
    os.makedirs(out_img_floder, exist_ok=True)
    count = 0
    for img_name in os.listdir(img_floder):
        # splitext copes with extensions of any length (.jpeg, .tiff, ...).
        name = os.path.splitext(img_name)[0]
        img = cv2.imread(os.path.join(img_floder, img_name))
        if img is None:
            # cv2.imread returns None for files it cannot decode.
            print('图片{}无法读取，已跳过'.format(img_name))
            continue

        height, width = img.shape[0], img.shape[1]
        needs_right = width < size_w
        needs_bottom = height < size_h
        if needs_right and needs_bottom:
            print('图片{}需要在下面和右面补齐'.format(name))
        elif needs_right:
            print('图片{}需要在右面补齐'.format(name))
        elif needs_bottom:
            print('图片{}需要在下面补齐'.format(name))
        if needs_right or needs_bottom:
            # Padding only ever grows the image; an axis that is already
            # large enough receives zero extra pixels.
            img = fill_right_bottom(img, max(width, size_w), max(height, size_h))

        count = count + 1
        number = 0
        for start_h in _tile_starts(height, size_h, step):
            for start_w in _tile_starts(width, size_w, step):
                cropped = img[start_h:start_h + size_h, start_w:start_w + size_w]
                name_img = name + '_' + str(start_h) + '_' + str(start_w)
                cv2.imwrite(os.path.join(out_img_floder, name_img + '.bmp'), cropped)
                number = number + 1
        print('{}.bmp切割成{}张.'.format(name, number))
    print('共完成{}张图片'.format(count))

def txt_split(out_img_floder, txt_floder, out_txt_floder, size_h, size_w):
    """Write one DOTA label file per tile, keeping only fully contained boxes.

    For every tile ``<source>_<start_h>_<start_w>.<ext>`` in
    ``out_img_floder`` the labels of ``<source>.txt`` in ``txt_floder`` are
    read. Boxes whose four corners all lie inside the tile are translated to
    tile coordinates and written to ``<tile name>.txt`` in ``out_txt_floder``
    (created if missing). A tile with no such box gets an empty file.

    Args:
        out_img_floder: Folder containing the tiles.
        txt_floder: Folder containing the DOTA label files of the sources.
        out_txt_floder: Folder that receives the per-tile label files.
        size_h: Tile height in pixels.
        size_w: Tile width in pixels.

    Raises:
        ValueError: If a file name in ``out_img_floder`` does not end in
            ``_<int>_<int>``.
        FileNotFoundError: If the label file of a source image is missing.
    """
    os.makedirs(out_txt_floder, exist_ok=True)
    # Many tiles share a source image, so parse each label file only once.
    labels_by_source = {}
    for img_name in os.listdir(out_img_floder):
        name = os.path.splitext(img_name)[0]
        # Split from the right so the source name may itself contain any
        # number of underscores (or none).
        txt_name, h_text, w_text = name.rsplit('_', 2)
        h = int(h_text)
        w = int(w_text)

        if txt_name not in labels_by_source:
            with open(os.path.join(txt_floder, txt_name + '.txt'), 'r') as f_in:
                labels_by_source[txt_name] = [line.split() for line in f_in]

        kept = []
        for fields in labels_by_source[txt_name]:
            if len(fields) < 10:
                # Blank or incomplete line.
                continue
            xs = [int(float(v)) for v in fields[0:8:2]]
            ys = [int(float(v)) for v in fields[1:8:2]]
            inside_x = all(w <= x <= w + size_w for x in xs)
            inside_y = all(h <= y <= h + size_h for y in ys)
            if not (inside_x and inside_y):
                continue
            shifted = []
            for x, y in zip(xs, ys):
                shifted.extend((x - w, y - h))
            # fields[8] is the class label, fields[9] the difficult flag.
            kept.append('{} {} {} {} {} {} {} {} {} {}\n'.format(
                *shifted, fields[8], fields[9]))

        # 'w' rather than 'a': re-running must not duplicate the labels.
        with open(os.path.join(out_txt_floder, name + '.txt'), 'w') as f_out:
            f_out.writelines(kept)
    print('txt切割完成')

#  Tile geometry: width and height of every tile, in pixels.
size_w, size_h = 640, 640
#  Stride between tile origins; neighbouring tiles overlap by size_w - step.
step = 540

#  Source images and their DOTA-format label files.
img_floder = r'D:\cnn\yolov5-6.10\datasets\mydata\images'
txt_floder = r'D:\cnn\yolov5-6.10\datasets\mydata\labels'
#  Destinations for the tiles and for the per-tile label files.
out_img_floder = r'D:\cnn\yolov5-6.10\datasets\dota_images'
out_txt_floder = r'D:\cnn\yolov5-6.10\datasets\dota_labels'

#  Cut the images first: the label step lists the tiles that were written.
image_split(
    img_floder=img_floder,
    out_img_floder=out_img_floder,
    size_w=size_w,
    size_h=size_h,
    step=step,
)
#  Keyword arguments guard against the height-before-width parameter order.
txt_split(
    out_img_floder=out_img_floder,
    txt_floder=txt_floder,
    out_txt_floder=out_txt_floder,
    size_h=size_h,
    size_w=size_w,
)

# Remove tiles without any annotation: delete every empty label file together
# with its tile image. The folders come from the configuration above rather
# than from repeated absolute paths, so the two cannot drift apart.
txt_lists = os.listdir(out_txt_floder)
j = 0
for txt_list in txt_lists:
    name = os.path.splitext(txt_list)[0]
    label_path = os.path.join(out_txt_floder, txt_list)
    # A zero-byte file means no box fell inside this tile.
    if os.path.getsize(label_path) != 0:
        continue
    j = j + 1
    os.remove(label_path)
    image_path = os.path.join(out_img_floder, name + '.bmp')
    # The image may already have been removed by an earlier, interrupted run.
    if os.path.exists(image_path):
        os.remove(image_path)
print('共{}个txt文件为空'.format(j))


#数据存放格式
#|——根目录
#    |——datasets
#        |——dota_images
#        |——dota_labels
#        |——yolo_labels
#        |——mydata
#            |——Annotations
#            |——images
#            |——labels
#            |——test.txt
#            |——train.txt
#            |——val.txt

运行完后会在dota_images和dota_labels文件夹中生成切割后的图片和txt文件。

3、将dota格式的txt文件转换为yolo格式的txt文件

import os

# Class names; the index of a name in this list becomes its YOLO class id.
classes = ['num0', 'num1', 'num2', 'num3', 'num4', 'num5', 'num6', 'num7', 'num8', 'num9']
# Raw strings: in a normal literal "\c", "\y" and "\d" are invalid escape
# sequences, which newer Python versions warn about.
path = r"D:\cnn\yolov5-6.10\datasets\dota_labels"  # folder with the DOTA-format txt files
yolo_label = r'D:\cnn\yolov5-6.10\datasets\yolo_labels'  # folder for the YOLO-format txt files

# Convert every DOTA-format label file in `path` into a YOLO-format file of
# the same name in `yolo_label`: one "cls_id x y w h" line per box, with the
# box centre and size normalised by the tile size.
os.makedirs(yolo_label, exist_ok=True)

# Normalisation factor; this script assumes every tile is 640 x 640 pixels.
d = 1. / 640

label_lists = os.listdir(path)
for label_list in label_lists:
    position = os.path.join(path, label_list)
    out_txt_path = os.path.join(yolo_label, label_list)  # same file name as the input
    # Both handles are closed, and the output flushed, even if a line fails.
    with open(position, 'r') as f, open(out_txt_path, 'w') as out_file:
        for values in f:
            value = values.split()
            if len(value) < 9:
                # Blank or incomplete line.
                continue
            # DOTA corners run clockwise from the top-left, so fields 0-1 are
            # the top-left corner and fields 4-5 the bottom-right corner.
            xmin = float(value[0])
            ymin = float(value[1])
            xmax = float(value[4])
            ymax = float(value[5])

            x = (xmin + xmax) / 2.0  # box centre x
            y = (ymin + ymax) / 2.0  # box centre y
            w = xmax - xmin  # box width
            h = ymax - ymin  # box height
            x = x * d
            w = w * d
            y = y * d
            h = h * d

            cls = value[8]
            cls_id = classes.index(cls)
            out_file.write('{} {} {} {} {}\n'.format(cls_id, x, y, w, h))

tips：一定要记得归一化，因为yolo格式是要求归一化后的坐标，格式为cls_id, x, y, w, h！！！

至此，就得到了用于yolo训练的切割数据集！！！

4、将dota格式的txt文件转换为xml文件，此时可以用labelimg查看切割效果

import os
import glob
from PIL import Image

# Folder with the tile images.
dota_images = r'D:\cnn\yolov5-6.10\datasets\dota_images'
# Folder with the DOTA-format txt label files.
dota_labels = r'D:\cnn\yolov5-6.10\datasets\dota_labels'
# The XML files go next to the images so labelImg can open a single folder.
xml_labels = dota_images

labels = [
    'num0', 'num1', 'num2', 'num3', 'num4',
    'num5', 'num6', 'num7', 'num8', 'num9',
]

# Aliases used by the conversion loop: images, txt labels, XML output.
src_img_dir, src_txt_dir, src_xml_dir = dota_images, dota_labels, xml_labels

# Collect the tile images and reduce each path to its bare name:
# first drop the directory part, then drop the extension.
img_lists = glob.glob(src_img_dir + '/*.bmp')
img_basenames = [os.path.basename(img_path) for img_path in img_lists]
img_names = [os.path.splitext(base)[0] for base in img_basenames]

# Write one Pascal VOC XML file per tile so the result can be inspected in
# labelImg. A tile whose label file is empty still gets an XML file, with no
# <object> entries.
total_num = len(img_names)  # number of tiles to convert
count = 0  # progress counter
for img in img_names:  # img is the tile name without extension
    count += 1
    if count % 1000 == 0:
        print("当前转换进度{}/{}".format(count, total_num))

    # Only the size is needed; close the image right away.
    with Image.open((src_img_dir + '/' + img + '.bmp')) as im:
        width, height = im.size

    # Read the DOTA-format label file that belongs to this tile.
    with open(src_txt_dir + '/' + img + '.txt') as gt_file:
        gt = gt_file.read().splitlines()

    with open((src_xml_dir + '/' + img + '.xml'), 'w') as xml_file:
        # Header shared by tiles with and without objects.
        xml_file.write('<annotation>\n')
        xml_file.write('    <folder>VOC2007</folder>\n')
        xml_file.write('    <filename>' + str(img) + '.bmp' + '</filename>\n')
        xml_file.write('    <size>\n')
        xml_file.write('        <width>' + str(width) + '</width>\n')
        xml_file.write('        <height>' + str(height) + '</height>\n')
        xml_file.write('        <depth>3</depth>\n')
        xml_file.write('    </size>\n')

        for img_each_label in gt:
            # Use split(',') here instead if the label fields are comma-separated.
            spt = img_each_label.split(' ')
            if len(spt) < 9:
                # Blank or incomplete line.
                continue
            xml_file.write('    <object>\n')
            xml_file.write('        <name>' + spt[8] + '</name>\n')
            xml_file.write('        <pose>Unspecified</pose>\n')
            xml_file.write('        <truncated>0</truncated>\n')
            xml_file.write('        <difficult>0</difficult>\n')
            xml_file.write('        <bndbox>\n')

            # Fields 0-1 are the top-left corner and fields 4-5 the
            # bottom-right corner of the clockwise DOTA box.
            xmin = str(float(spt[0]))
            ymin = str(float(spt[1]))
            xmax = str(float(spt[4]))
            ymax = str(float(spt[5]))

            xml_file.write('            <xmin>' + xmin + '</xmin>\n')
            xml_file.write('            <ymin>' + ymin + '</ymin>\n')
            xml_file.write('            <xmax>' + xmax + '</xmax>\n')
            xml_file.write('            <ymax>' + ymax + '</ymax>\n')
            xml_file.write('        </bndbox>\n')
            xml_file.write('    </object>\n')

        xml_file.write('</annotation>')

使用labelimg打开dota_images文件夹就可以查看切割后的效果啦！！！

大分辨率数据集切割

1、xml文件转换为dota格式的txt文件

2、图像切割

3、将dota格式的txt文件转换为yolo格式的txt文件

4、将dota格式的txt文件转换为xml文件，此时可以用labelimg查看切割效果

猜你喜欢