一、提取对应类别的图片和标注文件
import os
import xml.etree.ElementTree as ET
import shutil
# Paths
# Location of the source dataset
ann_filepath = '/data/VOCdevkit/VOC2007/Annotations/'
img_filepath = '/data/VOCdevkit/VOC2007/JPEGImages/'
# Location where the filtered dataset is saved
img_savepath = 'data/VOCdevkit/VOC2007_1/JPEGImages/'
ann_savepath = 'data/VOCdevkit/VOC2007_1/Annotations/'
# makedirs() also creates the missing parent directories (os.mkdir() would
# raise FileNotFoundError when 'data/VOCdevkit/VOC2007_1' does not exist yet);
# exist_ok=True makes re-running the script harmless.
os.makedirs(img_savepath, exist_ok=True)
os.makedirs(ann_savepath, exist_ok=True)

# VOC class information
# Labels to keep: objects of any other class are removed from the annotations
classes = ['aeroplane', 'bicycle', 'bird', 'boat', 'bottle',
           'bus', 'car', 'cat', 'chair', 'cow', 'diningtable',
           'dog', 'horse', 'motorbike', 'pottedplant',]  # The classes needed
# Alternative selection (the remaining five VOC classes):
# classes = ['sheep', 'sofa', 'train', 'person','tvmonitor']
def save_annotation(file):
    """Filter one VOC annotation file down to the wanted classes and save it.

    Parses ``file`` from ``ann_filepath``, removes every ``<object>`` element
    whose ``<name>`` is not listed in ``classes``, and writes the filtered
    tree to ``ann_savepath`` under the same file name, but only when at
    least one wanted object remains.

    Args:
        file: File name (not a full path) of the annotation XML located in
            ``ann_filepath``.

    Returns:
        True if at least one object was kept and the annotation was written,
        False otherwise (nothing is written in that case).

    Raises:
        xml.etree.ElementTree.ParseError: If the file is not well-formed XML.
        OSError: If the file cannot be read or the result cannot be written.
    """
    tree = ET.parse(os.path.join(ann_filepath, file))
    root = tree.getroot()
    has_wanted_object = False
    # findall() returns a list snapshot, so removing children from ``root``
    # while looping over it is safe.
    for obj in root.findall("object"):
        # findtext() returns None for an <object> that has no <name> child;
        # such an object is treated as unwanted instead of raising
        # AttributeError on ``None.text``.
        if obj.findtext("name") in classes:
            has_wanted_object = True
        else:
            root.remove(obj)
    if not has_wanted_object:
        return False
    tree.write(os.path.join(ann_savepath, file))
    return True
def save_images(file):
    """Copy the image matching an annotation file and record its name.

    The image is looked up in ``img_filepath`` as the annotation's base name
    with a ``.jpg`` extension and copied into ``img_savepath``. The base name
    is then appended as one line to ``list.txt`` in the current working
    directory.

    Args:
        file: Annotation file name; only its base name (without extension)
            is used.

    Returns:
        Always True.
    """
    stem = os.path.splitext(file)[0]
    shutil.copy(img_filepath + stem + ".jpg", img_savepath)
    # Text file that collects the image names (opened in append mode)
    with open('list.txt', 'a') as name_log:
        name_log.write(stem + "\n")
    return True
if __name__ == '__main__':
    # os.listdir() returns entries in arbitrary order; sorting makes the
    # order of names in list.txt deterministic, so a later seeded shuffle of
    # that file is reproducible across runs and machines.
    for f in sorted(os.listdir(ann_filepath)):
        # Skip anything that is not an annotation XML (hidden files,
        # sub-directories, ...), which would otherwise crash the XML parser.
        if not f.lower().endswith('.xml'):
            continue
        # Copy the image only when its annotation kept at least one object.
        if save_annotation(f):
            save_images(f)
只需修改对应的文件路径(ann_filepath、img_filepath、img_savepath、ann_savepath)和需要的类别名称(classes)即可。
二、将txt中保存的图片名分为训练集和测试集
import random
import os
# Fix the random seed so that every run produces the same split
random.seed(1234)
# Proportions of the training and test sets
train_ratio = 0.8
test_ratio = 0.2  # informational only: the test set is whatever is left over
# Read the txt file that contains the image names
with open("list.txt", "r") as f:
    # Normalize the entries before shuffling:
    #  - strip whitespace and re-append "\n", so a final line without a
    #    trailing newline is not fused with another name after the shuffle;
    #  - drop blank lines;
    #  - drop duplicates (keeping the first occurrence), so the same image
    #    cannot end up in both the training and the test set.
    names = (line.strip() for line in f)
    lines = [name + "\n" for name in dict.fromkeys(names) if name]
# Shuffle the image names in place
random.shuffle(lines)
# Compute the sizes of the training and test sets
num_train = int(train_ratio * len(lines))
num_test = len(lines) - num_train
# Split into training and test sets
train_lines = lines[:num_train]
test_lines = lines[num_train:]
# Write the training and test sets to their txt files
with open("train.txt", "w") as f:
    f.writelines(train_lines)
with open("test.txt", "w") as f:
    f.writelines(test_lines)
将代码中 txt 文件的路径(list.txt、train.txt、test.txt)修改为自己的真实路径即可。