用的时候看注释即可
以下代码作用
- 在同级目录下创建 datasets 文件夹,其中包含训练集和验证集
- 按 train_rate 的比例(默认 0.8,即 80%)随机抽取数据作为训练集,其余部分(1 - train_rate,即 20%)作为验证集,并复制到对应文件夹
import numpy as np
import os
from shutil import copy2
import random
# --------- Configuration: review these values before running ---------
# Directory that holds the source data. Joining with an empty last component
# keeps a trailing separator while using the platform's own separator instead
# of a hard-coded Windows backslash.
orgin_path = os.path.join(os.pardir, '')
orgin_image_path = os.path.join(orgin_path, 'images')  # source image folder
orgin_label_path = os.path.join(orgin_path, 'YOLO')  # source label folder
train_rate = 0.8  # fraction of the images assigned to the training set
support_suffix = ['.tiff', '.png', '.jpg']  # image extensions to pick up
# --------- End of configuration ---------
path = r'datasets'  # dataset root dir
train = r'images/train'  # train images (relative to 'path')
val = r'images/val'  # val images (relative to 'path')
train_label = r'labels/train'  # train labels (relative to 'path')
val_label = r'labels/val'  # val labels (relative to 'path')

# What the rest of the script does:
# 1. Creates the 'datasets' directory (relative to the current working
#    directory) with training and validation sub-folders.
# 2. Randomly assigns a train_rate share (0.8 -> 80%) of the images to the
#    training set and the remainder (20%) to the validation set, copying the
#    files into the matching folders.
train_path = f'{path}/{train}'
val_path = f'{path}/{val}'
train_label_path = f'{path}/{train_label}'
val_label_path = f'{path}/{val_label}'
def check_file(path_data):
    """Ensure that the directory ``path_data`` exists.

    Missing intermediate directories are created as well. Calling this on a
    directory that already exists is a no-op.

    Args:
        path_data: Path of the directory to create.

    Raises:
        FileExistsError: If ``path_data`` exists but is not a directory.
    """
    # exist_ok=True avoids the check-then-create race of a separate
    # os.path.exists() test and still rejects a regular file at this path.
    os.makedirs(path_data, exist_ok=True)
# Make sure all four destination directories exist before copying anything.
check_file(train_path)
check_file(val_path)
check_file(train_label_path)
check_file(val_label_path)

# Collect the image files from the source image folder. os.path.splitext is
# used so that a name without any dot is never mistaken for an image, and the
# comparison is case-insensitive so e.g. '.JPG' is picked up as well.
allowed_suffixes = {suffix.lower() for suffix in support_suffix}
all_data_img = [
    name
    for name in os.listdir(orgin_image_path)
    if os.path.splitext(name)[1].lower() in allowed_suffixes
]
num_all_data = len(all_data_img)
print("分离图片总数: " + str(num_all_data))

# Shuffle the file names in place so the train/val assignment is random.
random.shuffle(all_data_img)

# Images whose position in the shuffled list is below this threshold go to
# the training set; the rest go to the validation set.
train_quota = num_all_data * train_rate
for num, image_name in enumerate(all_data_img):
    fileName = os.path.join(orgin_image_path, image_name)
    # The label file shares the image's base name, with a .txt extension.
    yolo_label = os.path.splitext(image_name)[0] + ".txt"
    yolo_label_file = os.path.join(orgin_label_path, yolo_label)
    if num < train_quota:
        image_dst, label_dst = train_path, train_label_path
    else:
        image_dst, label_dst = val_path, val_label_path
    copy2(fileName, image_dst)
    if os.path.isfile(yolo_label_file):
        copy2(yolo_label_file, label_dst)
    else:
        # Keep going instead of aborting halfway through the copy; the image
        # is still copied and the missing label is reported.
        print(f"warning: no label file for {image_name}, copied image only")
print("finished!")