版权声明:转载请说明来源,谢谢 https://blog.csdn.net/wsp_1138886114/article/details/84029488
通过 excel 获取数据集
数据集的图像存放在一个总文件夹(可包含多个二级目录)中,Excel 文件记录了所需图像的文件名。我们读取 Excel 中的图像文件名,再从总文件夹中找到对应的图像并复制到目标目录。
示例文件结构如下:
源目录
├── 二级目录1
│ ├──example_01.jpg
│ └──example_02.jpg
├── 二级目录2
│ ├──example_03.jpg
│ ├──example_04.jpg
│ └──example_05.jpg
目标目录
excel 文件:img_list.xlsx
# -*- coding: utf-8 -*-
import xlrd
import os
import shutil
def read_excel(excel_path):
    """Collect image-name suffixes from the first column of a workbook.

    The workbook at ``excel_path`` is opened with xlrd. For every row of the
    first sheet, the first cell is converted to ``str`` and only its last
    14 characters are kept.

    NOTE(review): xlrd 2.0 and later only read ``.xls`` files; opening an
    ``.xlsx`` workbook needs an older xlrd release -- confirm the installed
    version.

    Args:
        excel_path: Path of the workbook to read.

    Returns:
        A list with one string per sheet row, in sheet order.
    """
    first_sheet = xlrd.open_workbook(excel_path).sheet_by_index(0)
    return [
        str(first_sheet.row_values(row_idx)[0])[-14:]
        for row_idx in range(first_sheet.nrows)
    ]
def file_and_forder(original_path):
    """Return the path of every file found under ``original_path``.

    The tree is walked recursively with ``os.walk``. Only files are listed;
    the sub-directories themselves are not part of the result.

    Args:
        original_path: Root directory to scan.

    Returns:
        A list of paths, each made by joining the directory that holds the
        file with the file name. The list is empty when no file is found.
    """
    folder_filename_list = []
    for root_dir, _dir_names, filenames in os.walk(original_path):
        # os.path.join picks the platform separator instead of a fixed "/".
        folder_filename_list.extend(
            os.path.join(root_dir, filename) for filename in filenames
        )
    return folder_filename_list
def copy_img_move(original_path, Target_path, excel_path):
    """Copy the images listed in an Excel workbook into ``Target_path``.

    Every file under ``original_path`` is compared with the names returned by
    ``read_excel(excel_path)``. The comparison uses the last 14 characters of
    each file path; matching files are copied, so the originals stay in place.

    Args:
        original_path: Root directory that is searched recursively for files.
        Target_path: Directory that receives the copies; created if missing.
        excel_path: Workbook whose entries select the files to copy.
    """
    # A set gives constant-time membership tests inside the loop.
    wanted_names = set(read_excel(excel_path))
    # If the target directory were missing, shutil.copy would treat
    # Target_path as a file name and each copy would overwrite the last one.
    os.makedirs(Target_path, exist_ok=True)
    for filename_single in file_and_forder(original_path):
        print("filename_single", filename_single)
        if filename_single[-14:] in wanted_names:
            shutil.copy(filename_single, Target_path)
    print("处理完成!")
if __name__ == "__main__":
    # Script entry point: copy the images named in img_list.xlsx from the
    # source tree into the target directory.
    copy_img_move("./源目录", "./目标目录", "img_list.xlsx")
通过 json 获取数据集
import json
import shutil
import os
from glob import glob
from tqdm import tqdm
# Sort the annotated images into one folder per class.
# Create the 59 class folders up front. exist_ok=True keeps the script
# re-runnable: a folder that already exists no longer stops the creation of
# the remaining ones.
for i in range(59):
    os.makedirs("./data/train/" + str(i), exist_ok=True)

# Load both annotation files, closing each file as soon as it is parsed.
with open("./data/labels/train_annotations.json", "r", encoding="utf-8") as f:
    file_train = json.load(f)
with open("./data/labels/validation_annotations.json", "r", encoding="utf-8") as f:
    file_val = json.load(f)
file_list = file_train + file_val

for file in tqdm(file_list):
    filename = file["image_id"]
    origin_path = "./data/images/" + filename
    ids = file["disease_class"]
    # Classes 44 and 45 are skipped entirely.
    if ids in (44, 45):
        continue
    # Shift the classes above 45 down by two; presumably this keeps the
    # folder numbering contiguous (0-58) after the two skipped classes.
    if ids > 45:
        ids = ids - 2
    save_path = "./data/train/" + str(ids) + "/"
    shutil.copy(origin_path, save_path)