本文介绍如何从训练数据的xml标注文件中筛选出需要的某一类或几类目标。这是这段时间自己处理数据时需要用到的工具之一,在此做个笔记;若能帮助到有需要的你,非常高兴!
# Filter XML annotation files down to a chosen set of object classes.
#
# For every ``.xml`` file under ``fileDir_ann`` the script keeps only the
# ``<object>`` elements whose ``<name>`` is listed in ``KEEP_CLASSES``.  When at
# least one object survives, the filtered XML is written to ``saveDir_ann`` and
# the matching image is copied from ``fileDir_img`` to ``saveDir_img``; when
# none survives, no XML is left behind and no image is copied.
#
# The filtering is line based: it assumes ``<object>`` and ``</object>`` each
# sit alone on their own line, which is how the annotations this script was
# written for are laid out.  Requires Python 3.
import os
import os.path
import shutil
from typing import Iterable, List, Optional, Tuple

fileDir_ann = "F:\\work\\python_data\\Annotations\\"  # source XML annotations
fileDir_img = "F:\\work\\python_data\\JPEGImages\\"  # source images
saveDir_img = "F:\\work\\python_data\\JPEGImages_ssd\\"  # filtered images
saveDir_ann = "F:\\work\\python_data\\Annotations_ssd\\"  # filtered XML files

# Object classes to keep.  Add more names here (for example "motorake",
# "baus" or "cat") to keep several classes at once.
KEEP_CLASSES = ("PEOPLE",)

# Extensions tried, in order, when looking for the image that belongs to an
# annotation.  The empty string comes first so that an image stored without
# any extension is still found.  Adjust to your own image types.
IMAGE_EXTENSIONS = ("", ".jpg", ".jpeg", ".png", ".bmp")


def _object_name(block: Iterable[str]) -> Optional[str]:
    """Return the text of the first ``<name>...</name>`` line in *block*.

    Returns ``None`` when the block has no such line.
    """
    open_tag, close_tag = "<name>", "</name>"
    for line in block:
        text = line.strip()
        if text.startswith(open_tag) and text.endswith(close_tag):
            return text[len(open_tag):-len(close_tag)].strip()
    return None


def filter_objects(
    lines: Iterable[str], keep_classes: Iterable[str]
) -> Tuple[List[str], int]:
    """Drop every ``<object>`` block whose name is not in *keep_classes*.

    Args:
        lines: The annotation file, one entry per line, line endings included.
        keep_classes: Class names to keep; names must match exactly.

    Returns:
        ``(filtered_lines, kept)`` where ``filtered_lines`` holds every line
        outside an object block plus the complete blocks that were kept, in
        their original order, and ``kept`` is the number of kept blocks.
    """
    wanted = frozenset(keep_classes)
    output: List[str] = []
    kept = 0
    block: Optional[List[str]] = None  # lines of the object being read

    for line in lines:
        tag = line.strip()  # tolerate any indentation and CRLF endings
        if block is None:
            if tag == "<object>":
                block = [line]
            else:
                output.append(line)
            continue

        block.append(line)
        if tag == "</object>":
            # Slice from the start tag to the end tag instead of assuming a
            # fixed number of lines per object.
            if _object_name(block) in wanted:
                output.extend(block)
                kept += 1
            block = None

    if block is not None:
        # Unterminated <object>: the file is malformed, so pass the trailing
        # lines through untouched rather than silently dropping them.
        output.extend(block)
    return output, kept


def find_image(image_dir: str, stem: str) -> Optional[str]:
    """Return the path of the image named *stem* in *image_dir*, or ``None``.

    Each extension in ``IMAGE_EXTENSIONS`` is tried in order and the first
    existing file wins.
    """
    for extension in IMAGE_EXTENSIONS:
        candidate = os.path.join(image_dir, stem + extension)
        if os.path.isfile(candidate):
            return candidate
    return None


def process_annotation(xml_path: str) -> bool:
    """Filter one annotation file and copy its image when objects remain.

    Args:
        xml_path: Path of the source XML annotation.

    Returns:
        ``True`` when a filtered XML was written and the image copied,
        ``False`` when no object of a wanted class was present.

    Raises:
        FileNotFoundError: Objects were kept but no matching image exists in
            ``fileDir_img``.
    """
    filename = os.path.basename(xml_path)
    out_path = os.path.join(saveDir_ann, filename)

    # newline="" keeps the original line endings intact on read and write.
    with open(xml_path, encoding="utf-8", newline="") as source:
        lines = source.readlines()

    filtered, kept = filter_objects(lines, KEEP_CLASSES)
    if kept == 0:
        # Make sure no stale output from an earlier run survives.
        if os.path.exists(out_path):
            os.remove(out_path)
        return False

    # Resolve the image before writing so a missing image cannot leave an
    # annotation without its picture in the output folders.
    stem = os.path.splitext(filename)[0]
    image_path = find_image(fileDir_img, stem)
    if image_path is None:
        raise FileNotFoundError(
            "no image found for annotation " + filename + " in " + fileDir_img
        )

    with open(out_path, "w", encoding="utf-8", newline="") as target:
        target.writelines(filtered)
    shutil.copy(image_path, saveDir_img)
    return True


def main() -> None:
    """Process every ``.xml`` file found under ``fileDir_ann``."""
    os.makedirs(saveDir_img, exist_ok=True)
    os.makedirs(saveDir_ann, exist_ok=True)

    for dirpath, _dirnames, filenames in os.walk(fileDir_ann):
        for filename in filenames:
            if not filename.lower().endswith(".xml"):
                continue  # ignore stray non-annotation files
            print(filename + "-->start!")
            # Join with dirpath so files in subdirectories resolve correctly.
            process_annotation(os.path.join(dirpath, filename))


if __name__ == "__main__":
    main()