习惯用 Caffe 做东西,但各个开源数据集的标注格式并不统一。
今儿写了一个将 WIDER FACE 人脸数据集的标注格式转换为 VOC 格式的工具:
#!/usr/bin/env python2
# -*- coding: utf-8 -*-
"""
Created on Wed May 16 14:28:19 2018

@author: hans

Convert WIDER FACE bounding-box annotations into per-image VOC-style XML
files. The code is written to run unchanged on Python 2 and Python 3.
"""

import os
import cv2
from lxml.etree import Element, SubElement, tostring, ElementTree


def _add_text(parent, tag, text):
    """Append a child element named ``tag`` holding ``text`` and return it."""
    node = SubElement(parent, tag)
    node.text = text
    return node


def saveINFO(img_info, box_info, face_num, xml_root=None):
    """Write one VOC-style annotation XML file for a single image.

    Args:
        img_info: sequence ``[folder, name, height, width, channels]``;
            ``name`` is the image file name without its extension.
        box_info: sequence of ``[xmin, ymin, xmax, ymax]`` boxes.
        face_num: number of leading boxes in ``box_info`` to write; each
            becomes an ``<object>`` element named ``face``.
        xml_root: output root directory. When ``None`` the module-level
            ``xml_root_path`` (assigned by the script entry point) is used,
            which is how earlier versions of this function located it.

    The result is written to ``<xml_root>/<folder>/<name>.xml``; the
    folder is created when it does not exist yet. The output path is
    printed after writing.
    """
    if xml_root is None:
        xml_root = xml_root_path  # legacy behaviour: global set in __main__

    folder, name, height, width, depth = img_info[:5]

    node_root = Element('annotation')
    _add_text(node_root, 'folder', folder)
    _add_text(node_root, 'filename', name)

    node_size = SubElement(node_root, 'size')
    _add_text(node_size, 'width', str(width))
    _add_text(node_size, 'height', str(height))
    _add_text(node_size, 'depth', str(depth))

    for box in box_info[:face_num]:
        node_object = SubElement(node_root, 'object')
        _add_text(node_object, 'name', 'face')
        node_bndbox = SubElement(node_object, 'bndbox')
        for tag, value in zip(('xmin', 'ymin', 'xmax', 'ymax'), box):
            _add_text(node_bndbox, tag, str(value))

    out_dir = os.path.join(xml_root, folder)
    if not os.path.isdir(out_dir):
        os.makedirs(out_dir)
    xml_path = os.path.join(out_dir, name + '.xml')

    # Hand lxml the path instead of an open() handle so the file is closed
    # as soon as it is written; pretty_print adds line breaks and indents.
    ElementTree(node_root).write(xml_path, pretty_print=True)
    print(xml_path)


def _read_line(handle):
    """Return the next line of ``handle`` without its line ending.

    Returns an empty string at end of file (and for a blank line).
    """
    return handle.readline().rstrip('\r\n')


def _convert(txt_path, img_root, xml_root):
    """Convert every image record of a WIDER ground-truth file to XML.

    Args:
        txt_path: path of the ``wider_face_*_bbx_gt.txt`` annotation file.
        img_root: directory prefix prepended to each image path read from
            the annotation file (expected to end with a path separator).
        xml_root: output root directory handed to ``saveINFO``.

    Raises:
        IOError: if an image named in the annotation file cannot be read.
    """
    with open(txt_path, 'r') as f:
        line = _read_line(f)
        while line:
            # Image records look like "<folder>/<file>.jpg" and the folder
            # names contain "--". Any other line reached here (e.g. the
            # filler box line that follows a face count of 0) is skipped.
            if '--' in line:
                parts = line.split('/')
                folder = parts[0]
                name = os.path.splitext(parts[1])[0]

                img_path = img_root + line
                img = cv2.imread(img_path)
                if img is None:
                    raise IOError('cannot read image: ' + img_path)
                height, width, channels = img.shape[:3]

                face_num = int(_read_line(f))
                box_info = []
                for _ in range(face_num):
                    # Each box line starts with "x y w h"; the remaining
                    # attribute columns are not used.
                    x, y, w, h = [int(v) for v in _read_line(f).split()[:4]]
                    box_info.append([x, y, x + w, y + h])

                img_info = [folder, name, height, width, channels]
                saveINFO(img_info, box_info, face_num, xml_root)
            line = _read_line(f)


if __name__ == '__main__':
    mode = 'train'  # set to 'val' to convert the validation split
    txt_name = 'wider_face_' + mode + '_bbx_gt.txt'
    img_root_path = 'WIDER/WIDER_' + mode + '/images/'
    txt_root_path = 'WIDER/wider_face_split/'
    xml_root_path = 'WIDER/WIDER_' + mode + '/anno/'

    _convert(txt_root_path + txt_name, img_root_path, xml_root_path)
    print('Done!')