import os
import sys
import json
import math
from xml.dom.minidom import Document # 用于构建xml文件
import xml.etree.ElementTree as ET # 用于解析xml文件
from xml.dom import minidom
import pic_xml_resize
# classes_num = input("请输入要训练类别数28或59:")
class XmlMaker:
    """Write a Pascal-VOC-style XML annotation file for one image.

    One instance carries the image-level metadata plus a single object
    (class name and bounding box).  ``makexml`` creates the XML skeleton for
    the image; ``makexml_object`` appends one ``<object>`` entry to that
    existing file.  The XML file is written next to the path given in
    ``pic_name``, with the image extension replaced by ``.xml``.

    Args:
        share_folder: Value stored in ``<folder>``.
        pic_name: Image file name (may include a relative directory).
        pic_dir: Value stored in ``<path>``.
        data_base: Value stored in ``<source>/<database>``.
        pic_width, pic_height, pic_depth: Values stored under ``<size>``.
        pic_segmented: Value stored in ``<segment>``.
        object_name: Class label of the object.
        em_pose, em_truncated, em_difficult: Per-object attribute values.
        x_min, x_max, y_min, y_max: Bounding-box corners (note the order).
    """

    def __init__(self, share_folder, pic_name, pic_dir, data_base, pic_width, pic_height, pic_depth, pic_segmented,
                 object_name, em_pose, em_truncated, em_difficult, x_min, x_max, y_min, y_max):
        self.xmlfolder = share_folder
        self.filename = pic_name
        self.xmlpath = pic_dir
        self.database = data_base
        self.width = pic_width
        self.height = pic_height
        self.depth = pic_depth
        self.segmented = pic_segmented
        self.name = object_name
        self.pose = em_pose
        self.truncated = em_truncated
        self.difficult = em_difficult
        self.xmin = x_min
        self.ymin = y_min
        self.xmax = x_max
        self.ymax = y_max

    def _xml_path(self):
        """Return the XML file path: the image path with its extension swapped for ``.xml``."""
        # splitext only strips the final extension, so names such as
        # "img.v2.jpg" no longer collapse to "img.xml".
        return os.path.splitext(str(self.filename))[0] + '.xml'

    @staticmethod
    def _text_element(doc, tag, value):
        """Create ``<tag>value</tag>`` with the value converted to ``str``."""
        element = doc.createElement(tag)
        element.appendChild(doc.createTextNode(str(value)))
        return element

    def makexml(self):
        """Create (or overwrite) the XML file holding the image-level metadata."""
        # A private Document is used as the node factory so the method does
        # not depend on a module-level ``doc`` being defined by the caller.
        doc = Document()
        # NOTE(review): Pascal VOC conventionally names these elements
        # <annotation> and <segmented>; the existing names are kept so that
        # current consumers of the files keep working -- confirm before renaming.
        root = doc.createElement('annotations')
        root.appendChild(self._text_element(doc, 'folder', self.xmlfolder))
        root.appendChild(self._text_element(doc, 'filename', self.filename))
        root.appendChild(self._text_element(doc, 'path', self.xmlpath))

        source = doc.createElement('source')
        source.appendChild(self._text_element(doc, 'database', self.database))
        root.appendChild(source)

        size = doc.createElement('size')
        size.appendChild(self._text_element(doc, 'width', self.width))
        size.appendChild(self._text_element(doc, 'height', self.height))
        size.appendChild(self._text_element(doc, 'depth', self.depth))
        root.appendChild(size)

        root.appendChild(self._text_element(doc, 'segment', self.segmented))

        with open(self._xml_path(), 'w', encoding="utf-8") as f:
            root.writexml(f)

    def makexml_object(self):
        """Append one ``<object>`` entry to the XML file written by ``makexml``.

        Raises:
            FileNotFoundError: if ``makexml`` has not created the file yet.
        """
        xml_path = self._xml_path()
        tree = ET.parse(xml_path)
        root = tree.getroot()

        obj = ET.SubElement(root, 'object')
        for tag, value in (('name', self.name),
                           ('pose', self.pose),
                           ('truncated', self.truncated),
                           ('difficult', self.difficult)):
            ET.SubElement(obj, tag).text = str(value)

        bndbox = ET.SubElement(obj, 'bndbox')
        for tag, value in (('xmin', self.xmin),
                           ('ymin', self.ymin),
                           ('xmax', self.xmax),
                           ('ymax', self.ymax)):
            ET.SubElement(bndbox, tag).text = str(value)

        tree.write(xml_path)
# Pretty-print (beautify) an XML file in place.
def beauty_XML(filename, indent="\t", newl="\n", encoding="utf-8"):
    """Rewrite the XML file ``filename`` in place with indentation and newlines.

    Args:
        filename: Path of the XML file to reformat.
        indent: String added per nesting level.
        newl: String written after each node.
        encoding: Encoding used both for the file on disk and for the
            ``encoding`` attribute of the XML declaration.
    """
    root = ET.parse(filename).getroot()
    # Drop whitespace-only layout text left by an earlier pretty-print;
    # otherwise every re-run would stack extra blank lines and indentation.
    for node in root.iter():
        if len(node) and node.text is not None and not node.text.strip():
            node.text = None
        if node.tail is not None and not node.tail.strip():
            node.tail = None
    dom = minidom.parseString(ET.tostring(root))
    # Open with the same encoding that the XML declaration announces, so the
    # bytes on disk match the declaration regardless of the platform default.
    with open(filename, 'w', encoding=encoding) as f:
        dom.writexml(f, "", indent, newl, encoding)
# ********begin***************************************
if __name__ == "__main__":
    # Convert COCO-format annotations into Pascal-VOC-style XML files.
    # In COCO, each entry of data['annotations'] stores its bbox as
    # [top-left x, top-left y, width, height]; VOC wants xmin/ymin/xmax/ymax.
    with open('annotations.json', encoding='utf-8') as json_file:  # COCO annotation file
        data = json.load(json_file)
    images = data['images']
    annotations = data['annotations']

    # Kept as a module-level name: XmlMaker.makexml has relied on a global
    # ``doc`` as its node factory, so it must stay defined here.
    doc = Document()

    # Resolve images/categories through their COCO ``id`` rather than their
    # list position; the position is only used when an entry has no ``id``.
    images_by_id = {img.get('id', idx): img for idx, img in enumerate(images)}
    categories_by_id = {cat.get('id', idx): cat for idx, cat in enumerate(data['categories'])}

    record = set()  # image file names whose XML skeleton already exists
    folder = os.getcwd()
    for annotation in annotations:
        # Purely cosmetic console spinner.
        for frame in ('/', '|', '\\'):
            sys.stdout.write('\r' + frame)
            sys.stdout.flush()

        image = images_by_id[annotation['image_id']]
        filename = image['file_name']
        path = os.path.join(str(folder), str(filename))
        database = 'Unknowm'  # placeholder value, not used downstream
        width = image['width']
        height = image['height']
        depth = 3  # hard-coded: assumes 3-channel images, wrong for grayscale
        segmented = 0  # hard-coded
        obj_name = categories_by_id[annotation['category_id']]['name']
        pose = 'Unspecified'
        truncated = '0'  # hard-coded; not read from the annotations
        difficult = '0'  # hard-coded; not read from the annotations

        # Other fields of the annotation entry are not converted.
        box_x, box_y, box_w, box_h = annotation['bbox'][:4]
        xmin = box_x
        xmax = box_x + box_w
        ymin = box_y
        ymax = box_y + box_h

        # Rescale the box to the resized image.  The target width comes from
        # pic_xml_resize, so this script is coupled to that module; the
        # height follows from the original aspect ratio.
        width_original = width
        height_original = height
        width_changed = pic_xml_resize.width
        height_changed = int(height_original * (width_changed / width_original))
        xmin = math.ceil((width_changed / width_original) * xmin)
        xmax = math.ceil((width_changed / width_original) * xmax)
        ymin = math.ceil((height_changed / height_original) * ymin)
        ymax = math.ceil((height_changed / height_original) * ymax)
        width = width_changed
        height = height_changed

        xml_data = XmlMaker(folder, filename, path, database, width, height, depth, segmented, obj_name, pose,
                            truncated, difficult, xmin, xmax, ymin, ymax)
        # Create the per-image skeleton once, then append one <object> per annotation.
        if filename not in record:
            xml_data.makexml()
            record.add(filename)
        xml_data.makexml_object()

    # Collect the XML files located one level below the working directory and
    # pretty-print each of them.
    # NOTE(review): XML files written directly into the working directory are
    # not picked up by this scan.
    wd = os.getcwd()
    dir_xml = []
    for item in os.listdir(wd):
        dir_path = os.path.join(wd, item)
        if not os.path.isdir(dir_path):
            continue
        for item2 in os.listdir(dir_path):
            candidate = os.path.join(dir_path, item2)
            if os.path.isfile(candidate) and os.path.splitext(item2)[1] == '.xml':
                dir_xml.append(candidate)
    for item in dir_xml:
        beauty_XML(item)
# COCO-to-VOC conversion script.
# Reposted from blog.csdn.net/ohhardtoname/article/details/115307022