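# When building an object-detection dataset it is sometimes necessary to
# inspect or modify various fields of the XML annotation files; this script
# implements a collection of such operations on XML files.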
import codecs
import cv2
import shutil
import os
import json
import numpy as np
import random
import xml.etree.ElementTree as ET
# Both directories are placeholders and must be filled in before running.
path = ''  # directory holding the xml annotation files and their images
save_path = ''  # directory that receives the copied / edited files
# exist_ok=True replaces the separate existence check: it is a no-op when the
# directory is already there and avoids the check-then-create race.
os.makedirs(save_path, exist_ok=True)
# Snapshot of the entries in `path`, taken once before any processing.
info_all = os.listdir(path)
def get_and_check(root, name, length):
    """Find the elements matching *name* under *root* and validate their count.

    Args:
        root: Element to search; its ``findall`` method and ``tag`` attribute
            are used.
        name: Tag name (or path expression) handed to ``root.findall``.
        length: Expected number of matches. A value of ``0`` or less disables
            the count check; ``1`` additionally unwraps the single match.

    Returns:
        The single matching element when ``length == 1``, otherwise the list
        of all matching elements.

    Raises:
        NotImplementedError: If nothing matches, or if ``length > 0`` and the
            number of matches differs from ``length``.
    """
    matches = root.findall(name)
    if not matches:
        raise NotImplementedError('Can not find %s in %s.' % (name, root.tag))
    if length > 0 and len(matches) != length:
        raise NotImplementedError(
            'The size of %s is supposed to be %d, but is %d.'
            % (name, length, len(matches))
        )
    # Callers asking for exactly one element get it directly, not a 1-item list.
    return matches[0] if length == 1 else matches
# Batch maintenance of object-detection annotation xml files.
#
# Each helper below performs one independent operation on a single annotation
# file. The driver loop at the bottom applies them, in the order the steps
# were originally listed, to every ``.xml`` entry of ``info_all``. Comment out
# the calls you do not need. The category settings are placeholders and must
# be filled in before the category-based steps are useful.

copy_category = ''    # category whose xml/image pairs are copied to save_path
old_category = ''     # category name to be replaced ...
new_category = ''     # ... and the name that replaces it
remove_category = ''  # category whose objects are removed from the xml
image_exts = ('.jpg', '.png')  # extensions tried when locating an xml's image
category_list = []    # distinct category names, in first-seen order


def _stem(xml_name):
    """Return *xml_name* without its final extension (safe for dotted names)."""
    return os.path.splitext(xml_name)[0]


def _object_categories(root):
    """Return the ``name`` text of every ``object`` element under *root*."""
    return [get_and_check(obj, 'name', 1).text for obj in root.findall('object')]


def _find_image(directory, xml_name):
    """Return the file name of the image paired with *xml_name*, or ``None``.

    The image is looked up in *directory* under the xml's stem, trying each
    extension in ``image_exts`` in order.
    """
    for ext in image_exts:
        candidate = _stem(xml_name) + ext
        if os.path.isfile(os.path.join(directory, candidate)):
            return candidate
    return None


def _copy_by_category(xml_name, category):
    """Copy the xml and its image from ``path`` to ``save_path``.

    Nothing is copied unless at least one object in the xml is named
    *category*. The pair is copied once, however many objects match.
    """
    root = ET.parse(os.path.join(path, xml_name)).getroot()
    if category not in _object_categories(root):
        return
    shutil.copyfile(os.path.join(path, xml_name), os.path.join(save_path, xml_name))
    image_name = _find_image(path, xml_name)
    if image_name is not None:
        shutil.copyfile(
            os.path.join(path, image_name), os.path.join(save_path, image_name)
        )


def _set_filename(directory, xml_name):
    """Set the xml's ``filename`` element to ``<xml stem>.jpg``.

    Files without a ``filename`` element are left untouched.
    """
    xml_path = os.path.join(directory, xml_name)
    doc = ET.parse(xml_path)
    node = doc.getroot().find('filename')
    if node is None:
        return
    image_name = _stem(xml_name) + '.jpg'
    print('xml更换图片名称:', image_name)
    node.text = image_name
    doc.write(xml_path, encoding='utf-8')


def _set_image_path(directory, xml_name):
    """Set the xml's ``path`` element to ``<directory>/<xml stem>.jpg``.

    Files without a ``path`` element are left untouched.
    """
    xml_path = os.path.join(directory, xml_name)
    doc = ET.parse(xml_path)
    node = doc.getroot().find('path')
    if node is None:
        return
    node.text = os.path.join(directory, _stem(xml_name) + '.jpg')
    doc.write(xml_path, encoding='utf-8')


def _rename_category(xml_path, old, new):
    """Rename every object called *old* to *new*; rewrite only if changed."""
    doc = ET.parse(xml_path)
    changed = False
    for obj in doc.getroot().findall('object'):
        name_node = get_and_check(obj, 'name', 1)
        if name_node.text == old:
            name_node.text = new
            changed = True
    if changed:
        doc.write(xml_path, encoding='utf-8')


def _remove_category(xml_path, category):
    """Remove every object called *category*; rewrite only if changed."""
    doc = ET.parse(xml_path)
    root = doc.getroot()
    # Collect first, then remove, so the tree is not mutated while scanned.
    doomed = [
        obj for obj in root.findall('object')
        if get_and_check(obj, 'name', 1).text == category
    ]
    for obj in doomed:
        root.remove(obj)
    if doomed:
        doc.write(xml_path, encoding='utf-8')


def _collect_categories(xml_path, seen):
    """Append to *seen* each category in the xml that is not already listed."""
    for category in _object_categories(ET.parse(xml_path).getroot()):
        if category not in seen:
            seen.append(category)


def _delete_if_empty(directory, xml_name):
    """Delete the xml, and its image if present, when it has no ``object``.

    Emptiness is judged per file, not from categories seen in earlier files.
    """
    xml_path = os.path.join(directory, xml_name)
    if ET.parse(xml_path).getroot().findall('object'):
        return
    os.remove(xml_path)
    image_name = _find_image(directory, xml_name)
    if image_name is not None:
        os.remove(os.path.join(directory, image_name))


for info in info_all:
    # Only annotation files are processed; images are reached via their xml.
    if not info.endswith('.xml'):
        continue
    source_xml = os.path.join(path, info)

    # Copy xml + image to save_path when the xml contains copy_category.
    _copy_by_category(info, copy_category)

    # These two steps edit the copy in save_path, which exists only if the
    # file was copied there (now or on an earlier run).
    if os.path.isfile(os.path.join(save_path, info)):
        _set_filename(save_path, info)
        _set_image_path(save_path, info)

    # These steps edit the original annotation in `path`.
    _rename_category(source_xml, old_category, new_category)
    _remove_category(source_xml, remove_category)
    _collect_categories(source_xml, category_list)

    # Destructive: removes annotations with no objects and their images.
    _delete_if_empty(path, info)

# All categories found across the whole directory.
print(category_list)