目录
数据集可视化
这是针对 VOC 2007 数据集的可视化脚本:数据放在 VOC2007 文件夹下,用 opencv-python 绘制标注框,并用 tqdm 显示处理进度。
import os
import cv2
import xml.dom.minidom
from tqdm import tqdm
# Draw every annotated bounding box (and its label) onto the matching image
# and save the result under ./Visualization/, showing progress with tqdm.
from xml.parsers.expat import ExpatError

# Input/output locations, relative to the current working directory.
image_path = "./JPEGImages/"
annotation_path = "./Annotations/"
output_path = "./Visualization/"

# Make sure the output directory exists; otherwise saving fails for every
# image on a fresh checkout.
os.makedirs(output_path, exist_ok=True)


def _node_int(parent, tag):
    """Return the text of the first <tag> element under *parent* as an int.

    Goes through float() first so coordinates written as "12.0" are accepted
    as well as plain integers.
    """
    return int(float(parent.getElementsByTagName(tag)[0].childNodes[0].data))


files_name = os.listdir(image_path)
for filename_ in tqdm(files_name):
    # Only the stem is used; image and annotation paths are rebuilt from it.
    filename = os.path.splitext(filename_)[0]
    # filename = filename.split('__')[0]
    img_path = image_path + filename + '.jpg'
    xml_path = annotation_path + filename + '.xml'

    img = cv2.imread(img_path)
    if img is None:
        # Unreadable or missing image: there is nothing to draw on.
        print(filename, "error")
        continue

    try:
        dom = xml.dom.minidom.parse(xml_path)
    except (OSError, ExpatError):
        # Missing or malformed annotation file: skip this image.
        # os.remove(img_path)
        continue

    for obj in dom.getElementsByTagName("object"):
        bndbox = obj.getElementsByTagName('bndbox')[0]
        xmin_data = _node_int(bndbox, 'xmin')
        ymin_data = _node_int(bndbox, 'ymin')
        xmax_data = _node_int(bndbox, 'xmax')
        ymax_data = _node_int(bndbox, 'ymax')
        label_name = obj.getElementsByTagName('name')[0].childNodes[0].data

        cv2.rectangle(img, (xmin_data, ymin_data), (xmax_data, ymax_data),
                      (55, 255, 155), 1)
        # Anchor the label at the centre of the box. Both coordinates are
        # midpoints: (min + max) / 2.
        center = (int((xmin_data + xmax_data) / 2),
                  int((ymin_data + ymax_data) / 2))
        cv2.putText(img, label_name, center, cv2.FONT_HERSHEY_SIMPLEX,
                    2, (0, 255, 0), 3)

    saved = cv2.imwrite("./Visualization/{}.jpg".format(filename), img)
    if not saved:
        print(filename, "error")

print("all done ====================================")
解析一下:dom = xml.dom.minidom.parse(xml_path) 用于解析 xml 文件;
objects = dom.getElementsByTagName("object") 从解析得到的 dom 中取出所有名为 object 的元素,返回一个列表;元素内的文本需通过其文本子节点的 .data 属性读取(例如 xmin.childNodes[0].data)。
=========下面原文章xml读取==============================
xml文件贴一下
<annotation>
<folder>病灶数据集</folder>
<filename>4.jpg</filename>
<path>C:/Users/Administrator/Desktop/病灶数据集/4.jpg</path>
<source>
<database>Unknown</database>
</source>
<size>
<width>448</width>
<height>382</height>
<depth>3</depth>
</size>
<segmented>0</segmented>
<object>
<name>less</name>
<pose>Unspecified</pose>
<truncated>1</truncated>
<difficult>0</difficult>
<bndbox>
<xmin>1</xmin>
<ymin>1</ymin>
<xmax>245</xmax>
<ymax>169</ymax>
</bndbox>
</object>
</annotation>
参考:https://blog.csdn.net/yixieling4397/article/details/82193391
更改标签名字
import os
import xml.etree.ElementTree as ET
#程序功能:批量修改VOC数据集中xml标签文件的标签名称
def changelabelname(inputpath, old_name='preName', new_name='TESTNAME'):
    """Rename a class label in every annotation XML file of a directory.

    Each ``<object>/<name>`` element whose text equals *old_name* is changed
    to *new_name*. A file is written back in place, once, only if at least
    one label in it was changed.

    Args:
        inputpath: Directory containing the ``.xml`` annotation files.
        old_name: Label text to replace.
        new_name: Label text to write instead.
    """
    for entry in os.listdir(inputpath):
        # Require the real extension so names that merely end in the
        # letters "xml" are not parsed.
        if not entry.endswith('.xml'):
            continue

        xml_file = os.path.join(inputpath, entry)
        tree = ET.parse(xml_file)

        modified = False
        for obj in tree.getroot().findall('object'):
            for name_node in obj.findall('name'):
                if name_node.text == old_name:
                    name_node.text = new_name
                    modified = True

        if modified:
            # Write as UTF-8 so non-ASCII text (e.g. Chinese folder names)
            # is kept as-is instead of being escaped.
            tree.write(xml_file, encoding='utf-8')
if __name__ == '__main__':
    # Replace this with the path to your own Annotations directory.
    annotations_dir = 'E:/Research/Dataset/Test/Annotations'
    changelabelname(inputpath=annotations_dir)