目录
前言
该文章介绍了如何将YOLO格式转换为XML格式。XML格式转换为YOLO格式见另一篇文章。
一、VOC格式和YOLO格式介绍
1.VOC格式
VOC数据集采用的格式为XML格式,下面为示例:
<annotation>
    <folder>img</folder>
    <filename>pikaqiu.jpg</filename>
    <path>E:\cv_code\image_processing\test\img\pikaqiu.jpg</path>
    <source>
        <database>Unknown</database>
    </source>
    <size>
        <width>1062</width>
        <height>974</height>
        <depth>3</depth>
    </size>
    <segmented>0</segmented>
    <object>
        <name>pikaqiu</name>
        <pose>Unspecified</pose>
        <truncated>0</truncated>
        <difficult>0</difficult>
        <bndbox>
            <xmin>83</xmin>
            <ymin>74</ymin>
            <xmax>987</xmax>
            <ymax>920</ymax>
        </bndbox>
    </object>
</annotation>
其中我们要用到的信息有图片名称: <filename>pikaqiu.jpg</filename>,图片的宽度、高度、通道数信息: <size> <width>1062</width> <height>974</height> <depth>3</depth> </size>,类别名字:<name>pikaqiu</name>,box信息: <xmin>83</xmin> <ymin>74</ymin> <xmax>987</xmax> <ymax>920</ymax>。
2.YOLO格式
每一行的格式为 (class, xCenter, yCenter, w, h),分别代表类别编号、标注框中心点的 x 坐标和 y 坐标、标注框的宽度和高度。其中四个坐标值都是相对于图像宽、高归一化后的比例值(0~1),因此转换为 VOC 格式时需要乘以图像的实际宽度和高度。
二、使用步骤
1.引入库
import xml.etree.ElementTree as ET
import os
2.设置文件所在地址和图像信息
# Directory that holds the YOLO-format label files (*.txt).
yolo_dir = "E:/cv_code/image_processing/aug_datasets/label/"
# Directory the generated XML files are saved to.
xml_dir = "E:/cv_code/image_processing/aug_datasets/Annotations/"
# Image size in pixels (width, then height).
img_width = 1062
img_height = 974
# Mapping from class name to the numeric class label.
class_map = {"pikaqiu": 0}
3.遍历与解析YOLO文件
# Walk over every file in the YOLO label directory.
for yolo_file in os.listdir(yolo_dir):
    # Only *.txt files are treated as YOLO label files; skip anything else.
    if not yolo_file.endswith(".txt"):
        continue
    # Read all label lines of this file. The encoding is given explicitly so
    # the result does not depend on the platform's default locale encoding.
    with open(os.path.join(yolo_dir, yolo_file), "r", encoding="utf-8") as f:
        lines = f.readlines()
    # The image name is the label file's base name with a .jpg extension.
    img_file = os.path.splitext(yolo_file)[0] + ".jpg"
4.创建XML文件并写入信息
# NOTE: this snippet is the continuation of the `for yolo_file in ...` loop
# body from step 3; indent it one level deeper when assembling the script.

# Create the XML document, starting with the root node.
root = ET.Element("annotation")
# Child node holding the image file name.
filename = ET.SubElement(root, "filename")
filename.text = img_file
# <size> node: image width, height and channel count.
size = ET.SubElement(root, "size")
width = ET.SubElement(size, "width")
width.text = str(img_width)
height = ET.SubElement(size, "height")
height.text = str(img_height)
depth = ET.SubElement(size, "depth")
depth.text = "3"

# class_map goes from name to id, but the label files contain ids, so build
# the reverse lookup table (id -> name) once before the loop.
id_to_name = {cls_id: cls_name for cls_name, cls_id in class_map.items()}

# Walk over every labelled object in the file.
for line in lines:
    parts = line.strip().split()
    # A valid line has at least: class x_center y_center width height.
    if len(parts) < 5:
        continue
    cls_id = int(parts[0])
    # Skip objects whose class id is not listed in class_map; otherwise an
    # <object> with an empty <name> would be written.
    if cls_id not in id_to_name:
        continue
    cls_name = id_to_name[cls_id]
    x = float(parts[1])
    y = float(parts[2])
    w = float(parts[3])
    h = float(parts[4])
    # Convert the normalized centre/size box to pixel corner coordinates.
    xmin = int((x - w / 2) * img_width)
    ymin = int((y - h / 2) * img_height)
    xmax = int((x + w / 2) * img_width)
    ymax = int((y + h / 2) * img_height)
    # Keep the box inside the image.
    xmin = max(0, min(xmin, img_width))
    ymin = max(0, min(ymin, img_height))
    xmax = max(0, min(xmax, img_width))
    ymax = max(0, min(ymax, img_height))
    # Append the object and its bounding box to the XML tree.
    obj = ET.SubElement(root, "object")
    name = ET.SubElement(obj, "name")
    name.text = cls_name
    bndbox = ET.SubElement(obj, "bndbox")
    xmin_node = ET.SubElement(bndbox, "xmin")
    xmin_node.text = str(xmin)
    ymin_node = ET.SubElement(bndbox, "ymin")
    ymin_node.text = str(ymin)
    xmax_node = ET.SubElement(bndbox, "xmax")
    xmax_node.text = str(xmax)
    ymax_node = ET.SubElement(bndbox, "ymax")
    ymax_node.text = str(ymax)

# Write the tree to <xml_dir>/<label base name>.xml.
tree = ET.ElementTree(root)
tree.write(os.path.join(xml_dir, os.path.splitext(yolo_file)[0] + ".xml"))
5.总代码
import os
import xml.etree.ElementTree as ET
# Directory that holds the YOLO-format label files (*.txt).
yolo_dir = "E:/cv_code/image_processing/aug_datasets/label/"
# Directory the generated XML files are saved to.
xml_dir = "E:/cv_code/image_processing/aug_datasets/Annotations/"
# Image size in pixels. The same size is applied to every label file, so all
# images are assumed to share these dimensions.
img_width, img_height = 1062, 974
# Mapping from class name to the numeric class label used in the YOLO files.
class_map = {"pikaqiu": 0}


def yolo_box_to_voc(x_center, y_center, box_w, box_h, img_width, img_height):
    """Convert one normalized YOLO box to VOC pixel corner coordinates.

    Args:
        x_center, y_center: Box centre as fractions of the image width/height.
        box_w, box_h: Box size as fractions of the image width/height.
        img_width, img_height: Image size in pixels.

    Returns:
        A tuple ``(xmin, ymin, xmax, ymax)`` of ints, truncated toward zero
        and clamped to ``[0, img_width]`` / ``[0, img_height]``.
    """
    xmin = int((x_center - box_w / 2) * img_width)
    ymin = int((y_center - box_h / 2) * img_height)
    xmax = int((x_center + box_w / 2) * img_width)
    ymax = int((y_center + box_h / 2) * img_height)
    # Keep the box inside the image.
    xmin = max(0, min(xmin, img_width))
    ymin = max(0, min(ymin, img_height))
    xmax = max(0, min(xmax, img_width))
    ymax = max(0, min(ymax, img_height))
    return xmin, ymin, xmax, ymax


def convert_yolo_dir(yolo_dir, xml_dir, img_width, img_height, class_map):
    """Convert every YOLO ``*.txt`` label file in a directory to a VOC XML file.

    Args:
        yolo_dir: Directory containing the YOLO label files.
        xml_dir: Output directory; created if it does not exist.
        img_width, img_height: Pixel size applied to every image.
        class_map: Mapping from class name to numeric class id.

    Returns:
        The list of XML file paths that were written, in sorted file order.

    Raises:
        ValueError: If a label line with at least five fields contains a
            field that cannot be parsed as a number.
    """
    # The label files contain ids, so look names up through the reverse map.
    id_to_name = {cls_id: cls_name for cls_name, cls_id in class_map.items()}
    os.makedirs(xml_dir, exist_ok=True)
    written = []

    # Walk over every file in the YOLO label directory.
    for yolo_file in sorted(os.listdir(yolo_dir)):
        if not yolo_file.endswith(".txt"):
            continue
        # Read all label lines of this file.
        with open(os.path.join(yolo_dir, yolo_file), "r", encoding="utf-8") as f:
            lines = f.readlines()
        # The image name is the label file's base name with a .jpg extension.
        stem = os.path.splitext(yolo_file)[0]
        img_file = stem + ".jpg"

        # Create the XML document: root node, file name and image size.
        root = ET.Element("annotation")
        ET.SubElement(root, "filename").text = img_file
        size = ET.SubElement(root, "size")
        ET.SubElement(size, "width").text = str(img_width)
        ET.SubElement(size, "height").text = str(img_height)
        ET.SubElement(size, "depth").text = "3"

        # Walk over every labelled object in the file.
        for line in lines:
            parts = line.split()
            # A valid line has at least: class x_center y_center width height.
            if len(parts) < 5:
                continue
            # Skip objects whose class id is not listed in class_map.
            cls_name = id_to_name.get(int(parts[0]))
            if cls_name is None:
                continue
            x, y, w, h = (float(value) for value in parts[1:5])
            box = yolo_box_to_voc(x, y, w, h, img_width, img_height)

            # Append the object and its bounding box to the XML tree.
            obj = ET.SubElement(root, "object")
            ET.SubElement(obj, "name").text = cls_name
            bndbox = ET.SubElement(obj, "bndbox")
            for tag, value in zip(("xmin", "ymin", "xmax", "ymax"), box):
                ET.SubElement(bndbox, tag).text = str(value)

        # Write the tree to <xml_dir>/<label base name>.xml.
        xml_path = os.path.join(xml_dir, stem + ".xml")
        ET.ElementTree(root).write(xml_path, encoding="utf-8")
        written.append(xml_path)
    return written


if __name__ == "__main__":
    convert_yolo_dir(yolo_dir, xml_dir, img_width, img_height, class_map)