数据转换篇——VOC 的 xml 标记批量转为 labelme 的 json 格式
由于工作需要准备先把手头的voc xml格式的标记转成labelme格式。参考网络的代码,编写了一个单文件的转换小工具 voc_to_labelme.py。
VOC格式 数据集的格式如下:
xml2json/
├── Annotations/    (VOC 的 xml 标注文件)
└── JPEGImages/     (与标注同名的 jpg 图片)
命令行工具用法:
python voc_to_labelme.py
命令行参数解释:
--voc_dir VOC数据集目录(其下包含 Annotations/ 和 JPEGImages/),默认xml2json
--labelme_version Labelme版本号,默认5.1.1
--labelme_shape Labelme标记框形状,支持rectangle或polygon,默认rectangle
--image_data Labelme的imageData节点是否输出数据,默认True
--out_dir Labelme格式数据集的输出目录,默认labelme
voc_to_labelme.py的源码:
# -*- coding:UTF-8 -*-
'''
VOC格式转换为labelme的json格式
'''
import argparse
import glob
import base64
import logging
import io
import os
import PIL
import PIL.Image
import xml.etree.ElementTree as ET
import json
import shutil
import numpy as np
def parse_opt(known=False):
    """Parse the command-line options of the VOC -> labelme converter.

    Args:
        known: When true, unrecognised command-line arguments are ignored
            (``parse_known_args``) instead of aborting with a usage error.
            The default keeps the strict ``parse_args`` behaviour.

    Returns:
        argparse.Namespace with the attributes ``voc_dir``,
        ``labelme_version``, ``labelme_shape``, ``image_data`` (a real
        bool) and ``out_dir``.
    """

    def str2bool(value):
        # ``type=bool`` would map every non-empty string -- including
        # "False" -- to True, so the text is interpreted explicitly.
        if isinstance(value, bool):
            return value
        text = value.strip().lower()
        if text in ('true', 't', 'yes', 'y', '1'):
            return True
        # The empty string stays falsy, as it was with ``type=bool``.
        if text in ('false', 'f', 'no', 'n', '0', ''):
            return False
        raise argparse.ArgumentTypeError(
            'expected a boolean value, got %r' % (value,))

    parser = argparse.ArgumentParser(description='xml2json')
    parser.add_argument('--voc_dir', default='xml2json', help='voc directory')
    parser.add_argument('--labelme_version', default='5.1.1', help='labelme version')
    parser.add_argument('--labelme_shape', default='rectangle', help='labelme shape')
    parser.add_argument('--image_data', default=True, type=str2bool, help='wether write image data to xml2json')
    parser.add_argument('--out_dir', default='labelme', help='the path of output directory')
    if known:
        return parser.parse_known_args()[0]
    return parser.parse_args()
def img_data_to_pil(img_data):
    """Open encoded image bytes as a PIL image backed by an in-memory buffer."""
    buffer = io.BytesIO(img_data)
    return PIL.Image.open(buffer)
def img_data_to_arr(img_data):
    """Decode encoded image bytes and return the pixels as a NumPy array."""
    return np.array(img_data_to_pil(img_data))
def img_arr_to_b64(img_arr):
    """Encode an image array as PNG and return the base64 form of those bytes."""
    buffer = io.BytesIO()
    PIL.Image.fromarray(img_arr).save(buffer, format="PNG")
    # Use ``encodebytes`` when the base64 module has it, else ``encodestring``.
    encode = getattr(base64, "encodebytes", None)
    if encode is None:
        encode = base64.encodestring
    return encode(buffer.getvalue())
def apply_exif_orientation(image):
    """Return *image* transformed according to its EXIF ``Orientation`` tag.

    Args:
        image: Object exposing ``_getexif()`` (a PIL image). Objects without
            that method, or whose EXIF data is ``None``, are returned as is.

    Returns:
        The original image when no transformation applies (no EXIF data,
        orientation 1, missing or unknown orientation); otherwise a new
        image that has been mirrored and/or rotated.
    """
    try:
        exif = image._getexif()
    except AttributeError:
        exif = None
    if exif is None:
        return image

    # The file-level imports only load ``PIL`` and ``PIL.Image``; the
    # submodules used below are not guaranteed to be attributes of the
    # package unless they are imported explicitly.
    from PIL import ExifTags, Image, ImageOps

    # Re-key the raw numeric EXIF ids by their tag names.
    named = {
        ExifTags.TAGS[tag_id]: value
        for tag_id, value in exif.items()
        if tag_id in ExifTags.TAGS
    }
    orientation = named.get("Orientation", None)

    if orientation == 2:
        # left-to-right mirror
        return ImageOps.mirror(image)
    if orientation == 3:
        # rotate 180
        return image.transpose(Image.ROTATE_180)
    if orientation == 4:
        # top-to-bottom mirror
        return ImageOps.flip(image)
    if orientation == 5:
        # top-to-left mirror
        return ImageOps.mirror(image.transpose(Image.ROTATE_270))
    if orientation == 6:
        # rotate 270
        return image.transpose(Image.ROTATE_270)
    if orientation == 7:
        # top-to-right mirror
        return ImageOps.mirror(image.transpose(Image.ROTATE_90))
    if orientation == 8:
        # rotate 90
        return image.transpose(Image.ROTATE_90)
    # Orientation 1, a missing tag or an unknown value: nothing to do.
    return image
def load_image_file(filename):
    """Load an image, apply its EXIF orientation and return re-encoded bytes.

    The image is re-saved as JPEG when the file extension is ``.jpg`` or
    ``.jpeg`` (case-insensitive) and as PNG for every other extension.
    """
    # apply orientation to image according to exif
    oriented = apply_exif_orientation(PIL.Image.open(filename))
    suffix = os.path.splitext(filename)[1].lower()
    save_format = "JPEG" if suffix in [".jpg", ".jpeg"] else "PNG"
    with io.BytesIO() as buffer:
        oriented.save(buffer, format=save_format)
        return buffer.getvalue()
def read_xml_gtbox_and_label(xml_path):
    """Read the image size and all labelled boxes from a VOC annotation.

    Args:
        xml_path: Source handed to ``ET.parse`` (a file name or file object)
            containing a VOC-style ``<annotation>`` document.

    Returns:
        A tuple ``(points, width, height)``. ``points`` is a list with one
        ``[label, xmin, ymin, xmax, ymax]`` entry per ``<object>`` element
        (coordinates as floats); ``width`` and ``height`` are the integers
        read from ``<size>``.

    Raises:
        AttributeError: if ``<size>``, ``<width>``, ``<height>``, or an
            object's ``<name>``/``<bndbox>`` coordinates are missing.
    """
    root = ET.parse(xml_path).getroot()
    size = root.find('size')
    width = int(size.find('width').text)
    height = int(size.find('height').text)
    # <depth> and <pose> are deliberately not read: their values were never
    # used, and requiring them made annotations without those optional
    # elements fail.
    points = []
    for obj in root.iter('object'):
        label = obj.find('name').text
        bndbox = obj.find('bndbox')
        xmin, ymin, xmax, ymax = (
            float(bndbox.find(tag).text)
            for tag in ('xmin', 'ymin', 'xmax', 'ymax')
        )
        points.append([label, xmin, ymin, xmax, ymax])
    return points, width, height
def voc_bndbox_to_labelme(opt):
    """Convert every VOC annotation under ``opt.voc_dir`` to a labelme JSON.

    For each ``Annotations/*.xml`` file the image ``JPEGImages/<stem>.jpg``
    is copied to ``opt.out_dir`` and ``<stem>.json`` is written next to it.

    Args:
        opt: Options object providing ``voc_dir``, ``out_dir``,
            ``labelme_version``, ``labelme_shape`` and ``image_data``.
            With ``labelme_shape == 'rectangle'`` each box is written as
            two corner points, otherwise as its four corners. When
            ``image_data`` is truthy the image file is embedded as base64
            text in ``imageData``; otherwise that field is ``None``.
    """
    xml_dir = os.path.join(opt.voc_dir, 'Annotations')
    img_dir = os.path.join(opt.voc_dir, 'JPEGImages')
    if not os.path.exists(opt.out_dir):
        os.makedirs(opt.out_dir)

    for xml_file in glob.glob(os.path.join(xml_dir, '*.xml')):
        # splitext drops exactly the extension. rstrip('.xml') removes any
        # trailing run of the characters '.', 'x', 'm', 'l', which mangles
        # stems such as "max" (-> "ma").
        filename = os.path.splitext(os.path.basename(xml_file))[0]
        img_name = filename + '.jpg'
        img_path = os.path.join(img_dir, img_name)
        points, width, height = read_xml_gtbox_and_label(xml_file)

        shapes = []
        for cls, xmin, ymin, xmax, ymax in points:
            if opt.labelme_shape == 'rectangle':
                # two points: top-left and bottom-right corner
                shape_points = [[xmin, ymin], [xmax, ymax]]
            else:
                # four points: the corners of the box as a polygon
                shape_points = [[xmin, ymin], [xmax, ymin], [xmax, ymax], [xmin, ymax]]
            shape = {}
            shape['label'] = cls
            shape['points'] = shape_points
            shape['line_color'] = None
            shape['fill_color'] = None
            shape['shape_type'] = opt.labelme_shape
            shape['flags'] = {}
            shapes.append(shape)

        json_str = {}
        json_str['version'] = opt.labelme_version
        json_str['flags'] = {}
        json_str['shapes'] = shapes
        json_str['imagePath'] = img_name
        if opt.image_data:
            with open(img_path, "rb") as f:
                image_data = f.read()
            json_str['imageData'] = base64.b64encode(image_data).decode("utf-8")
        else:
            json_str['imageData'] = None
        json_str['imageHeight'] = height
        json_str['imageWidth'] = width
        json_str['lineColor'] = [0, 255, 0, 128]
        json_str['fillColor'] = [255, 0, 0, 128]

        target_path = os.path.join(opt.out_dir, img_name)
        shutil.copy(img_path, target_path)
        json_file = os.path.join(opt.out_dir, filename + '.json')
        with open(json_file, 'w') as f:
            json.dump(json_str, f, indent=2)
        # Report success only once the file has actually been written.
        print(json_file, "convert success")
def main(opt):
    """Run the VOC -> labelme conversion with the given parsed options."""
    voc_bndbox_to_labelme(opt)
# Script entry point: parse the command-line options and run the conversion.
if __name__ == '__main__':
    opt = parse_opt()
    main(opt)
另一种实现方式(独立脚本,不写入 imageData):
# -*- coding: utf-8 -*-
import xml.etree.ElementTree as ET # 读取xml。
import os
from PIL import Image, ImageDraw, ImageFont
import os
import json
def parse_rec(rootPath, file, out_dir="xml2json/json"):
    """Convert one VOC XML annotation into a labelme JSON file.

    Args:
        rootPath: Directory that contains the annotation file.
        file: File name of the XML annotation inside ``rootPath``.
        out_dir: Directory the JSON file is written to; it is created when
            missing. Defaults to the previously hard-coded "xml2json/json".

    The JSON file is named after ``file`` with its extension replaced by
    ``.json``. ``imageData`` is always ``None``. Every ``<object>`` becomes
    one "rectangle" shape described by two points (top-left, bottom-right).
    """
    pathFile = os.path.join(rootPath, file)
    root = ET.parse(pathFile)  # parse the XML annotation
    # <folder> and <path> are only echoed below, so a missing element is
    # reported as None instead of aborting the conversion.
    folder = root.findtext('folder')
    filename = root.find('filename').text
    path = root.findtext('path')
    print(folder, filename, path)

    # Look the size fields up by tag name rather than by child position so
    # the result does not depend on the element order inside <size>.
    sz = root.find('size')
    width = int(sz.find('width').text)
    height = int(sz.find('height').text)
    print(width, height)

    data = {}
    data['imagePath'] = filename
    data['flags'] = {}
    data['imageWidth'] = width
    data['imageHeight'] = height
    data['imageData'] = None
    data['version'] = "4.5.6"
    data["shapes"] = []

    for child in root.findall('object'):  # every annotated box in the image
        sub = child.find('bndbox')  # coordinates of this box, read by tag
        xmin = float(sub.find('xmin').text)
        ymin = float(sub.find('ymin').text)
        xmax = float(sub.find('xmax').text)
        ymax = float(sub.find('ymax').text)
        # Two opposite corners: top-left and bottom-right. The previous
        # four-corner list was not in contour order and is inconsistent
        # with a two-point "rectangle" shape.
        itemData = {'points': [[xmin, ymin], [xmax, ymax]]}
        # Key is "flags" (plural), matching the top-level "flags" entry.
        itemData["flags"] = {}
        itemData["group_id"] = None
        itemData["shape_type"] = "rectangle"
        itemData["label"] = child.find("name").text
        data["shapes"].append(itemData)

    stem = os.path.splitext(file)[0]
    jsonName = ".".join([stem, "json"])
    print(rootPath, jsonName)
    # Opening the output file fails if the directory does not exist yet.
    if not os.path.isdir(out_dir):
        os.makedirs(out_dir)
    jsonPath = os.path.join(out_dir, jsonName)
    with open(jsonPath, "w") as f:
        json.dump(data, f)
    print("加载入文件完成...")
# Script entry point: walk the "xml2json" directory tree and convert every
# file whose name ends with ".xml".
if __name__ == '__main__':
    path = "xml2json"
    for root, dirs, files in os.walk(path):
        for file in files:
            if file.endswith(".xml"):
                parse_rec(root, file)