【目标检测】通过k-means算法聚类一些主流数据集的anchors

git clone大佬的github代码仓：ybcc2015/DeepLearning-Utils
因为我是用来聚类COCO数据集的，而原仓库的datasets.py中不包含对COCO标注格式（json）的解析，因此个人修改datasets.py文件的代码如下：

import xml.etree.ElementTree as ET
import numpy as np
import glob
import os
import json
import cv2


class AnnotParser(object):
    """
    Collect normalized bounding-box sizes from a directory of annotation files.

    Supported file types are 'csv', 'xml' (VOC) and 'json' (COCO or labelme).
    Every parser returns a float array of shape (n, 2); each row is
    [box_width / image_width, box_height / image_height].
    """

    def __init__(self, file_type):
        """
        :param file_type: one of 'csv', 'xml' or 'json'
        """
        assert file_type in ['csv', 'xml', 'json'], "Unsupported file type."
        self.file_type = file_type

    def parse(self, annot_dir):
        """
        Parse annotation file, the file type must be csv or xml or json.

        :param annot_dir: directory path of annotation files
        :return: 2-d array, shape as (n, 2), each row represents a bbox, and each column
                 represents the corresponding width and height after normalized
        """
        if self.file_type == 'xml':
            return self.parse_xml(annot_dir)
        elif self.file_type == 'json':
            return self.parse_json(annot_dir)
        else:
            return self.parse_csv(annot_dir)

    @staticmethod
    def _to_array(boxes):
        """
        Convert a list of [w_norm, h_norm] pairs to a float array.

        The reshape keeps the documented (n, 2) shape even when the list is
        empty (a bare np.array([]) would have shape (0,)).
        """
        return np.array(boxes, dtype=float).reshape(-1, 2)

    @staticmethod
    def parse_xml(annot_dir):
        """
        Parse xml annotation file in VOC.

        :param annot_dir: directory that is searched for '*.xml' files
        :return: 2-d array, shape as (n, 2), normalized (width, height) per object
        """
        boxes = []

        for xml_file in glob.glob(os.path.join(annot_dir, '*.xml')):
            tree = ET.parse(xml_file)

            h_img = int(tree.findtext('./size/height'))
            w_img = int(tree.findtext('./size/width'))

            for obj in tree.iter('object'):
                # Coordinates may be written as floats, so go through float() first.
                xmin = int(round(float(obj.findtext('bndbox/xmin'))))
                ymin = int(round(float(obj.findtext('bndbox/ymin'))))
                xmax = int(round(float(obj.findtext('bndbox/xmax'))))
                ymax = int(round(float(obj.findtext('bndbox/ymax'))))

                w_norm = (xmax - xmin) / w_img
                h_norm = (ymax - ymin) / h_img

                boxes.append([w_norm, h_norm])

        return AnnotParser._to_array(boxes)

    @staticmethod
    def parse_json(annot_dir):
        """
        Parse json annotation files in COCO or labelme layout.

        A file with an 'images' key is read as COCO; otherwise a file with a
        'shapes' key is read as labelme. Any other json file found in the
        directory is ignored.

        :param annot_dir: directory that is searched for '*.json' files
        :return: 2-d array, shape as (n, 2), normalized (width, height) per box
        """
        boxes = []

        for js_file in glob.glob(os.path.join(annot_dir, '*.json')):
            with open(js_file, encoding='utf-8') as f:
                data = json.load(f)

            if not isinstance(data, dict):
                continue
            if 'images' in data:
                boxes.extend(AnnotParser._coco_boxes(data))
            elif 'shapes' in data:
                boxes.extend(AnnotParser._labelme_boxes(data))

        return AnnotParser._to_array(boxes)

    @staticmethod
    def _coco_boxes(data):
        """
        Return [w_norm, h_norm] pairs for one loaded COCO-style dict.

        Annotations without a 'bbox' entry, with a non-positive width or
        height, or reaching beyond the image border are skipped.
        """
        # Group annotations by the image they belong to.
        annotations_index = {}
        for annotation in data.get('annotations', []):
            annotations_index.setdefault(annotation['image_id'], []).append(annotation)

        boxes = []
        for image in data['images']:
            annotations_list = annotations_index.get(image['id'])
            if not annotations_list:
                continue

            image_height = image['height']
            image_width = image['width']

            for object_annotations in annotations_list:
                bbox = object_annotations.get('bbox')
                if not bbox:
                    # Not every annotation entry describes a box.
                    continue
                x, y, width, height = bbox
                if width <= 0 or height <= 0:
                    continue
                if x + width > image_width or y + height > image_height:
                    continue

                boxes.append([float(width) / image_width,
                              float(height) / image_height])

        return boxes

    @staticmethod
    def _labelme_boxes(data):
        """
        Return [w_norm, h_norm] pairs for one loaded labelme-style dict.

        The first two points of every shape are used as opposite corners of
        the box.
        """
        h_img = data['imageHeight']
        w_img = data['imageWidth']

        boxes = []
        for shape in data['shapes']:
            points = shape['points']
            x0 = int(round(points[0][0]))
            y0 = int(round(points[0][1]))
            x1 = int(round(points[1][0]))
            y1 = int(round(points[1][1]))

            # abs() keeps the size positive whichever corner comes first.
            boxes.append([abs(x1 - x0) / w_img, abs(y1 - y0) / h_img])

        return boxes

    @staticmethod
    def parse_csv(annot_dir):
        """
        Parse csv annotation file.

        Each non-blank row is read as
        ``image_path,xmin,ymin,xmax,ymax,<last column>``; the last column is
        not used. The image is opened with cv2 to obtain its size.

        :param annot_dir: directory that is searched for '*.csv' files
        :return: 2-d array, shape as (n, 2), normalized (width, height) per row
        :raises IOError: if an image referenced by a row cannot be read
        """
        boxes = []
        # image path -> (height, width); avoids decoding the same image once per row
        img_sizes = {}

        for csv_file in glob.glob(os.path.join(annot_dir, '*.csv')):
            with open(csv_file) as f:
                for line in f:
                    line = line.strip()
                    if not line:
                        continue

                    items = line.split(',')
                    img_path = items[0]

                    if img_path not in img_sizes:
                        img = cv2.imread(img_path)
                        if img is None:
                            # cv2.imread signals failure by returning None.
                            raise IOError("Cannot read image: {}".format(img_path))
                        img_sizes[img_path] = img.shape[:2]
                    h_img, w_img = img_sizes[img_path]

                    xmin, ymin, xmax, ymax = map(int, items[1:-1])

                    w_norm = (xmax - xmin) / w_img
                    h_norm = (ymax - ymin) / h_img

                    boxes.append([w_norm, h_norm])

        return AnnotParser._to_array(boxes)

使用该工具对COCO数据集进行anchors聚类，执行如下命令，即可得到聚类后的anchors：

python3 gen_anchors.py -d ***/COCO/annotations -t json  -k 6

【目标检测】通过k-means算法聚类一些主流数据集的anchors

猜你喜欢