yolov3使用聚类重置anchor

1、原理

1.1 前言：

anchor box其实就是从训练集的所有ground truth box中统计(使用k-means)出来的在训练集中最经常出现的几个box形状和尺寸。可以预先将这些统计上的先验（或来自人类的）经验加入到模型中，这样模型在学习的时候，瞎找的可能性就更小了些（玄学思想）。

1.2 公式：
在这里插入图片描述
1.3 效果：

只能加快模型收敛的速度，对模型的其他性能无影响

2、重置

2.1 模型下载：
使用GitHub原有模型，在此基础上修改：
https://github.com/lars76/kmeans-anchor-boxes

2.2 模型修改：
将下载后的模型删去不需要的部分：
在这里插入图片描述
将test文件删除，留下example.py和kmeans.py，在根目录下创建plt_anchor.py：

2.3 文件内容：

example.py：

import glob
import xml.etree.ElementTree as ET
import tqdm
import numpy as np

from kmeans import kmeans, avg_iou

ANNOTATIONS_PATH = "E:/pytorch/ultralytics-yolov3/yolov3-sheep/data/Annotations/"
#以正斜杠/这种形式可以防止反斜杠带来的转义错误
CLUSTERS = 9

def load_dataset(path):
   dataset = []
   for xml_file in tqdm.tqdm(glob.glob("{}/*xml".format(path))):
      print(xml_file)
      tree = ET.parse(xml_file)

      height = int(tree.findtext("./size/height"))
      width = int(tree.findtext("./size/width"))

      for obj in tree.iter("object"):
         xmin = int(float(obj.findtext("bndbox/xmin"))) / width
         ymin = int(float(obj.findtext("bndbox/ymin"))) / height
         xmax = int(float(obj.findtext("bndbox/xmax"))) / width
         ymax = int(float(obj.findtext("bndbox/ymax"))) / height

         dataset.append([xmax - xmin, ymax - ymin])

   return np.array(dataset)


data = load_dataset(ANNOTATIONS_PATH)
print(ANNOTATIONS_PATH)
out = kmeans(data, k=CLUSTERS)
print("Accuracy: {:.2f}%".format(avg_iou(data, out) * 100))
print("Boxes:\n {}-{}".format(out[:, 0]*416, out[:, 1]*416))

ratios = np.around(out[:, 0] / out[:, 1], decimals=2).tolist()
print("Ratios:\n {}".format(sorted(ratios)))

将 ANNOTATIONS_PATH =’’ 路径改成自己的.
运行后：
在这里插入图片描述

plt_anchor.py:

import glob
import xml.etree.ElementTree as ET
import tqdm
import numpy as np
import matplotlib.pyplot as plt
from kmeans import kmeans, avg_iou

ANNOTATIONS_PATH = "E:/pytorch/ultralytics-yolov3/yolov3-sheep/data/Annotations/"
#以正斜杠/这种形式可以防止反斜杠带来的转义错误
CLUSTERS = 9
BBOX_NORMALIZE = True

def show_cluster(cluster, max_points=2000):
    if len(data) > max_points:
        idx = np.random.choice(len(data),max_points)
        data = data[idx]
    plt.scatter(data[:, 0], data[:, 1], s=5, c='lavender')
    plt.scatter(cluster[:, 0], cluster[:, 1], c='red', s=100, marker="^")
    plt.xlabel("Width")
    plt.ylabel("Height")
    plt.title("Bounding and anchor distribution")
    plt.savefig("cluster.png")
    plt.show()

def show_w_h(data,cluster,bins=50):
    if data.dtype != np.float32:
        data = data.astype(np.float32)
    width = data[:, 0]
    height = data[:, 1]
    ratio = height / width

    plt.figure(1, figsize=(20, 6))
    plt.subplot(131)
    plt.hist(width, bins=bins, color='blue', rwidth=0.8)
    plt.xlabel('Width')
    plt.ylabel('Number')
    plt.grid(True, linestyle='-.')
    plt.title('Distribution of Width')

    plt.subplot(132)
    plt.hist(height, bins=bins, color='green', rwidth=0.8)
    plt.xlabel('Height')
    plt.ylabel('Number')
    plt.grid(True, linestyle='-.')
    plt.title('Distribution of Height')

    plt.subplot(133)
    plt.hist(ratio, bins=bins, color='magenta', rwidth=0.8)
    plt.xlabel('Height / Width')
    plt.ylabel('Number')
    plt.grid(True, linestyle='-.')
    plt.title('Distribution of aspect ratio[Height / Width]')
    plt.savefig("shape-distribution.png")
    plt.show()

def sort_cluster(cluster):
    if cluster.dtype != np.float32:
        cluster = cluster.astype(np.float32)
    area = cluster[:, 0] * cluster[:, 1]
    ratio = cluster[:, 1:2] / cluster[:, 0:1]
    return np.concatenate([cluster, ratio], axis=-1)

def load_dataset(path):
    dataset = []
    for xml_file in tqdm.tqdm(glob.glob("{}/*xml".format(path))):
        print(xml_file)
        tree = ET.parse(xml_file)

        height = int(tree.findtext("./size/height"))
        width = int(tree.findtext("./size/width"))

        for obj in tree.iter("object"):
            xmin = int(float(obj.findtext("bndbox/xmin"))) / width
            ymin = int(float(obj.findtext("bndbox/ymin"))) / height
            xmax = int(float(obj.findtext("bndbox/xmax"))) / width
            ymax = int(float(obj.findtext("bndbox/ymax"))) / height

            dataset.append([xmax - xmin, ymax - ymin])

    return np.array(dataset)


data = load_dataset(ANNOTATIONS_PATH)
print(ANNOTATIONS_PATH)
out = kmeans(data, k=CLUSTERS)
out_sorted = sort_cluster(out)
if out.dtype != np.float32:
    cluster = out.astype(np.float32)
show_cluster(data, out, max_points=2000)

show_w_h(data, out, bins=50)

运行后：

簇类图：在这里插入图片描述

长宽图：在这里插入图片描述

平头哥pengtougu

原创文章 42 获赞 62 访问量 5万+

关注私信

yolov3使用聚类重置anchor

1、原理

2、重置

猜你喜欢