引言
本篇接着上一篇的图像处理,补充一些与图像标注相关的笔记。因为最近也看了下yolo关于标注的代码,所以在这里总结一下,方便以后查看。另外下图是yolov4的test,我也尝试着跑了一下,但因为笔记本同时开的程序很多,而且没有改用GPU推理,所以丢帧严重,只能以输出视频的形式进行展示。
图像标注
import cv2,random
def plot_one_box(x, img, color=None, label=None, line_thickness=None):
    """Draw a single bounding box, with an optional caption, onto ``img``.

    ``x`` is read as ``(x1, y1, x2, y2)`` and each value is cast to ``int``.
    When ``line_thickness`` is falsy the thickness is derived from the image
    height and width; when ``color`` is falsy three random 0-255 channel
    values are used. A truthy ``label`` is written on a filled strip whose
    bottom-left corner sits on the box's first corner.
    """
    thickness = line_thickness or round(0.002 * (img.shape[0] + img.shape[1]) / 2) + 1
    if not color:
        color = [random.randint(0, 255) for _ in range(3)]

    top_left = (int(x[0]), int(x[1]))
    bottom_right = (int(x[2]), int(x[3]))
    cv2.rectangle(img, top_left, bottom_right, color, thickness=thickness, lineType=cv2.LINE_AA)

    if not label:
        return

    font_thickness = max(thickness - 1, 1)
    font_scale = thickness / 3
    text_w, text_h = cv2.getTextSize(label, 0, fontScale=font_scale, thickness=font_thickness)[0]
    # The strip extends upwards from the box corner, just tall enough for the text.
    strip_corner = (top_left[0] + text_w, top_left[1] - text_h - 3)
    cv2.rectangle(img, top_left, strip_corner, color, -1, cv2.LINE_AA)
    cv2.putText(img, label, (top_left[0], top_left[1] - 2), 0, font_scale, [225, 255, 255],
                thickness=font_thickness, lineType=cv2.LINE_AA)
def xywh2xyxy(x):
    """Convert n x 4 boxes from centre format to corner format.

    Each input row is read as ``[x_center, y_center, width, height]``; the
    matching output row is ``[x1, y1, x2, y2]`` where ``(x1, y1)`` is the
    top-left and ``(x2, y2)`` the bottom-right corner. A ``torch.Tensor``
    input gives a tensor result, any other input goes through
    ``np.zeros_like``. ``x`` itself is only read.
    """
    allocate = torch.zeros_like if isinstance(x, torch.Tensor) else np.zeros_like
    corners = allocate(x)

    half_w = x[:, 2] / 2
    half_h = x[:, 3] / 2
    center_x, center_y = x[:, 0], x[:, 1]

    corners[:, 0] = center_x - half_w  # top left x
    corners[:, 1] = center_y - half_h  # top left y
    corners[:, 2] = center_x + half_w  # bottom right x
    corners[:, 3] = center_y + half_h  # bottom right y
    return corners
def plot_images(images, targets, paths=None, fname='images.jpg', names=None, max_size=640, max_subplots=16):
    """Tile a batch of images into one mosaic and draw their boxes on it.

    Args:
        images: batch indexed as (batch, channels, height, width). A
            ``torch.Tensor`` is converted to a float NumPy array first. If the
            first image's maximum is <= 1 the batch is multiplied by 255.
        targets: 2-D array/tensor with one row per box. Column 0 is the index
            of the image inside the batch, column 1 the class id and columns
            2-5 the box as ``[x_center, y_center, w, h]``. Boxes are multiplied
            by the tile width/height before drawing, so they are presumably
            normalised to 0-1 (confirm against the caller). Rows with exactly
            6 columns are treated as ground truth; otherwise column 6 is read
            as a confidence and only boxes above 0.3 are drawn.
        paths: optional sequence of file paths; the basename (first 40
            characters) is written in the corner of each tile.
        fname: output image path. If the file already exists nothing is drawn
            and ``None`` is returned. ``None`` skips writing altogether.
        names: optional lookup from class id to display name.
        max_size: tiles are shrunk so that their longer side is at most this.
        max_subplots: maximum number of images placed in the mosaic.

    Returns:
        The mosaic as a ``uint8`` array (the half-size version that was
        written when ``fname`` is given), or ``None`` if ``fname`` exists.
    """
    tl = 3  # line thickness
    tf = max(tl - 1, 1)  # font thickness

    # fname=None is a supported "don't save" mode (see the end of the function),
    # so it must not reach os.path.isfile, which raises TypeError on None.
    if fname is not None and os.path.isfile(fname):  # do not overwrite
        return None

    if isinstance(images, torch.Tensor):
        images = images.cpu().float().numpy()
    if isinstance(targets, torch.Tensor):
        targets = targets.cpu().numpy()

    # Un-normalise into a new array: an in-place multiply would also rescale the
    # caller's batch (a CPU float tensor shares its memory with .numpy()).
    if np.max(images[0]) <= 1:
        images = images * 255

    bs, _, h, w = images.shape  # batch size, _, height, width
    bs = min(bs, max_subplots)  # limit plot images
    ns = int(np.ceil(bs ** 0.5))  # tiles per side of the (square) mosaic

    # Shrink tiles whose longer side exceeds max_size
    scale_factor = max_size / max(h, w)
    if scale_factor < 1:
        h = math.ceil(scale_factor * h)
        w = math.ceil(scale_factor * w)

    # White canvas for the output
    mosaic = np.full((ns * h, ns * w, 3), 255, dtype=np.uint8)

    def hex2rgb(hex_color):
        # '#rrggbb' -> (r, g, b)
        return tuple(int(hex_color[1 + k:1 + k + 2], 16) for k in (0, 2, 4))

    # Fixed class -> colour map taken from matplotlib's default colour cycle
    prop_cycle = plt.rcParams['axes.prop_cycle']
    color_lut = [hex2rgb(c) for c in prop_cycle.by_key()['color']]

    for i, img in enumerate(images):
        if i == max_subplots:  # batch holds more images than the mosaic shows
            break

        # Tiles fill the mosaic column by column
        block_x = w * (i // ns)
        block_y = h * (i % ns)

        img = img.transpose(1, 2, 0)  # channels-first -> channels-last
        if scale_factor < 1:
            img = cv2.resize(img, (w, h))
        mosaic[block_y:block_y + h, block_x:block_x + w, :] = img

        if len(targets) > 0:
            image_targets = targets[targets[:, 0] == i]
            boxes = xywh2xyxy(image_targets[:, 2:6]).T
            classes = image_targets[:, 1].astype('int')
            gt = image_targets.shape[1] == 6  # ground truth if no conf column
            conf = None if gt else image_targets[:, 6]  # confidence only present for predictions

            # Scale boxes to tile size and shift them to this tile's position
            boxes[[0, 2]] *= w
            boxes[[0, 2]] += block_x
            boxes[[1, 3]] *= h
            boxes[[1, 3]] += block_y
            for j, box in enumerate(boxes.T):
                cls = int(classes[j])
                color = color_lut[cls % len(color_lut)]
                cls = names[cls] if names else cls
                if gt or conf[j] > 0.3:  # 0.3 conf thresh
                    label = '%s' % cls if gt else '%s %.1f' % (cls, conf[j])
                    plot_one_box(box, mosaic, label=label, color=color, line_thickness=tl)

        # Draw image filename labels
        if paths is not None:
            label = os.path.basename(paths[i])[:40]  # trim to 40 char
            t_size = cv2.getTextSize(label, 0, fontScale=tl / 3, thickness=tf)[0]
            cv2.putText(mosaic, label, (block_x + 5, block_y + t_size[1] + 5), 0, tl / 3, [220, 220, 220],
                        thickness=tf, lineType=cv2.LINE_AA)

        # Image border
        cv2.rectangle(mosaic, (block_x, block_y), (block_x + w, block_y + h), (255, 255, 255), thickness=3)

    if fname is not None:
        # Save (and return) a half-size copy; channels are swapped for cv2.imwrite
        mosaic = cv2.resize(mosaic, (int(ns * w * 0.5), int(ns * h * 0.5)), interpolation=cv2.INTER_AREA)
        cv2.imwrite(fname, cv2.cvtColor(mosaic, cv2.COLOR_BGR2RGB))

    return mosaic
上面是yolo官方的标注代码,不论是yolo3还是yolo4都位于util / util.py工具模块里,yolo5也同样如此,只是改变了张量转换为图片时的精度。我自己没有训练过模型,只是把预训练的模型拿过来直接使用,因为主要关注点在于标注。
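上述代码中,第一个函数plot_one_box负责在图片上画框(并可附带标签);第二个函数xywh2xyxy把[中心x, 中心y, 宽, 高]形式的框转换成[左上角x, 左上角y, 右下角x, 右下角y],也就是得到矩形对角线上两个顶点的坐标;第三个函数plot_images则把一个batch的图片拼成一张网格大图,并在每张图上画出对应的标注框。这里是将它们拆分成了三部分,并且用pytorch做的数值转换,而yolov4-custom-functions中还可以使用tensorflow的数值转换,并可以写得更简洁一些: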
def draw_bbox(image, bboxes, info = False, counted_classes = None, show_label=True, allowed_classes=list(read_class_names(cfg.YOLO.CLASSES).values()), read_plate = False):
    """Draw detection boxes, captions and optional extras onto ``image``.

    Args:
        image: array indexed as (height, width, channels); drawn on in place.
        bboxes: 4-tuple ``(boxes, scores, classes, num_boxes)``. Each box is
            read as ``(xmin, ymin, xmax, ymax)``.
        info: print one line per drawn detection.
        counted_classes: optional mapping of label -> count, rendered as
            "<label>s detected: <count>" lines starting near the top-left
            corner.
        show_label: write "<class>: <score>" on a filled strip above each box.
        allowed_classes: class names that may be drawn; others are skipped.
        read_plate: call ``recognize_plate(image, coor)`` for every drawn box
            and write any returned text above it.

    Returns:
        The same ``image`` object.
    """
    classes = read_class_names(cfg.YOLO.CLASSES)
    num_classes = len(classes)
    image_h, image_w, _ = image.shape

    # One evenly spaced hue per class, converted to 0-255 channel tuples.
    hsv_tuples = [(1.0 * x / num_classes, 1., 1.) for x in range(num_classes)]
    colors = [tuple(int(channel * 255) for channel in colorsys.hsv_to_rgb(*hsv)) for hsv in hsv_tuples]
    # A private generator seeded with 0 yields the same shuffle as seeding the
    # module-level generator, without resetting the program's global random state.
    random.Random(0).shuffle(colors)

    out_boxes, out_scores, out_classes, num_boxes = bboxes
    height_ratio = int(image_h / 25)
    bbox_thick = int(0.6 * (image_h + image_w) / 600)
    font_scale = 0.5

    for i in range(num_boxes):
        class_ind = int(out_classes[i])
        # colors holds num_classes entries, so the last valid id is
        # num_classes - 1 (the previous `> num_classes` test let one more through).
        if class_ind < 0 or class_ind >= num_classes:
            continue
        class_name = classes[class_ind]
        if class_name not in allowed_classes:
            continue

        coor = out_boxes[i]
        score = out_scores[i]
        # OpenCV drawing functions take integer pixel coordinates.
        c1 = (int(coor[0]), int(coor[1]))
        c2 = (int(coor[2]), int(coor[3]))

        if read_plate:
            plate_number = recognize_plate(image, coor)
            if plate_number is not None:
                cv2.putText(image, plate_number, (int(coor[0]), int(coor[1] - height_ratio)),
                            cv2.FONT_HERSHEY_SIMPLEX, 1.25, (255, 255, 0), 2)

        bbox_color = colors[class_ind]
        cv2.rectangle(image, c1, c2, bbox_color, bbox_thick)

        if info:
            print("Object found: {}, Confidence: {:.2f}, BBox Coords (xmin, ymin, xmax, ymax): {}, {}, {}, {} ".format(class_name, score, coor[0], coor[1], coor[2], coor[3]))

        if show_label:
            bbox_mess = '%s: %.2f' % (class_name, score)
            t_size = cv2.getTextSize(bbox_mess, 0, font_scale, thickness=bbox_thick // 2)[0]
            c3 = (c1[0] + t_size[0], c1[1] - t_size[1] - 3)
            cv2.rectangle(image, c1, c3, bbox_color, -1)  # filled caption strip
            cv2.putText(image, bbox_mess, (c1[0], c1[1] - 2), cv2.FONT_HERSHEY_SIMPLEX,
                        font_scale, (0, 0, 0), bbox_thick // 2, lineType=cv2.LINE_AA)

    # The count overlay does not depend on any single box, so it is drawn once
    # per call (after all boxes) instead of being repainted for every detection.
    if counted_classes is not None:
        offset = 15
        for key, value in counted_classes.items():
            cv2.putText(image, "{}s detected: {}".format(key, value), (5, offset),
                        cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 255, 0), 2)
            offset += height_ratio
    return image
yolov4-custom-functions还自建了一个统计函数count_objects,在每次调用draw_bbox之前对检测结果data按类别进行分组计数,只需要建立一个字典就能达到统计数据的作用:
def count_objects(data, by_class = False, allowed_classes = list(read_class_names(cfg.YOLO.CLASSES).values())):
    """Count detections, either in total or per class name.

    ``data`` is unpacked as ``(boxes, scores, classes, num_objects)``.

    With ``by_class`` false the result is ``{'total object': num_objects}``.
    With ``by_class`` true the first ``num_objects`` class indices are mapped
    to names through ``read_class_names(cfg.YOLO.CLASSES)`` and the result
    maps each name found in ``allowed_classes`` to its number of occurrences.
    """
    _boxes, _scores, class_ids, total = data

    if not by_class:
        return {'total object': total}

    index_to_name = read_class_names(cfg.YOLO.CLASSES)
    tally = {}
    for position in range(total):
        name = index_to_name[int(class_ids[position])]
        if name not in allowed_classes:
            continue
        tally[name] = tally.get(name, 0) + 1
    return tally
那么把这个函数的统计结果作为counted_classes传给draw_bbox,就可以实现在画面角落显示统计结果的效果(按上面的代码,文字从坐标(5, offset)开始绘制,即画面左上角):
这里我们能把上述的两个文本标注拿出来单独实验,比如:
程序为:
def plot_one_box(x, img, color=None, label=None, line_thickness=None):
    """
    Draw one bounding box on ``img`` and show the result in a window.

    :param x: box corners as (x1, y1, x2, y2); each value is cast to int
    :param img: image to draw on
    :param color: three channel values; random 0-255 values when omitted
    :param label: optional caption written on a filled strip above the box
    :param line_thickness: outline thickness; derived from the image size when omitted
    :return: None
    """
    box_thickness = line_thickness or round(0.002 * (img.shape[0] + img.shape[1]) / 2) + 1
    color = color or [random.randint(0, 255) for _ in range(3)]

    x1, y1 = int(x[0]), int(x[1])
    x2, y2 = int(x[2]), int(x[3])
    cv2.rectangle(img, (x1, y1), (x2, y2), color, thickness=box_thickness, lineType=cv2.LINE_AA)

    if label:
        scale = box_thickness / 3
        text_thickness = max(box_thickness - 1, 1)
        (text_w, text_h), _ = cv2.getTextSize(label, 0, fontScale=scale, thickness=text_thickness)
        # Filled strip starting at the box corner and extending upwards
        cv2.rectangle(img, (x1, y1), (x1 + text_w, y1 - text_h - 3), color, -1, cv2.LINE_AA)
        cv2.putText(img, label, (x1, y1 - 2), 0, scale, [225, 255, 255],
                    thickness=text_thickness, lineType=cv2.LINE_AA)

    # Display the annotated image, wait for a key press, then close the window
    cv2.imshow("img", img)
    cv2.waitKey(0)
    cv2.destroyAllWindows()
# Demo: draw one labelled box on a sample image and display it.
img_path = r"D:\Pictures\1200\36.jpg"
img = cv2.imread(img_path)
# cv2.imread does not raise on a missing/unreadable file; it returns None,
# which would otherwise surface later as an AttributeError on img.shape.
if img is None:
    raise FileNotFoundError("cv2.imread could not read image: %s" % img_path)
x = [100, 100, 500, 500]  # box corners: x1, y1, x2, y2
color = (0, 255, 255)
label = "person"
line_thickness = 4
plot_one_box(x, img, color, label, line_thickness)
def annotate(frame, frame_rate=1, detection_rate=1):
    """Write frame-rate and detection-rate text onto ``frame`` and display it.

    Two lines of green text are drawn near the bottom-left corner: the frame
    rate four text-heights above the bottom edge and the detection rate one
    text-height above it. The frame is then shown in a window named "images"
    until a key is pressed.

    Args:
        frame: image to annotate; ``frame.shape[0]`` is used as its height.
        frame_rate: value shown after "Frame rate:" (defaults to 1).
        detection_rate: value shown after "Detection rate:" (defaults to 1).
    """
    text_color = (0, 255, 0)
    font = cv2.FONT_HERSHEY_SIMPLEX
    size = 0.5
    thickness = 2

    text = "Frame rate: %.1f" % frame_rate
    # getTextSize returns ((width, height), baseline); indexing the result with
    # [1] yields the baseline, so unpack the height explicitly.
    (_, height), _ = cv2.getTextSize(text, font, size, thickness)
    location = (0, frame.shape[0] - 4 * height)
    cv2.putText(frame, text, location, font, size, text_color,
                thickness=thickness)

    text = "Detection rate: %.1f" % detection_rate
    location = (0, frame.shape[0] - height)
    cv2.putText(frame, text, location, font, size, text_color,
                thickness=thickness)

    cv2.imshow("images", frame)
    cv2.waitKey(0)
    cv2.destroyAllWindows()