注:
元素不同
print(torch.Tensor([[1,2,3],[2,3,4]]).repeat(8,1,1))
tensor([[[1., 2., 3.],
[2., 3., 4.]],
[[1., 2., 3.],
[2., 3., 4.]],
[[1., 2., 3.],
[2., 3., 4.]],
[[1., 2., 3.],
[2., 3., 4.]],
[[1., 2., 3.],
[2., 3., 4.]],
[[1., 2., 3.],
[2., 3., 4.]],
[[1., 2., 3.],
[2., 3., 4.]],
[[1., 2., 3.],
[2., 3., 4.]]])
print(torch.Tensor([[1,2,3],[2,3,4]]).repeat(8,1,1).view(-1,3))
tensor([[1., 2., 3.],
[2., 3., 4.],
[1., 2., 3.],
[2., 3., 4.],
[1., 2., 3.],
[2., 3., 4.],
[1., 2., 3.],
[2., 3., 4.],
[1., 2., 3.],
[2., 3., 4.],
[1., 2., 3.],
[2., 3., 4.],
[1., 2., 3.],
[2., 3., 4.],
[1., 2., 3.],
[2., 3., 4.]])
print(torch.Tensor([[1,2,3],[2,3,4]]).repeat(1,1,8))
tensor([[[1., 2., 3., 1., 2., 3., 1., 2., 3., 1., 2., 3., 1., 2., 3., 1., 2.,
3., 1., 2., 3., 1., 2., 3.],
[2., 3., 4., 2., 3., 4., 2., 3., 4., 2., 3., 4., 2., 3., 4., 2., 3.,
4., 2., 3., 4., 2., 3., 4.]]])
print(torch.Tensor([[1,2,3],[2,3,4]]).repeat(1,1,8).view(-1,3))
tensor([[1., 2., 3.],
[1., 2., 3.],
[1., 2., 3.],
[1., 2., 3.],
[1., 2., 3.],
[1., 2., 3.],
[1., 2., 3.],
[1., 2., 3.],
[2., 3., 4.],
[2., 3., 4.],
[2., 3., 4.],
[2., 3., 4.],
[2., 3., 4.],
[2., 3., 4.],
[2., 3., 4.],
[2., 3., 4.]])
元素相同:
print(torch.Tensor([[1,2,3],[1,2,3]]).repeat(1,1,8).view(-1,3))
tensor([[1., 2., 3.],
[1., 2., 3.],
[1., 2., 3.],
[1., 2., 3.],
[1., 2., 3.],
[1., 2., 3.],
[1., 2., 3.],
[1., 2., 3.],
[1., 2., 3.],
[1., 2., 3.],
[1., 2., 3.],
[1., 2., 3.],
[1., 2., 3.],
[1., 2., 3.],
[1., 2., 3.],
[1., 2., 3.]])
print(torch.Tensor([[1,2,3],[1,2,3]]).repeat(8,1,1).view(-1,3))
tensor([[1., 2., 3.],
[1., 2., 3.],
[1., 2., 3.],
[1., 2., 3.],
[1., 2., 3.],
[1., 2., 3.],
[1., 2., 3.],
[1., 2., 3.],
[1., 2., 3.],
[1., 2., 3.],
[1., 2., 3.],
[1., 2., 3.],
[1., 2., 3.],
[1., 2., 3.],
[1., 2., 3.],
[1., 2., 3.]])
print(torch.Tensor([[1,2,3],[1,2,3]]).repeat(8,1,1).view(-1,3).shape)
torch.Size([16, 3])
print(torch.Tensor([[1,2,3],[1,2,3]]).repeat(1,1,8).view(-1,3).shape)
torch.Size([16, 3])
注:
import torch
input=torch.Tensor(5,255,13,13)
print(input.shape)
prediction = input.view(5, 3, 85, 13, 13).permute(0, 1, 3, 4, 2)
print(prediction.shape)
print(prediction[...,0].shape)
torch.Size([5, 255, 13, 13])
torch.Size([5, 3, 13, 13, 85])
torch.Size([5, 3, 13, 13])
Process finished with exit code 0
注1:mask示范
print(torch.Tensor([1,2,3,4])>3)
tensor([False, False, False, True])
注2:mask, noobj_mask, tx, ty, tw, th, tconf, tcls, box_loss_scale_x, box_loss_scale_y =
self.get_target(targets, scaled_anchors,in_w, in_h,self.ignore_threshold) # 13 13 0.5
targets:[tensor([[0.7632, 0.4964, 0.0216, 0.0361, 2.0000],
[0.5096, 0.4928, 0.0721, 0.0817, 2.0000],
[0.6599, 0.5120, 0.1082, 0.1298, 2.0000],
[0.7945, 0.4952, 0.0409, 0.0288, 2.0000]])]
scaled_anchors:
[(3.625, 2.8125),
(4.875, 6.1875),
(11.65625, 10.1875),
(0.9375, 1.90625),
(1.9375, 1.40625),
(1.84375, 3.71875),
(0.3125, 0.40625),
(0.5, 0.9375),
(1.03125, 0.71875)]
注3:noobj_mask = self.get_ignore(prediction, targets, scaled_anchors, in_w, in_h, noobj_mask)
prediction.shape : torch.Size([1, 3, 13, 13, 85])
注4:torch.linspace(0,12,13)
tensor([ 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12.])
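附: X.repeat(a,b,c) 变换方式: 先在 X.shape 左侧补 1, 使其维数与 (a,b,c) 相同, 新 shape 为对应维度相乘(不是取最大值)。例如 shape [2,3] 经 repeat(8,1,1) 得 [8,2,3], 经 repeat(1,1,8) 得 [1,2,24]。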
注5:torch.linspace(0,12,13).repeat(13,1) #torch.Size([13, 13])
tensor([[ 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12.],
[ 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12.],
[ 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12.],
[ 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12.],
[ 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12.],
[ 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12.],
[ 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12.],
[ 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12.],
[ 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12.],
[ 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12.],
[ 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12.],
[ 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12.],
[ 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12.]])
注6:torch.linspace(0,12,13).repeat(13,1).repeat(3,1,1) # torch.Size([3, 13, 13])
tensor([[[ 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12.],
[ 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12.],
[ 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12.],
[ 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12.],
[ 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12.],
[ 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12.],
[ 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12.],
[ 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12.],
[ 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12.],
[ 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12.],
[ 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12.],
[ 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12.],
[ 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12.]],
[[ 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12.],
[ 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12.],
[ 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12.],
[ 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12.],
[ 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12.],
[ 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12.],
[ 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12.],
[ 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12.],
[ 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12.],
[ 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12.],
[ 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12.],
[ 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12.],
[ 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12.]],
[[ 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12.],
[ 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12.],
[ 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12.],
[ 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12.],
[ 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12.],
[ 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12.],
[ 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12.],
[ 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12.],
[ 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12.],
[ 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12.],
[ 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12.],
[ 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12.],
[ 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12.]]])
注7:torch.linspace(0,12,13).repeat(13,1).repeat(3,1,1).view(torch.Size([1,3,13,13]))
tensor([[[[ 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12.],
[ 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12.],
[ 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12.],
[ 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12.],
[ 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12.],
[ 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12.],
[ 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12.],
[ 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12.],
[ 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12.],
[ 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12.],
[ 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12.],
[ 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12.],
[ 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12.]],
[[ 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12.],
[ 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12.],
[ 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12.],
[ 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12.],
[ 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12.],
[ 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12.],
[ 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12.],
[ 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12.],
[ 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12.],
[ 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12.],
[ 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12.],
[ 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12.],
[ 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12.]],
[[ 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12.],
[ 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12.],
[ 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12.],
[ 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12.],
[ 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12.],
[ 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12.],
[ 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12.],
[ 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12.],
[ 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12.],
[ 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12.],
[ 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12.],
[ 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12.],
[ 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12.]]]])
注8:FloatTensor(scaled_anchors).index_select(1, LongTensor([0]))
def index_select(self, dim, index): # real signature unknown; restored from __doc__ """ index_select(dim, index) -> Tensor See :func:`torch.index_select` """ pass
注9:box_a = torch.zeros_like(_box_a)
注10:
box_a[:, 2:].shape torch.Size([4, 2])
box_a[:, 2:].unsqueeze(1).shape torch.Size([4, 1, 2])
box_a[:, 2:].unsqueeze(1).expand(A, B, 2) torch.Size([4, 507, 2])
注11:
anch_iou = anch_ious[t].view(pred_boxes[i].size()[:3])
pred_boxes[i].size() # i=0
Out[27]: torch.Size([3, 13, 13, 4])
pred_boxes.size()
Out[28]: torch.Size([1, 3, 13, 13, 4])
anch_ious[t].shape
Out[29]: torch.Size([507])
anch_ious[t].view(pred_boxes[i].size()[:3]).shape
Out[30]: torch.Size([3, 13, 13])
import cv2
from random import shuffle
import numpy as np
import torch
import torch.nn as nn
import math
import torch.nn.functional as F
from matplotlib.colors import rgb_to_hsv, hsv_to_rgb
from PIL import Image
# torch.set_printoptions(profile="full")
def bbox_iou(box1, box2, x1y1x2y2=True):
    """Compute the element-wise IoU of two sets of boxes.

    Args:
        box1: Tensor whose columns 0..3 describe the boxes.
        box2: Tensor with the same column layout; its rows are combined with
            those of ``box1`` through ordinary broadcasting.
        x1y1x2y2: When True the columns are corner coordinates
            ``(x1, y1, x2, y2)``; when False they are ``(cx, cy, w, h)``.

    Returns:
        Tensor of IoU values, one per broadcast row pair.

    Note:
        Widths and heights are measured inclusively (``x2 - x1 + 1``), and a
        tiny epsilon is added to the denominator to avoid division by zero.
    """
    if x1y1x2y2:
        left_1, top_1, right_1, bottom_1 = (box1[:, k] for k in range(4))
        left_2, top_2, right_2, bottom_2 = (box2[:, k] for k in range(4))
    else:
        # Centre/size layout: expand each box to its corner coordinates.
        left_1, right_1 = box1[:, 0] - box1[:, 2] / 2, box1[:, 0] + box1[:, 2] / 2
        top_1, bottom_1 = box1[:, 1] - box1[:, 3] / 2, box1[:, 1] + box1[:, 3] / 2
        left_2, right_2 = box2[:, 0] - box2[:, 2] / 2, box2[:, 0] + box2[:, 2] / 2
        top_2, bottom_2 = box2[:, 1] - box2[:, 3] / 2, box2[:, 1] + box2[:, 3] / 2

    # Corners of the overlap rectangle.
    overlap_left = torch.max(left_1, left_2)
    overlap_top = torch.max(top_1, top_2)
    overlap_right = torch.min(right_1, right_2)
    overlap_bottom = torch.min(bottom_1, bottom_2)

    # Negative extents mean the boxes do not overlap; clamp them to zero.
    overlap_w = torch.clamp(overlap_right - overlap_left + 1, min=0)
    overlap_h = torch.clamp(overlap_bottom - overlap_top + 1, min=0)
    overlap_area = overlap_w * overlap_h

    area_1 = (right_1 - left_1 + 1) * (bottom_1 - top_1 + 1)
    area_2 = (right_2 - left_2 + 1) * (bottom_2 - top_2 + 1)

    return overlap_area / (area_1 + area_2 - overlap_area + 1e-16)
def jaccard(_box_a, _box_b):
    """Compute the pairwise IoU between two sets of centre-format boxes.

    Args:
        _box_a: Tensor of shape (A, 4) holding ``(cx, cy, w, h)`` rows.
        _box_b: Tensor of shape (B, 4) holding ``(cx, cy, w, h)`` rows.

    Returns:
        Tensor of shape (A, B); entry ``[i, j]`` is the IoU of box ``i`` of
        ``_box_a`` with box ``j`` of ``_box_b``.
    """

    def _to_corners(boxes):
        # (cx, cy, w, h) -> (x1, y1, x2, y2), same shape and dtype as input.
        corners = torch.zeros_like(boxes)
        corners[:, 0] = boxes[:, 0] - boxes[:, 2] / 2
        corners[:, 1] = boxes[:, 1] - boxes[:, 3] / 2
        corners[:, 2] = boxes[:, 0] + boxes[:, 2] / 2
        corners[:, 3] = boxes[:, 1] + boxes[:, 3] / 2
        return corners

    corners_a = _to_corners(_box_a)
    corners_b = _to_corners(_box_b)

    num_a = corners_a.size(0)
    num_b = corners_b.size(0)

    # Expand both sets to (A, B, 2) so every pair can be compared at once.
    lower_right = torch.min(
        corners_a[:, 2:].unsqueeze(1).expand(num_a, num_b, 2),
        corners_b[:, 2:].unsqueeze(0).expand(num_a, num_b, 2),
    )
    upper_left = torch.max(
        corners_a[:, :2].unsqueeze(1).expand(num_a, num_b, 2),
        corners_b[:, :2].unsqueeze(0).expand(num_a, num_b, 2),
    )

    # A non-positive extent on either axis means no overlap.
    extent = torch.clamp(lower_right - upper_left, min=0)
    overlap = extent[:, :, 0] * extent[:, :, 1]

    # Individual box areas, broadcast to the (A, B) pair grid.
    size_a = (corners_a[:, 2] - corners_a[:, 0]) * (corners_a[:, 3] - corners_a[:, 1])
    size_b = (corners_b[:, 2] - corners_b[:, 0]) * (corners_b[:, 3] - corners_b[:, 1])
    size_a = size_a.unsqueeze(1).expand_as(overlap)
    size_b = size_b.unsqueeze(0).expand_as(overlap)

    return overlap / (size_a + size_b - overlap)
def clip_by_tensor(t, t_min, t_max):
    """Clamp every element of ``t`` into ``[t_min, t_max]`` as float32.

    Args:
        t: Input tensor; it is converted to float32 before clamping.
        t_min: Lower bound.
        t_max: Upper bound.

    Returns:
        A float32 tensor with the same shape as ``t`` whose values lie in
        ``[t_min, t_max]``. NaN inputs stay NaN.

    Note:
        The previous mask-arithmetic form multiplied out-of-range values by
        zero, which turned +/-inf inputs into NaN (``0 * inf``).
        ``torch.clamp`` maps them onto the bounds instead and keeps the same
        pass-through gradient for in-range values.
    """
    return torch.clamp(t.float(), min=t_min, max=t_max)
def MSELoss(pred, target):
    """Return the element-wise squared error between ``pred`` and ``target``.

    No reduction is applied; the caller is expected to sum or average.
    """
    residual = pred - target
    return residual * residual
def BCELoss(pred, target):
    """Return the element-wise binary cross-entropy of ``pred`` vs ``target``.

    ``pred`` is first clipped into ``[1e-7, 1 - 1e-7]`` so that neither
    logarithm is evaluated at zero. No reduction is applied.
    """
    epsilon = 1e-7
    clipped = clip_by_tensor(pred, epsilon, 1.0 - epsilon)
    positive_term = target * torch.log(clipped)
    negative_term = (1.0 - target) * torch.log(1.0 - clipped)
    return -positive_term - negative_term
class YOLOLoss(nn.Module):
    """YOLOv3-style detection loss for a single prediction head.

    The module is built with the full anchor list; each call to ``forward``
    handles one feature map and selects the three anchors belonging to it
    from the feature-map width.
    """

    def __init__(self, anchors, num_classes, img_size, cuda):
        """Store the loss configuration.

        Args:
            anchors: Sequence of ``(width, height)`` anchor sizes in input-image
                pixels, ordered so that indices 0-2 / 3-5 / 6-8 belong to the
                first / second / third entry of ``feature_length``.
            num_classes: Number of object classes.
            img_size: Input size, indexed as ``img_size[0]`` (used for the
                horizontal stride) and ``img_size[1]`` (vertical stride).
            cuda: When truthy, targets built on the CPU are moved to the GPU
                in ``forward``.
        """
        super(YOLOLoss, self).__init__()
        self.anchors = anchors
        self.num_anchors = len(anchors)
        self.num_classes = num_classes
        self.bbox_attrs = 5 + num_classes
        # Feature-map side for strides 32 / 16 / 8 (13, 26, 52 for a 416 input).
        self.feature_length = [img_size[0] // 32, img_size[0] // 16, img_size[0] // 8]
        self.img_size = img_size
        self.ignore_threshold = 0.5
        self.lambda_xy = 1.0
        self.lambda_wh = 1.0
        self.lambda_conf = 1.0
        self.lambda_cls = 1.0
        # NOTE(review): this attribute shadows nn.Module.cuda(), so calling
        # .cuda() on an instance will fail. The name is kept because external
        # callers may read it.
        self.cuda = cuda

    def forward(self, input, targets=None):
        """Compute the loss for one feature map.

        Args:
            input: Raw head output of shape
                ``(bs, anchors_per_head * (5 + num_classes), in_h, in_w)``.
            targets: One entry per image, each an array/tensor of rows
                ``(cx, cy, w, h, class)`` with the box expressed as a fraction
                of the image size.

        Returns:
            Tuple ``(loss, loss_x, loss_y, loss_w, loss_h, loss_conf,
            loss_cls)``; ``loss`` is a tensor, the remaining items are Python
            floats.
        """
        bs = input.size(0)
        in_h = input.size(2)
        in_w = input.size(3)

        # Stride: how many input pixels one feature-map cell covers
        # (e.g. 32 for a 13x13 map on a 416 input).
        stride_h = self.img_size[1] / in_h
        stride_w = self.img_size[0] / in_w

        # Anchor sizes expressed in feature-map cells.
        scaled_anchors = [(a_w / stride_w, a_h / stride_h) for a_w, a_h in self.anchors]

        # (bs, A*(5+C), H, W) -> (bs, A, H, W, 5+C)
        prediction = input.view(
            bs, self.num_anchors // 3, self.bbox_attrs, in_h, in_w
        ).permute(0, 1, 3, 4, 2).contiguous()

        # Sigmoid keeps the predicted centre inside its own grid cell.
        x = torch.sigmoid(prediction[..., 0])
        y = torch.sigmoid(prediction[..., 1])
        w = prediction[..., 2]
        h = prediction[..., 3]
        conf = torch.sigmoid(prediction[..., 4])
        pred_cls = torch.sigmoid(prediction[..., 5:])

        # Decide which anchors are responsible for a ground-truth box.
        (mask, noobj_mask, tx, ty, tw, th, tconf, tcls,
         box_loss_scale_x, box_loss_scale_y) = self.get_target(
            targets, scaled_anchors, in_w, in_h, self.ignore_threshold)

        # Do not penalise confident predictions that overlap a ground truth.
        noobj_mask = self.get_ignore(prediction, targets, scaled_anchors, in_w, in_h, noobj_mask)

        if self.cuda:
            box_loss_scale_x = box_loss_scale_x.cuda()
            box_loss_scale_y = box_loss_scale_y.cuda()
            mask, noobj_mask = mask.cuda(), noobj_mask.cuda()
            tx, ty, tw, th = tx.cuda(), ty.cuda(), tw.cuda(), th.cuda()
            tconf, tcls = tconf.cuda(), tcls.cuda()

        # Weight is 2 - w*h of the normalised box, so small boxes count more.
        box_loss_scale = 2 - box_loss_scale_x * box_loss_scale_y

        loss_x = torch.sum(BCELoss(x, tx) / bs * box_loss_scale * mask)
        loss_y = torch.sum(BCELoss(y, ty) / bs * box_loss_scale * mask)
        # tw/th are log(ground-truth size / anchor size) in feature-map units.
        loss_w = torch.sum(MSELoss(w, tw) / bs * 0.5 * box_loss_scale * mask)
        loss_h = torch.sum(MSELoss(h, th) / bs * 0.5 * box_loss_scale * mask)

        # The same BCE map is used for the object and no-object terms.
        conf_bce = BCELoss(conf, mask)
        loss_conf = torch.sum(conf_bce * mask / bs) + \
            torch.sum(conf_bce * noobj_mask / bs)

        loss_cls = torch.sum(BCELoss(pred_cls[mask == 1], tcls[mask == 1]) / bs)

        loss = loss_x * self.lambda_xy + loss_y * self.lambda_xy + \
            loss_w * self.lambda_wh + loss_h * self.lambda_wh + \
            loss_conf * self.lambda_conf + loss_cls * self.lambda_cls

        print("losses:", loss, loss_x.item() + loss_y.item(), loss_w.item() + loss_h.item(),
              loss_conf.item(), loss_cls.item(),
              torch.sum(mask), torch.sum(noobj_mask))

        return loss, loss_x.item(), loss_y.item(), loss_w.item(), \
            loss_h.item(), loss_conf.item(), loss_cls.item()

    def get_target(self, target, anchors, in_w, in_h, ignore_threshold):
        """Build the per-cell regression/classification targets on the CPU.

        Args:
            target: One entry per image with rows ``(cx, cy, w, h, class)``
                given as fractions of the image size.
            anchors: All anchors as ``(w, h)`` in feature-map cells.
            in_w: Feature-map width; must be one of ``self.feature_length``.
            in_h: Feature-map height.
            ignore_threshold: Unused here; kept for interface compatibility.

        Returns:
            Tuple ``(mask, noobj_mask, tx, ty, tw, th, tconf, tcls,
            box_loss_scale_x, box_loss_scale_y)``. All have shape
            ``(bs, anchors_per_head, in_h, in_w)`` except ``tcls``, which has
            an extra trailing ``num_classes`` axis.
        """
        bs = len(target)

        # Which three global anchor indices belong to this feature map.
        layer = self.feature_length.index(in_w)
        anchor_index = [[0, 1, 2], [3, 4, 5], [6, 7, 8]][layer]
        subtract_index = [0, 3, 6][layer]

        per_head = self.num_anchors // 3
        grid_shape = (bs, per_head, in_h, in_w)

        mask = torch.zeros(*grid_shape, requires_grad=False)
        noobj_mask = torch.ones(*grid_shape, requires_grad=False)
        tx = torch.zeros(*grid_shape, requires_grad=False)
        ty = torch.zeros(*grid_shape, requires_grad=False)
        tw = torch.zeros(*grid_shape, requires_grad=False)
        th = torch.zeros(*grid_shape, requires_grad=False)
        tconf = torch.zeros(*grid_shape, requires_grad=False)
        tcls = torch.zeros(*grid_shape, self.num_classes, requires_grad=False)
        box_loss_scale_x = torch.zeros(*grid_shape, requires_grad=False)
        box_loss_scale_y = torch.zeros(*grid_shape, requires_grad=False)

        # Anchors as origin-anchored boxes (0, 0, w, h). This does not depend
        # on the target, so it is built once instead of once per box.
        anchor_shapes = torch.FloatTensor(
            np.concatenate((np.zeros((self.num_anchors, 2)), np.array(anchors)), 1))

        print("bs", bs)
        for b in range(bs):
            for t in range(target[b].shape[0]):
                # Ground-truth box in feature-map coordinates.
                gx = target[b][t, 0] * in_w
                gy = target[b][t, 1] * in_h
                gw = target[b][t, 2] * in_w
                gh = target[b][t, 3] * in_h

                # Grid cell containing the box centre.
                gi = int(gx)
                gj = int(gy)

                # Compare shapes only: both boxes are anchored at the origin.
                gt_box = torch.FloatTensor(np.array([0, 0, gw, gh])).unsqueeze(0)
                anch_ious = bbox_iou(gt_box, anchor_shapes)

                # Best-matching anchor over all heads; skip the box when that
                # anchor belongs to a different feature map.
                best_global = int(np.argmax(anch_ious.numpy()))
                if best_global not in anchor_index:
                    continue

                if (gj < in_h) and (gi < in_w):
                    best_n = best_global - subtract_index  # 0, 1 or 2

                    noobj_mask[b, best_n, gj, gi] = 0  # 0 where an object is present
                    mask[b, best_n, gj, gi] = 1  # 1 where an object is present

                    # Centre offset inside the cell.
                    tx[b, best_n, gj, gi] = gx - gi
                    ty[b, best_n, gj, gi] = gy - gj

                    # Log-ratio of the box size to the anchor size.
                    tw[b, best_n, gj, gi] = math.log(gw / anchors[best_global][0])
                    th[b, best_n, gj, gi] = math.log(gh / anchors[best_global][1])

                    # Normalised box size, used for the small-box weighting.
                    box_loss_scale_x[b, best_n, gj, gi] = target[b][t, 2]
                    box_loss_scale_y[b, best_n, gj, gi] = target[b][t, 3]

                    tconf[b, best_n, gj, gi] = 1
                    tcls[b, best_n, gj, gi, int(target[b][t, 4])] = 1
                else:
                    print('Step {0} out of bound'.format(b))
                    print('gj: {0}, height: {1} | gi: {2}, width: {3}'.format(gj, in_h, gi, in_w))
                    continue

        return mask, noobj_mask, tx, ty, tw, th, tconf, tcls, box_loss_scale_x, box_loss_scale_y

    def get_ignore(self, prediction, target, scaled_anchors, in_w, in_h, noobj_mask):
        """Clear ``noobj_mask`` where a prediction already overlaps a ground truth.

        Args:
            prediction: Tensor of shape ``(bs, anchors_per_head, in_h, in_w,
                5 + num_classes)`` holding the raw head output.
            target: One entry per image with rows ``(cx, cy, w, h, class)``
                given as fractions of the image size.
            scaled_anchors: All anchors as ``(w, h)`` in feature-map cells.
            in_w: Feature-map width; must be one of ``self.feature_length``.
            in_h: Feature-map height.
            noobj_mask: Mask produced by ``get_target``; modified in place.

        Returns:
            The same ``noobj_mask`` tensor, with zeros wherever a decoded box
            has IoU above ``self.ignore_threshold`` with any ground truth of
            that image.
        """
        bs = len(target)
        anchor_index = [[0, 1, 2], [3, 4, 5], [6, 7, 8]][self.feature_length.index(in_w)]
        head_anchors = np.array(scaled_anchors)[anchor_index]

        device = prediction.device

        # Decoded boxes are only used for masking, so no gradient is needed.
        with torch.no_grad():
            x = torch.sigmoid(prediction[..., 0])
            y = torch.sigmoid(prediction[..., 1])
            w = prediction[..., 2]
            h = prediction[..., 3]

            # Cell offsets. x varies along the last axis (width) and y along
            # the height axis; broadcasting keeps this correct when
            # in_h != in_w.
            grid_x = torch.linspace(
                0, in_w - 1, in_w, dtype=torch.float32, device=device).view(1, 1, 1, in_w)
            grid_y = torch.linspace(
                0, in_h - 1, in_h, dtype=torch.float32, device=device).view(1, 1, in_h, 1)

            # Anchor sizes broadcast along the anchor axis.
            anchors_t = torch.as_tensor(head_anchors, dtype=torch.float32, device=device)
            anchor_w = anchors_t[:, 0].view(1, -1, 1, 1)
            anchor_h = anchors_t[:, 1].view(1, -1, 1, 1)

            # Decoded boxes as (cx, cy, w, h) in feature-map cells.
            pred_boxes = torch.stack(
                (x + grid_x, y + grid_y, torch.exp(w) * anchor_w, torch.exp(h) * anchor_h),
                dim=-1,
            ).float()

            for i in range(bs):
                if len(target[i]) == 0:
                    continue

                flat_boxes = pred_boxes[i].view(-1, 4)

                # Width-1 slices keep the column axis for the concatenation.
                gx = target[i][:, 0:1] * in_w
                gy = target[i][:, 1:2] * in_h
                gw = target[i][:, 2:3] * in_w
                gh = target[i][:, 3:4] * in_h
                gt_box = torch.as_tensor(
                    np.concatenate([gx, gy, gw, gh], -1), dtype=torch.float32, device=device)

                # (num_gt, A*H*W) IoU table; a cell is ignored when any ground
                # truth exceeds the threshold.
                anch_ious = jaccard(gt_box, flat_boxes)
                ignore = (anch_ious > self.ignore_threshold).any(dim=0)
                ignore = ignore.view(pred_boxes[i].shape[:3])

                # noobj_mask lives on the CPU; the index must be on the same
                # device as the tensor being indexed.
                noobj_mask[i][ignore.to(noobj_mask.device)] = 0

        return noobj_mask
def rand(a=0, b=1):
    """Draw one uniform random float from the half-open interval ``[a, b)``."""
    return a + (b - a) * np.random.rand()
class Generator(object):
    """Endless training-batch generator with on-the-fly augmentation."""

    def __init__(self, batch_size,
                 train_lines, image_size,
                 ):
        """Store the batch configuration.

        Args:
            batch_size: Number of images per yielded batch.
            train_lines: List of annotation lines (see ``get_random_data``).
                It is shuffled in place at the start of every pass.
            image_size: Network input size; the first two entries are passed
                to ``get_random_data`` as ``(height, width)``.
        """
        self.batch_size = batch_size
        self.train_lines = train_lines
        self.train_batches = len(train_lines)
        self.image_size = image_size

    def get_random_data(self, annotation_line, input_shape, jitter=.1, hue=.1, sat=1.3, val=1.3):
        """Load one image and apply random scale, placement, flip and colour jitter.

        Args:
            annotation_line: Whitespace-separated string: the image path
                followed by zero or more ``x1,y1,x2,y2,class`` integer groups.
            input_shape: ``(height, width)`` of the output canvas.
            jitter: Maximum relative aspect-ratio perturbation.
            hue: Maximum hue shift as a fraction of the full hue circle.
            sat: Maximum saturation gain (or its reciprocal).
            val: Maximum value/brightness gain (or its reciprocal).

        Returns:
            ``(image_data, boxes)`` where ``image_data`` is a float RGB array
            scaled to 0..255 and ``boxes`` is an ``(n, 5)`` array of
            ``x1, y1, x2, y2, class`` in canvas pixels, or ``[]`` when no
            usable box remains.
        """
        line = annotation_line.split()
        image = Image.open(line[0])
        image = image.convert('RGB')
        iw, ih = image.size
        h, w = input_shape
        box = np.array([np.array(list(map(int, item.split(',')))) for item in line[1:]])

        # Random aspect ratio and scale.
        new_ar = w / h * rand(1 - jitter, 1 + jitter) / rand(1 - jitter, 1 + jitter)
        scale = rand(.25, 2)
        if new_ar < 1:
            nh = int(scale * h)
            nw = int(nh * new_ar)
        else:
            nw = int(scale * w)
            nh = int(nw / new_ar)
        image = image.resize((nw, nh), Image.BICUBIC)

        # Paste at a random offset on a grey canvas of the target size.
        dx = int(rand(0, w - nw))
        dy = int(rand(0, h - nh))
        new_image = Image.new('RGB', (w, h), (128, 128, 128))
        new_image.paste(image, (dx, dy))
        image = new_image

        # Random horizontal flip.
        flip = rand() < .5
        if flip:
            image = image.transpose(Image.FLIP_LEFT_RIGHT)

        # Colour jitter in HSV space.
        hue = rand(-hue, hue)
        sat = rand(1, sat) if rand() < .5 else 1 / rand(1, sat)
        val = rand(1, val) if rand() < .5 else 1 / rand(1, val)
        hsv = cv2.cvtColor(np.array(image, np.float32) / 255, cv2.COLOR_RGB2HSV)
        # For float32 images OpenCV reports hue in degrees (0..360), so the
        # shifted hue has to wrap around 360 rather than around 1.
        hsv[..., 0] += hue * 360
        hsv[..., 0][hsv[..., 0] > 360] -= 360
        hsv[..., 0][hsv[..., 0] < 0] += 360
        hsv[..., 1] *= sat
        hsv[..., 2] *= val
        hsv[hsv[:, :, 0] > 360, 0] = 360
        hsv[:, :, 1:][hsv[:, :, 1:] > 1] = 1
        hsv[hsv < 0] = 0
        image_data = cv2.cvtColor(hsv, cv2.COLOR_HSV2RGB) * 255

        # Apply the same geometric transform to the boxes.
        box_data = np.zeros((len(box), 5))
        if len(box) > 0:
            np.random.shuffle(box)
            box[:, [0, 2]] = box[:, [0, 2]] * nw / iw + dx
            box[:, [1, 3]] = box[:, [1, 3]] * nh / ih + dy
            if flip:
                box[:, [0, 2]] = w - box[:, [2, 0]]
            # Clip to the canvas.
            box[:, 0:2][box[:, 0:2] < 0] = 0
            box[:, 2][box[:, 2] > w] = w
            box[:, 3][box[:, 3] > h] = h
            box_w = box[:, 2] - box[:, 0]
            box_h = box[:, 3] - box[:, 1]
            # Drop boxes that became at most one pixel wide or tall.
            box = box[np.logical_and(box_w > 1, box_h > 1)]
            box_data = np.zeros((len(box), 5))
            box_data[:len(box)] = box

        if len(box) == 0:
            return image_data, []

        if (box_data[:, :4] > 0).any():
            return image_data, box_data
        return image_data, []

    @staticmethod
    def _pack_targets(targets):
        """Pack per-image target arrays into a single NumPy array.

        Images usually carry different numbers of boxes. Current NumPy
        releases raise ``ValueError`` for such ragged input instead of
        building an object array, so that case falls back to an explicit
        1-D object array with one entry per image.
        """
        try:
            return np.array(targets)
        except ValueError:
            packed = np.empty(len(targets), dtype=object)
            for idx, item in enumerate(targets):
                packed[idx] = item
            return packed

    def generate(self, train=True):
        """Yield ``(images, targets)`` batches forever.

        Args:
            train: Unused; kept for interface compatibility.

        Yields:
            ``images``: float32 array of shape ``(batch, 3, H, W)`` scaled to
            0..1. ``targets``: array with one entry per image, each holding
            rows ``(cx, cy, w, h, class)`` with the box normalised to 0..1
            (an empty array when the image has no box).
        """
        while True:
            shuffle(self.train_lines)
            lines = self.train_lines
            inputs = []
            targets = []
            for annotation_line in lines:
                img, y = self.get_random_data(annotation_line, self.image_size[0:2])

                if len(y) != 0:
                    # Corner pixels -> normalised corners.
                    boxes = np.array(y[:, :4], dtype=np.float32)
                    boxes[:, 0] = boxes[:, 0] / self.image_size[1]
                    boxes[:, 1] = boxes[:, 1] / self.image_size[0]
                    boxes[:, 2] = boxes[:, 2] / self.image_size[1]
                    boxes[:, 3] = boxes[:, 3] / self.image_size[0]
                    boxes = np.maximum(np.minimum(boxes, 1), 0)

                    # Normalised corners -> (cx, cy, w, h).
                    boxes[:, 2] = boxes[:, 2] - boxes[:, 0]
                    boxes[:, 3] = boxes[:, 3] - boxes[:, 1]
                    boxes[:, 0] = boxes[:, 0] + boxes[:, 2] / 2
                    boxes[:, 1] = boxes[:, 1] + boxes[:, 3] / 2
                    y = np.concatenate([boxes, y[:, -1:]], axis=-1)

                img = np.array(img, dtype=np.float32)

                # Channels first.
                inputs.append(np.transpose(img / 255.0, (2, 0, 1)))
                targets.append(np.array(y, dtype=np.float32))

                if len(targets) == self.batch_size:
                    tmp_inp = np.array(inputs)
                    tmp_targets = self._pack_targets(targets)
                    inputs = []
                    targets = []
                    yield tmp_inp, tmp_targets
import cv2
from random import shuffle
import numpy as np
import torch
import torch.nn as nn
import math
import torch.nn.functional as F
from matplotlib.colors import rgb_to_hsv, hsv_to_rgb
from PIL import Image
# torch.set_printoptions(profile="full")
def bbox_iou(box1, box2, x1y1x2y2=True):
    """Compute the element-wise IoU of two sets of boxes.

    Args:
        box1: Tensor whose columns 0..3 describe the boxes.
        box2: Tensor with the same column layout; its rows are combined with
            those of ``box1`` through ordinary broadcasting.
        x1y1x2y2: When True the columns are corner coordinates
            ``(x1, y1, x2, y2)``; when False they are ``(cx, cy, w, h)``.

    Returns:
        Tensor of IoU values, one per broadcast row pair.

    Note:
        Widths and heights are measured inclusively (``x2 - x1 + 1``), and a
        tiny epsilon is added to the denominator to avoid division by zero.
    """
    if x1y1x2y2:
        left_1, top_1, right_1, bottom_1 = (box1[:, k] for k in range(4))
        left_2, top_2, right_2, bottom_2 = (box2[:, k] for k in range(4))
    else:
        # Centre/size layout: expand each box to its corner coordinates.
        left_1, right_1 = box1[:, 0] - box1[:, 2] / 2, box1[:, 0] + box1[:, 2] / 2
        top_1, bottom_1 = box1[:, 1] - box1[:, 3] / 2, box1[:, 1] + box1[:, 3] / 2
        left_2, right_2 = box2[:, 0] - box2[:, 2] / 2, box2[:, 0] + box2[:, 2] / 2
        top_2, bottom_2 = box2[:, 1] - box2[:, 3] / 2, box2[:, 1] + box2[:, 3] / 2

    # Corners of the overlap rectangle.
    overlap_left = torch.max(left_1, left_2)
    overlap_top = torch.max(top_1, top_2)
    overlap_right = torch.min(right_1, right_2)
    overlap_bottom = torch.min(bottom_1, bottom_2)

    # Negative extents mean the boxes do not overlap; clamp them to zero.
    overlap_w = torch.clamp(overlap_right - overlap_left + 1, min=0)
    overlap_h = torch.clamp(overlap_bottom - overlap_top + 1, min=0)
    overlap_area = overlap_w * overlap_h

    area_1 = (right_1 - left_1 + 1) * (bottom_1 - top_1 + 1)
    area_2 = (right_2 - left_2 + 1) * (bottom_2 - top_2 + 1)

    return overlap_area / (area_1 + area_2 - overlap_area + 1e-16)
def jaccard(_box_a, _box_b):
    """Compute the pairwise IoU between two sets of centre-format boxes.

    Args:
        _box_a: Tensor of shape (A, 4) holding ``(cx, cy, w, h)`` rows.
        _box_b: Tensor of shape (B, 4) holding ``(cx, cy, w, h)`` rows.

    Returns:
        Tensor of shape (A, B); entry ``[i, j]`` is the IoU of box ``i`` of
        ``_box_a`` with box ``j`` of ``_box_b``.
    """

    def _to_corners(boxes):
        # (cx, cy, w, h) -> (x1, y1, x2, y2), same shape and dtype as input.
        corners = torch.zeros_like(boxes)
        corners[:, 0] = boxes[:, 0] - boxes[:, 2] / 2
        corners[:, 1] = boxes[:, 1] - boxes[:, 3] / 2
        corners[:, 2] = boxes[:, 0] + boxes[:, 2] / 2
        corners[:, 3] = boxes[:, 1] + boxes[:, 3] / 2
        return corners

    corners_a = _to_corners(_box_a)
    corners_b = _to_corners(_box_b)

    num_a = corners_a.size(0)  # e.g. 4 ground-truth boxes
    num_b = corners_b.size(0)  # e.g. 3 * 13 * 13 = 507 predicted boxes

    # Expand both sets to (A, B, 2) so every pair can be compared at once.
    lower_right = torch.min(
        corners_a[:, 2:].unsqueeze(1).expand(num_a, num_b, 2),
        corners_b[:, 2:].unsqueeze(0).expand(num_a, num_b, 2),
    )
    upper_left = torch.max(
        corners_a[:, :2].unsqueeze(1).expand(num_a, num_b, 2),
        corners_b[:, :2].unsqueeze(0).expand(num_a, num_b, 2),
    )

    # If either extent is clamped to zero the overlap area is zero.
    extent = torch.clamp(lower_right - upper_left, min=0)
    overlap = extent[:, :, 0] * extent[:, :, 1]

    # Individual box areas, broadcast to the (A, B) pair grid:
    # (A,) -> (A, 1) -> (A, B) and (B,) -> (1, B) -> (A, B).
    size_a = (corners_a[:, 2] - corners_a[:, 0]) * (corners_a[:, 3] - corners_a[:, 1])
    size_b = (corners_b[:, 2] - corners_b[:, 0]) * (corners_b[:, 3] - corners_b[:, 1])
    size_a = size_a.unsqueeze(1).expand_as(overlap)
    size_b = size_b.unsqueeze(0).expand_as(overlap)

    return overlap / (size_a + size_b - overlap)
def clip_by_tensor(t, t_min, t_max):
    """Clamp every element of ``t`` into ``[t_min, t_max]`` as float32.

    Args:
        t: Input tensor; it is converted to float32 before clamping.
        t_min: Lower bound.
        t_max: Upper bound.

    Returns:
        A float32 tensor with the same shape as ``t`` whose values lie in
        ``[t_min, t_max]``. NaN inputs stay NaN.

    Note:
        The previous mask-arithmetic form multiplied out-of-range values by
        zero, which turned +/-inf inputs into NaN (``0 * inf``).
        ``torch.clamp`` maps them onto the bounds instead and keeps the same
        pass-through gradient for in-range values.
    """
    return torch.clamp(t.float(), min=t_min, max=t_max)
def MSELoss(pred, target):
    """Return the element-wise squared error between ``pred`` and ``target``.

    No reduction is applied; the caller is expected to sum or average.
    """
    residual = pred - target
    return residual * residual
def BCELoss(pred, target):
    """Return the element-wise binary cross-entropy of ``pred`` vs ``target``.

    ``pred`` is first clipped into ``[1e-7, 1 - 1e-7]`` so that neither
    logarithm is evaluated at zero. No reduction is applied.
    """
    epsilon = 1e-7
    clipped = clip_by_tensor(pred, epsilon, 1.0 - epsilon)
    positive_term = target * torch.log(clipped)
    negative_term = (1.0 - target) * torch.log(1.0 - clipped)
    return -positive_term - negative_term
class YOLOLoss(nn.Module):
def __init__(self, anchors, num_classes, img_size, cuda):
super(YOLOLoss, self).__init__()
self.anchors = anchors
self.num_anchors = len(anchors)
self.num_classes = num_classes
self.bbox_attrs = 5 + num_classes
self.feature_length = [img_size[0] // 32, img_size[0] // 16, img_size[0] // 8] # 13 26 52
self.img_size = img_size
self.ignore_threshold = 0.5
self.lambda_xy = 1.0
self.lambda_wh = 1.0
self.lambda_conf = 1.0
self.lambda_cls = 1.0
self.cuda = cuda
def forward(self, input, targets=None):
# input为bs,3*(5+num_classes),13,13
# 一共多少张图片
bs = input.size(0)
# 特征层的高
in_h = input.size(2)
# 特征层的宽
in_w = input.size(3)
# 计算步长
# 每一个特征点对应原来的图片上多少个像素点
# 如果特征层为13x13的话,一个特征点就对应原来的图片上的32个像素点
stride_h = self.img_size[1] / in_h
stride_w = self.img_size[0] / in_w
# 把先验框的尺寸调整成特征层大小的形式
# 计算出先验框在特征层上对应的宽高
scaled_anchors = [(a_w / stride_w, a_h / stride_h) for a_w, a_h in self.anchors]
# bs,3*(5+num_classes),13,13 -> bs,3,13,13,(5+num_classes)
prediction = input.view(bs, int(self.num_anchors / 3),
self.bbox_attrs, in_h, in_w).permute(0, 1, 3, 4, 2).contiguous()
# 对prediction预测进行调整
x = torch.sigmoid(prediction[..., 0]) # Center x,目的是为保证中心点落在单元格内
y = torch.sigmoid(prediction[..., 1]) # Center y
w = prediction[..., 2] # Width
h = prediction[..., 3] # Height
conf = torch.sigmoid(prediction[..., 4]) # Conf
pred_cls = torch.sigmoid(prediction[..., 5:]) # Cls pred.
# 找到哪些先验框内部包含物体
mask, noobj_mask, tx, ty, tw, th, tconf, tcls, box_loss_scale_x, box_loss_scale_y = \
self.get_target(targets, scaled_anchors,
in_w, in_h,
self.ignore_threshold)
noobj_mask = self.get_ignore(prediction, targets, scaled_anchors, in_w, in_h, noobj_mask)
if self.cuda:
box_loss_scale_x = (box_loss_scale_x).cuda()
box_loss_scale_y = (box_loss_scale_y).cuda()
mask, noobj_mask = mask.cuda(), noobj_mask.cuda()
tx, ty, tw, th = tx.cuda(), ty.cuda(), tw.cuda(), th.cuda()
tconf, tcls = tconf.cuda(), tcls.cuda()
box_loss_scale = 2 - box_loss_scale_x * box_loss_scale_y
# losses.
# print("x", x.shape)
# print("x", x)
loss_x = torch.sum(BCELoss(x, tx) / bs * box_loss_scale * mask) # x为预测调整值 tx为真实调整值
loss_y = torch.sum(BCELoss(y, ty) / bs * box_loss_scale * mask)
# w为预测的宽度调整值
# tw = math.log(gw / scale_anchors[best_n+subtract_index][0])
# gw为真实框在特征图尺寸上的宽度 / 先验框在特征图尺寸上的宽度
loss_w = torch.sum(MSELoss(w, tw) / bs * 0.5 * box_loss_scale * mask)
loss_h = torch.sum(MSELoss(h, th) / bs * 0.5 * box_loss_scale * mask)
loss_conf = torch.sum(BCELoss(conf, mask) * mask / bs) + \
torch.sum(BCELoss(conf, mask) * noobj_mask / bs)
loss_cls = torch.sum(BCELoss(pred_cls[mask == 1], tcls[mask == 1]) / bs)
loss = loss_x * self.lambda_xy + loss_y * self.lambda_xy + \
loss_w * self.lambda_wh + loss_h * self.lambda_wh + \
loss_conf * self.lambda_conf + loss_cls * self.lambda_cls
print("losses:", loss, loss_x.item() + loss_y.item(), loss_w.item() + loss_h.item(),
loss_conf.item(), loss_cls.item(), \
torch.sum(mask), torch.sum(noobj_mask))
return loss, loss_x.item(), loss_y.item(), loss_w.item(), \
loss_h.item(), loss_conf.item(), loss_cls.item()
def get_target(self, target, anchors, in_w, in_h, ignore_threshold):
# 计算一共有多少张图片
bs = len(target)
# 获得先验框
anchor_index = [[0, 1, 2], [3, 4, 5], [6, 7, 8]][self.feature_length.index(in_w)] # [[0, 1, 2], [3, 4, 5], [6, 7, 8]][0] Out[42]: [0, 1, 2]
subtract_index = [0, 3, 6][self.feature_length.index(in_w)]
# 创建全是0或者全是1的阵列
mask = torch.zeros(bs, int(self.num_anchors / 3), in_h, in_w, requires_grad=False)
noobj_mask = torch.ones(bs, int(self.num_anchors / 3), in_h, in_w, requires_grad=False)
tx = torch.zeros(bs, int(self.num_anchors / 3), in_h, in_w, requires_grad=False)
ty = torch.zeros(bs, int(self.num_anchors / 3), in_h, in_w, requires_grad=False)
tw = torch.zeros(bs, int(self.num_anchors / 3), in_h, in_w, requires_grad=False)
th = torch.zeros(bs, int(self.num_anchors / 3), in_h, in_w, requires_grad=False)
tconf = torch.zeros(bs, int(self.num_anchors / 3), in_h, in_w, requires_grad=False)
tcls = torch.zeros(bs, int(self.num_anchors / 3), in_h, in_w, self.num_classes, requires_grad=False)
box_loss_scale_x = torch.zeros(bs, int(self.num_anchors / 3), in_h, in_w, requires_grad=False)
box_loss_scale_y = torch.zeros(bs, int(self.num_anchors / 3), in_h, in_w, requires_grad=False)
print("bs", bs)
for b in range(bs):
for t in range(target[b].shape[0]):
# 计算出在特征层上的点位
gx = target[b][t, 0] * in_w
gy = target[b][t, 1] * in_h
gw = target[b][t, 2] * in_w # target中是真实框的宽相对于图片框的比例 0<target[b][t, 2]<1
gh = target[b][t, 3] * in_h
# 计算出属于哪个网格
gi = int(gx)
gj = int(gy)
# 计算真实框的位置
gt_box = torch.FloatTensor(np.array([0, 0, gw, gh])).unsqueeze(0)
# 计算出所有先验框的位置
anchor_shapes = torch.FloatTensor(
np.concatenate((np.zeros((self.num_anchors, 2)), np.array(anchors)), 1))
# 计算重合程度
anch_ious = bbox_iou(gt_box, anchor_shapes)
# Find the best matching anchor box
best_n = np.argmax(anch_ious)
if best_n not in anchor_index:
continue
# Masks
if (gj < in_h) and (gi < in_w):
best_n = best_n - subtract_index # best_n只能为0/1/2
# 判定哪些先验框内部真实的存在物体
# 以下均为 torch.Size([1, 3, 13, 13])
noobj_mask[b, best_n, gj, gi] = 0 # noobj_mask 没物体为1 有物体为0
mask[b, best_n, gj, gi] = 1 # mask 没物体为0 有物体为1
# 计算先验框中心调整参数
tx[b, best_n, gj, gi] = gx - gi
ty[b, best_n, gj, gi] = gy - gj
# 计算先验框宽高调整参数
tw[b, best_n, gj, gi] = math.log(gw / anchors[best_n + subtract_index][0])
th[b, best_n, gj, gi] = math.log(gh / anchors[best_n + subtract_index][1])
# 用于获得xywh的比例
box_loss_scale_x[b, best_n, gj, gi] = target[b][t, 2]
box_loss_scale_y[b, best_n, gj, gi] = target[b][t, 3]
# 物体置信度
tconf[b, best_n, gj, gi] = 1
# 种类
# shape torch.Size([1, 3, 13, 13, 80])
tcls[b, best_n, gj, gi, int(target[b][t, 4])] = 1
else:
print('Step {0} out of bound'.format(b))
print('gj: {0}, height: {1} | gi: {2}, width: {3}'.format(gj, in_h, gi, in_w))
continue
return mask, noobj_mask, tx, ty, tw, th, tconf, tcls, box_loss_scale_x, box_loss_scale_y
def get_ignore(self, prediction, target, scaled_anchors, in_w, in_h, noobj_mask):
    """Clear no-object mask entries whose predicted box overlaps a ground truth box.

    The raw network output is decoded into boxes on the feature-map grid and
    compared against every ground-truth box of the same image with
    ``jaccard``. Wherever the overlap exceeds ``self.ignore_threshold`` the
    corresponding ``noobj_mask`` entry is set to 0.

    Args:
        prediction: network output indexed as ``[..., 0:4]`` = x, y, w, h
            adjustments; per the original notes its leading dimensions look
            like (batch, 3, in_h, in_w) -- TODO confirm against the caller.
        target: per-image sequence of ground-truth rows whose first four
            columns are multiplied by ``in_w`` / ``in_h`` here, i.e. they are
            expected to be normalised centre-x, centre-y, width, height.
        scaled_anchors: anchor (width, height) pairs; the three belonging to
            this feature level are selected via ``self.feature_length``.
        in_w: feature-map width.
        in_h: feature-map height.
        noobj_mask: mask tensor that is modified in place.

    Returns:
        The same ``noobj_mask`` object, after modification.
    """
    bs = len(target)
    anchor_index = [[0, 1, 2], [3, 4, 5], [6, 7, 8]][self.feature_length.index(in_w)]
    scaled_anchors = np.array(scaled_anchors)[anchor_index]

    # Centre-offset adjustments of the prior boxes.
    x = torch.sigmoid(prediction[..., 0])
    y = torch.sigmoid(prediction[..., 1])
    # Width / height adjustments of the prior boxes.
    w = prediction[..., 2]  # Width
    h = prediction[..., 3]  # Height

    FloatTensor = torch.cuda.FloatTensor if x.is_cuda else torch.FloatTensor
    LongTensor = torch.cuda.LongTensor if x.is_cuda else torch.LongTensor

    # Build the cell grid (top-left corner of every cell). Each plane must be
    # (in_h, in_w): column indices are tiled over in_h rows, row indices over
    # in_w columns. The original tiled by the wrong dimension, which is only
    # correct when in_h == in_w.
    planes = int(bs * self.num_anchors / 3)
    grid_x = torch.linspace(0, in_w - 1, in_w).repeat(in_h, 1).repeat(
        planes, 1, 1).view(x.shape).type(FloatTensor)
    grid_y = torch.linspace(0, in_h - 1, in_h).repeat(in_w, 1).t().repeat(
        planes, 1, 1).view(y.shape).type(FloatTensor)

    # Prior-box widths and heights, broadcast to the prediction layout.
    anchor_w = FloatTensor(scaled_anchors).index_select(1, LongTensor([0]))
    anchor_h = FloatTensor(scaled_anchors).index_select(1, LongTensor([1]))
    anchor_w = anchor_w.repeat(bs, 1).repeat(1, 1, in_h * in_w).view(w.shape)
    anchor_h = anchor_h.repeat(bs, 1).repeat(1, 1, in_h * in_w).view(h.shape)

    # Decode adjusted centres and sizes (element-wise on equally shaped tensors).
    # The buffer is allocated uninitialised and every channel is filled below.
    pred_boxes = FloatTensor(prediction[..., :4].shape)
    pred_boxes[..., 0] = x.data + grid_x
    pred_boxes[..., 1] = y.data + grid_y
    pred_boxes[..., 2] = torch.exp(w.data) * anchor_w
    pred_boxes[..., 3] = torch.exp(h.data) * anchor_h

    for i in range(bs):
        if len(target[i]) == 0:
            continue

        mask_shape = pred_boxes[i].size()[:3]
        # Flatten this image's boxes to (num_boxes, 4), e.g. (507, 4) for 3x13x13.
        pred_boxes_for_ignore = pred_boxes[i].view(-1, 4)

        # Slicing with 0:1 (instead of 0) keeps the column dimension.
        gx = target[i][:, 0:1] * in_w
        gy = target[i][:, 1:2] * in_h
        gw = target[i][:, 2:3] * in_w
        gh = target[i][:, 3:4] * in_h
        gt_box = torch.FloatTensor(np.concatenate([gx, gy, gw, gh], -1)).type(FloatTensor)

        # Overlap of every ground-truth box with every predicted box:
        # (num_gt, num_boxes).
        anch_ious = jaccard(gt_box, pred_boxes_for_ignore)

        # A predicted box is ignored if it exceeds the threshold for ANY
        # ground-truth box; reduce over the ground-truth axis in one step
        # instead of looping over each ground-truth row.
        exceeds = (anch_ious > self.ignore_threshold).any(dim=0)
        noobj_mask[i][exceeds.view(mask_shape)] = 0
    return noobj_mask
# pred_boxes[i].size()
# Out[27]: torch.Size([3, 13, 13, 4])
# pred_boxes.size()
# Out[28]: torch.Size([1, 3, 13, 13, 4])
# anch_ious[t].shape
# Out[31]: torch.Size([507])
# anch_ious[t].view(pred_boxes[i].size()[:3]).shape
# Out[30]: torch.Size([3, 13, 13])
def rand(a=0, b=1):
    """Return one NumPy [0, 1) random sample rescaled to the range from a to b."""
    span = b - a
    return a + span * np.random.rand()
class Generator(object):
    """Endless training-batch generator with on-the-fly image augmentation.

    Every entry of ``train_lines`` is one annotation line: an image path
    followed by whitespace-separated boxes, each written as five
    comma-separated integers. The first four are treated as
    x_min, y_min, x_max, y_max pixel corners; the fifth is carried through
    unchanged (presumably the class id -- verify against the loss code).
    """

    def __init__(self, batch_size,
                 train_lines, image_size,
                 ):
        self.batch_size = batch_size
        self.train_lines = train_lines
        self.train_batches = len(train_lines)
        # Indexed as (height, width, ...) by generate().
        self.image_size = image_size

    def get_random_data(self, annotation_line, input_shape, jitter=.1, hue=.1, sat=1.3, val=1.3):
        """Load one annotated image and apply random real-time augmentation.

        The image is randomly rescaled (with aspect-ratio jitter), pasted at a
        random offset onto a grey canvas of ``input_shape``, optionally
        mirrored, and colour-distorted in HSV space. Boxes are transformed to
        match and clipped to the canvas.

        Args:
            annotation_line: image path followed by ``x1,y1,x2,y2,c`` boxes.
            input_shape: ``(height, width)`` of the output canvas.
            jitter: aspect-ratio jitter; each factor is drawn from
                ``[1 - jitter, 1 + jitter)``.
            hue: maximum hue shift as a fraction of the full hue circle.
            sat: maximum saturation scale factor (or its reciprocal).
            val: maximum value/brightness scale factor (or its reciprocal).

        Returns:
            ``(image_data, boxes)`` where ``image_data`` is a float RGB array
            scaled to 0-255 and ``boxes`` is an ``(n, 5)`` array, or an empty
            list when no valid box remains.
        """
        line = annotation_line.split()
        # convert() loads the pixels into a new image, so the file handle can
        # be released immediately instead of lingering until garbage collection.
        with Image.open(line[0]) as source:
            image = source.convert('RGB')
        iw, ih = image.size
        h, w = input_shape
        box = np.array([np.array(list(map(int, item.split(',')))) for item in line[1:]])

        # Resize image with a jittered aspect ratio and random scale.
        new_ar = w / h * rand(1 - jitter, 1 + jitter) / rand(1 - jitter, 1 + jitter)
        scale = rand(.25, 2)
        if new_ar < 1:
            nh = int(scale * h)
            nw = int(nh * new_ar)
        else:
            nw = int(scale * w)
            nh = int(nw / new_ar)
        image = image.resize((nw, nh), Image.BICUBIC)

        # Place the resized image at a random offset on a grey canvas.
        dx = int(rand(0, w - nw))
        dy = int(rand(0, h - nh))
        new_image = Image.new('RGB', (w, h), (128, 128, 128))
        new_image.paste(image, (dx, dy))
        image = new_image

        # Flip image or not.
        flip = rand() < .5
        if flip:
            image = image.transpose(Image.FLIP_LEFT_RIGHT)

        # Distort image colours in HSV space.
        hue = rand(-hue, hue)
        sat = rand(1, sat) if rand() < .5 else 1 / rand(1, sat)
        val = rand(1, val) if rand() < .5 else 1 / rand(1, val)
        x = cv2.cvtColor(np.array(image, np.float32) / 255, cv2.COLOR_RGB2HSV)
        # For float32 input OpenCV reports hue in degrees [0, 360], so the
        # shift must wrap around at 360 (the original wrapped at 1, which
        # merely nudged hue by one degree and left the rest to clipping).
        x[..., 0] += hue * 360
        x[..., 0][x[..., 0] > 360] -= 360
        x[..., 0][x[..., 0] < 0] += 360
        x[..., 1] *= sat
        x[..., 2] *= val
        # Final clamp to the valid HSV ranges.
        x[x[:, :, 0] > 360, 0] = 360
        x[:, :, 1:][x[:, :, 1:] > 1] = 1
        x[x < 0] = 0
        image_data = cv2.cvtColor(x, cv2.COLOR_HSV2RGB) * 255

        # Correct boxes for the resize, offset and flip applied above.
        box_data = np.zeros((len(box), 5))
        if len(box) > 0:
            np.random.shuffle(box)
            box[:, [0, 2]] = box[:, [0, 2]] * nw / iw + dx
            box[:, [1, 3]] = box[:, [1, 3]] * nh / ih + dy
            if flip:
                box[:, [0, 2]] = w - box[:, [2, 0]]
            box[:, 0:2][box[:, 0:2] < 0] = 0
            box[:, 2][box[:, 2] > w] = w
            box[:, 3][box[:, 3] > h] = h
            box_w = box[:, 2] - box[:, 0]
            box_h = box[:, 3] - box[:, 1]
            box = box[np.logical_and(box_w > 1, box_h > 1)]  # discard invalid box
            box_data = np.zeros((len(box), 5))
            box_data[:len(box)] = box

        if len(box) == 0 or not (box_data[:, :4] > 0).any():
            return image_data, []
        return image_data, box_data

    @staticmethod
    def _stack_targets(targets):
        """Pack per-image target arrays into a single batch array.

        Equal-shaped targets become one regular array. Targets of differing
        shapes (different box counts per image) become a 1-D object array of
        arrays; building that explicitly avoids the ValueError newer NumPy
        raises for ragged input to ``np.array``.
        """
        if len({item.shape for item in targets}) <= 1:
            return np.array(targets)
        packed = np.empty(len(targets), dtype=object)
        for idx, item in enumerate(targets):
            packed[idx] = item
        return packed

    def generate(self, train=True):
        """Yield ``(images, targets)`` batches forever.

        ``self.train_lines`` is shuffled in place at the start of every pass.
        Images are channel-first float arrays scaled to [0, 1]. Each target
        row is normalised ``(cx, cy, w, h, c)``; images without boxes
        contribute an empty array. A trailing partial batch at the end of a
        pass is dropped. ``train`` is accepted for interface compatibility
        and is not used.
        """
        while True:
            shuffle(self.train_lines)
            inputs = []
            targets = []
            for annotation_line in self.train_lines:
                img, y = self.get_random_data(annotation_line, self.image_size[0:2])
                if len(y) != 0:
                    boxes = np.array(y[:, :4], dtype=np.float32)
                    # Normalise corners by canvas width / height.
                    boxes[:, 0] = boxes[:, 0] / self.image_size[1]
                    boxes[:, 1] = boxes[:, 1] / self.image_size[0]
                    boxes[:, 2] = boxes[:, 2] / self.image_size[1]
                    boxes[:, 3] = boxes[:, 3] / self.image_size[0]
                    boxes = np.maximum(np.minimum(boxes, 1), 0)
                    # Corner form -> centre / size form.
                    boxes[:, 2] = boxes[:, 2] - boxes[:, 0]
                    boxes[:, 3] = boxes[:, 3] - boxes[:, 1]
                    boxes[:, 0] = boxes[:, 0] + boxes[:, 2] / 2
                    boxes[:, 1] = boxes[:, 1] + boxes[:, 3] / 2
                    y = np.concatenate([boxes, y[:, -1:]], axis=-1)
                img = np.array(img, dtype=np.float32)
                # Move the channel axis to the front.
                inputs.append(np.transpose(img / 255.0, (2, 0, 1)))
                targets.append(np.array(y, dtype=np.float32))
                if len(targets) == self.batch_size:
                    tmp_inp = np.array(inputs)
                    tmp_targets = self._stack_targets(targets)
                    inputs = []
                    targets = []
                    yield tmp_inp, tmp_targets