1.裁剪
import torch
from torchvision import transforms
import cv2
import numpy as np
import types
from numpy import random
class RandomSampleCrop(object):
    """Randomly crop an image and keep the boxes whose centre lies in the crop.

    Each call picks one entry of ``sample_options``:

    * ``None``            -- return the inputs untouched;
    * ``(min_iou, max_iou)`` -- sample a patch for which at least one
      ground-truth box has an IoU with the patch inside that range
      (``None`` on either side means "unbounded").

    Arguments (of ``__call__``):
        image (ndarray): H x W x C image.
        boxes (ndarray): [num_boxes, 4] boxes as (x1, y1, x2, y2) in pixels.
        labels (ndarray): one label per box.
    Return:
        (image, boxes, labels): the cropped image, the surviving boxes
        clipped to the crop and shifted into its coordinate frame, and
        their labels.

    The module-level helper ``jaccard_numpy(boxes, rect)`` is used to compute
    the IoU between every box and the candidate patch.
    """

    def __init__(self):
        self.sample_options = (
            # use the entire original input image
            None,
            # sample a patch with a minimum IoU of .1, .3, .7 or .9
            (0.1, None),
            (0.3, None),
            (0.7, None),
            (0.9, None),
            # sample a patch without any IoU constraint
            (None, None),
        )

    def __call__(self, image, boxes=None, labels=None):
        # Nothing to constrain the crop with: hand the sample back unchanged
        # instead of failing on an empty reduction further down.
        if boxes is None or len(boxes) == 0:
            return image, boxes, labels

        height, width, _ = image.shape
        while True:
            # Pick the mode by index. random.choice() would first convert the
            # ragged tuple into an array, which recent NumPy versions reject.
            mode = self.sample_options[random.randint(len(self.sample_options))]
            if mode is None:
                return image, boxes, labels

            min_iou, max_iou = mode
            if min_iou is None:
                min_iou = float('-inf')
            if max_iou is None:
                max_iou = float('inf')

            # at most 50 attempts per mode, then a new mode is drawn
            for _ in range(50):
                w = random.uniform(0.3 * width, width)
                h = random.uniform(0.3 * height, height)

                # aspect ratio must stay between 0.5 and 2
                if h / w < 0.5 or h / w > 2:
                    continue

                # Explicit lower bound: numpy's uniform(x) means
                # uniform(low=x, high=1.0), not uniform(0, x).
                left = random.uniform(0, width - w)
                top = random.uniform(0, height - h)

                # integer rect as x1, y1, x2, y2
                rect = np.array([int(left), int(top), int(left + w), int(top + h)])

                # IoU between every ground-truth box and the patch
                overlap = jaccard_numpy(boxes, rect)

                # retry unless some box satisfies min_iou <= IoU <= max_iou
                if not ((overlap >= min_iou) & (overlap <= max_iou)).any():
                    continue

                # keep only the boxes whose centre falls inside the patch
                centers = (boxes[:, :2] + boxes[:, 2:]) / 2.0
                m1 = (rect[0] < centers[:, 0]) * (rect[1] < centers[:, 1])
                m2 = (rect[2] > centers[:, 0]) * (rect[3] > centers[:, 1])
                mask = m1 * m2
                if not mask.any():
                    continue

                current_image = image[rect[1]:rect[3], rect[0]:rect[2], :]
                current_boxes = boxes[mask, :].copy()
                current_labels = labels[mask]

                # clip the boxes to the patch, then express them relative to
                # the patch's top-left corner
                current_boxes[:, :2] = np.maximum(current_boxes[:, :2], rect[:2])
                current_boxes[:, :2] -= rect[:2]
                current_boxes[:, 2:] = np.minimum(current_boxes[:, 2:], rect[2:])
                current_boxes[:, 2:] -= rect[:2]

                return current_image, current_boxes, current_labels
def debug_random_crop():
    """Run RandomSampleCrop once on ./test.jpg and save a visualisation.

    Prints the image shape and the crop results, draws the surviving boxes
    on the cropped image and writes it to ./draw_current_image.jpg.

    Raises:
        FileNotFoundError: if ./test.jpg cannot be read by OpenCV.
    """
    import cv2

    random_crop = RandomSampleCrop()
    path = './test.jpg'
    img = cv2.imread(path)
    # cv2.imread signals failure by returning None rather than raising
    if img is None:
        raise FileNotFoundError('cannot read image: ' + path)
    print(img.shape)
    boxes = np.array([[68, 62, 311, 523],
                      [276, 235, 498, 535],
                      [480, 160, 701, 510]])
    labels = np.array([[1],
                       [1],
                       [1]])
    current_image, current_boxes, current_labels = random_crop(img, boxes, labels)
    print('==current_image.shape:', current_image.shape)
    print('==current_boxes:', current_boxes)
    print('==current_labels:', current_labels)
    for box in current_boxes:
        # plain Python ints: OpenCV point arguments may reject numpy scalars
        x1, y1, x2, y2 = (int(v) for v in box)
        cv2.rectangle(current_image, (x1, y1), (x2, y2), color=(0, 0, 255), thickness=2)
    cv2.imwrite('./draw_current_image.jpg', current_image)


if __name__ == '__main__':
    debug_random_crop()
变为
2.拓展
def expand(image, boxes, filler):
    """
    Perform a zooming out operation by placing the image in a larger canvas of filler material.
    Helps to learn to detect smaller objects.
    :param image: image, a tensor of dimensions (3, original_h, original_w)
    :param boxes: bounding boxes in boundary coordinates, a tensor of dimensions (n_objects, 4)
    :param filler: RGB values of the filler material, a list like [R, G, B]
    :return: expanded image, updated bounding box coordinates
    """
    # Calculate dimensions of proposed expanded (zoomed-out) image
    original_h = image.size(1)
    original_w = image.size(2)
    max_scale = 4
    scale = random.uniform(1, max_scale)
    new_h = int(scale * original_h)
    new_w = int(scale * original_w)

    # Create such an image with the filler.
    # Note - do not use expand() like filler.unsqueeze(1).unsqueeze(1).expand(3, new_h, new_w)
    # because all expanded values would share the same memory, so changing one pixel changes all.
    filler = torch.FloatTensor(filler)  # (3)
    new_image = torch.ones((3, new_h, new_w), dtype=torch.float) * filler.unsqueeze(1).unsqueeze(1)  # (3, new_h, new_w)

    # Place the original image at random coordinates in this new image (origin at top-left).
    # The offset is drawn with uniform() + truncation instead of randint(): the stdlib randint
    # includes its upper bound while numpy's excludes it (and raises when the range is empty,
    # i.e. when the canvas is no larger than the image). min() guards the inclusive endpoint.
    max_left = new_w - original_w
    max_top = new_h - original_h
    left = min(int(random.uniform(0, max_left + 1)), max_left)
    right = left + original_w
    top = min(int(random.uniform(0, max_top + 1)), max_top)
    bottom = top + original_h
    new_image[:, top:bottom, left:right] = image
    print('==boxes:', boxes)

    # Adjust bounding boxes' coordinates accordingly
    new_boxes = boxes + torch.FloatTensor([left, top, left, top]).unsqueeze(
        0)  # (n_objects, 4), n_objects is the no. of objects in this image
    print('===new_boxes:', new_boxes)
    return new_image, new_boxes
def torch_cutout():
    """Demo of ``expand``: draw the boxes before and after zooming out.

    Reads ./2008_000008.jpg, writes ./img_rect.jpg (original image with its
    boxes) and ./fin_img_rect.jpg (expanded image with the shifted boxes).
    The image is loaded with OpenCV and converted with torch only, so no
    names beyond cv2 / numpy / torch are needed.

    Raises:
        FileNotFoundError: if the input image cannot be read by OpenCV.
    """
    info = {"boxes": [[52, 86, 470, 419],
                      [157, 43, 288, 166]],
            "labels": [13, 15], "difficulties": [0, 0]}
    path = './2008_000008.jpg'
    img = cv2.imread(path)  # BGR channel order
    if img is None:
        raise FileNotFoundError('cannot read image: ' + path)
    bboxs = info['boxes']

    # RGB copy taken before any box is drawn on `img`
    rgb = np.ascontiguousarray(img[..., ::-1])

    for box in bboxs:
        x1, y1, x2, y2 = box
        print('x1, y1, x2, y2:', x1, y1, x2, y2)
        cv2.rectangle(img, (x1, y1), (x2, y2), color=(0, 0, 255), thickness=2)
    cv2.imwrite('./img_rect.jpg', img)

    mean = [0.485, 0.456, 0.406]
    # (H, W, 3) uint8 -> (3, H, W) float in [0, 1]
    new_image = torch.from_numpy(rgb).permute(2, 0, 1).float() / 255.
    boxes = torch.FloatTensor(bboxs)

    new_image, new_boxes = expand(new_image, boxes, filler=mean)

    # back to an (H, W, 3) BGR array in [0, 255] for OpenCV
    fin_img = new_image.permute(1, 2, 0).numpy() * 255.
    fin_img = fin_img[..., ::-1].copy()
    print('fin_img.shape:', fin_img.shape)
    fin_boxes = new_boxes.numpy()
    print(fin_boxes)
    for box in fin_boxes:
        x1, y1, x2, y2 = box
        print('x1, y1, x2, y2:', x1, y1, x2, y2)
        # the boxes are float32 here; OpenCV points must be integers
        cv2.rectangle(fin_img, (int(x1), int(y1)), (int(x2), int(y2)), color=(0, 0, 255), thickness=2)
    cv2.imwrite('./fin_img_rect.jpg', fin_img)


if __name__ == '__main__':
    torch_cutout()
变为
检测任务的完整 transform 流程包括随机裁剪、扩张(expand)、resize 等。
import torch
from torchvision import transforms
import cv2
import numpy as np
import types
from numpy import random
def intersect(box_a, box_b):
    """Return the intersection area of every box in ``box_a`` with ``box_b``.

    Args:
        box_a: boxes as (x1, y1, x2, y2), Shape: [num_boxes, 4]
        box_b: a single box as (x1, y1, x2, y2), Shape: [4]
    Return:
        intersection areas, Shape: [num_boxes]
    """
    top_left = np.maximum(box_a[:, :2], box_b[:2])
    bottom_right = np.minimum(box_a[:, 2:], box_b[2:])
    # negative extents mean "no overlap" and are clamped to zero
    extent = np.clip((bottom_right - top_left), a_min=0, a_max=np.inf)
    widths, heights = extent[:, 0], extent[:, 1]
    return widths * heights
def jaccard_numpy(box_a, box_b):
    """Compute the jaccard overlap (intersection over union) of each box in
    ``box_a`` with the single box ``box_b``.
    E.g.:
        A ∩ B / A ∪ B = A ∩ B / (area(A) + area(B) - A ∩ B)
    Args:
        box_a: Multiple bounding boxes, Shape: [num_boxes,4]
        box_b: Single bounding box, Shape: [4]
    Return:
        jaccard overlap, one value per box in box_a: Shape: [num_boxes]
    """
    overlap_area = intersect(box_a, box_b)
    widths_a = box_a[:, 2] - box_a[:, 0]
    heights_a = box_a[:, 3] - box_a[:, 1]
    area_b = (box_b[2] - box_b[0]) * (box_b[3] - box_b[1])
    union = widths_a * heights_a + area_b - overlap_area
    return overlap_area / union
class Compose(object):
    """Chain several augmentations into a single callable.

    Args:
        transforms (List[Transform]): callables taking and returning
            ``(img, boxes, labels)``; they are applied in list order.
    Example:
        >>> augmentations.Compose([
        >>>     transforms.CenterCrop(10),
        >>>     transforms.ToTensor(),
        >>> ])
    """

    def __init__(self, transforms):
        self.transforms = transforms

    def __call__(self, img, boxes=None, labels=None):
        # the output triple of one step is the input of the next
        for transform in self.transforms:
            img, boxes, labels = transform(img, boxes, labels)
        return img, boxes, labels
class Lambda(object):
    """Wrap a plain function so it can be used as a transform."""

    def __init__(self, lambd):
        assert isinstance(lambd, types.LambdaType)
        self.lambd = lambd

    def __call__(self, img, boxes=None, labels=None):
        func = self.lambd
        return func(img, boxes, labels)
class ConvertFromInts(object):
    """Cast the image to float32; boxes and labels pass through unchanged."""

    def __call__(self, image, boxes=None, labels=None):
        as_float = image.astype(np.float32)
        return as_float, boxes, labels
class Normalize(object):
    """Scale an image to [0, 1] and standardise it with ``mean`` and ``std``.

    Args:
        mean: per-channel mean in [0, 1] units; ``None`` means no shift.
        std: per-channel standard deviation in [0, 1] units; ``None`` means
            no scaling.
    """

    def __init__(self, mean=None, std=None):
        # np.array(None, dtype=float32) is NaN and would turn the whole image
        # into NaN, so the defaults map to the identity shift / scale instead.
        self.mean = np.array(0.0 if mean is None else mean, dtype=np.float32)
        self.std = np.array(1.0 if std is None else std, dtype=np.float32)

    def __call__(self, image, boxes=None, labels=None):
        # astype() copies, so the in-place arithmetic never touches the
        # caller's array
        image = image.astype(np.float32)
        image /= 255.
        image -= self.mean
        image /= self.std
        return image, boxes, labels
class ToAbsoluteCoords(object):
    """Scale box coordinates by the image size, in place.

    Columns 0 and 2 (x) are multiplied by the image width, columns 1 and 3
    (y) by the image height. ``boxes`` is modified and returned.
    """

    def __call__(self, image, boxes=None, labels=None):
        height, width, channels = image.shape
        boxes[:, 0:4:2] *= width
        boxes[:, 1:4:2] *= height
        return image, boxes, labels
class ToPercentCoords(object):
    """Divide box coordinates by the image size, in place.

    Columns 0 and 2 (x) are divided by the image width, columns 1 and 3 (y)
    by the image height. ``boxes`` is modified and returned.
    """

    def __call__(self, image, boxes=None, labels=None):
        height, width, channels = image.shape
        boxes[:, 0:4:2] /= width
        boxes[:, 1:4:2] /= height
        return image, boxes, labels
class Resize(object):
    """Resize the image to a ``size`` x ``size`` square with cv2.resize.

    Boxes and labels are returned unchanged.
    """

    def __init__(self, size=300):
        self.size = size

    def __call__(self, image, boxes=None, labels=None):
        side = self.size
        resized = cv2.resize(image, (side, side))
        return resized, boxes, labels
class RandomSaturation(object):
    """With probability 1/2, scale channel 1 of the image in place.

    The factor is drawn uniformly from [lower, upper].
    """

    def __init__(self, lower=0.5, upper=1.5):
        self.lower = lower
        self.upper = upper
        assert self.upper >= self.lower, "contrast upper must be >= lower."
        assert self.lower >= 0, "contrast lower must be non-negative."

    def __call__(self, image, boxes=None, labels=None):
        apply = random.randint(2)
        if apply:
            factor = random.uniform(self.lower, self.upper)
            image[:, :, 1] *= factor
        return image, boxes, labels
class RandomHue(object):
    """With probability 1/2, shift channel 0 of the image in place.

    The shift is drawn uniformly from [-delta, delta]; values that leave
    [0, 360] are wrapped back by adding or subtracting 360.
    """

    def __init__(self, delta=18.0):
        assert delta >= 0.0 and delta <= 360.0
        self.delta = delta

    def __call__(self, image, boxes=None, labels=None):
        if random.randint(2):
            hue = image[:, :, 0]  # a view: edits land in `image`
            hue += random.uniform(-self.delta, self.delta)
            hue[hue > 360.0] -= 360.0
            hue[hue < 0.0] += 360.0
        return image, boxes, labels
class RandomLightingNoise(object):
    """With probability 1/2, reorder the image channels by a random permutation."""

    def __init__(self):
        # all six orderings of three channels
        self.perms = (
            (0, 1, 2), (0, 2, 1),
            (1, 0, 2), (1, 2, 0),
            (2, 0, 1), (2, 1, 0),
        )

    def __call__(self, image, boxes=None, labels=None):
        if not random.randint(2):
            return image, boxes, labels
        order = self.perms[random.randint(len(self.perms))]
        image = SwapChannels(order)(image)
        return image, boxes, labels
class ConvertColor(object):
    """Convert the image between the BGR and HSV colour spaces with OpenCV.

    Only BGR -> HSV and HSV -> BGR are supported; any other combination of
    ``current`` / ``transform`` raises NotImplementedError when called.
    """

    def __init__(self, current='BGR', transform='HSV'):
        self.transform = transform
        self.current = current

    def __call__(self, image, boxes=None, labels=None):
        route = (self.current, self.transform)
        if route == ('BGR', 'HSV'):
            code = cv2.COLOR_BGR2HSV
        elif route == ('HSV', 'BGR'):
            code = cv2.COLOR_HSV2BGR
        else:
            raise NotImplementedError
        image = cv2.cvtColor(image, code)
        return image, boxes, labels
class RandomContrast(object):
    """With probability 1/2, multiply the whole image in place by a factor
    drawn uniformly from [lower, upper]. Expects a float image.
    """

    def __init__(self, lower=0.5, upper=1.5):
        self.lower = lower
        self.upper = upper
        assert self.upper >= self.lower, "contrast upper must be >= lower."
        assert self.lower >= 0, "contrast lower must be non-negative."

    def __call__(self, image, boxes=None, labels=None):
        apply = random.randint(2)
        if apply:
            image *= random.uniform(self.lower, self.upper)
        return image, boxes, labels
class RandomBrightness(object):
    """With probability 1/2, add an offset drawn uniformly from
    [-delta, delta] to the whole image, in place.
    """

    def __init__(self, delta=32):
        assert delta >= 0.0
        assert delta <= 255.0
        self.delta = delta

    def __call__(self, image, boxes=None, labels=None):
        apply = random.randint(2)
        if apply:
            image += random.uniform(-self.delta, self.delta)
        return image, boxes, labels
class ToCV2Image(object):
    """Turn a tensor into a float32 numpy array with axes reordered by
    ``transpose((1, 2, 0))`` (first axis moved last).
    """

    def __call__(self, tensor, boxes=None, labels=None):
        array = tensor.cpu().numpy().astype(np.float32)
        return array.transpose((1, 2, 0)), boxes, labels
class ToTensor(object):
    """Turn a numpy image into a float32 tensor with axes reordered by
    ``permute(2, 0, 1)`` (last axis moved first).
    """

    def __call__(self, cvimage, boxes=None, labels=None):
        as_float = cvimage.astype(np.float32)
        tensor = torch.from_numpy(as_float).permute(2, 0, 1)
        return tensor, boxes, labels
class RandomSampleCrop(object):
    """Randomly crop an image and keep the boxes whose centre lies in the crop.

    Each call picks one entry of ``sample_options``:

    * ``None``            -- return the inputs untouched;
    * ``(min_iou, max_iou)`` -- sample a patch for which at least one
      ground-truth box has an IoU with the patch inside that range
      (``None`` on either side means "unbounded").

    Arguments (of ``__call__``):
        image (ndarray): H x W x C image.
        boxes (ndarray): [num_boxes, 4] boxes as (x1, y1, x2, y2) in pixels.
        labels (ndarray): one label per box.
    Return:
        (image, boxes, labels): the cropped image, the surviving boxes
        clipped to the crop and shifted into its coordinate frame, and
        their labels.

    The module-level helper ``jaccard_numpy(boxes, rect)`` is used to compute
    the IoU between every box and the candidate patch.
    """

    def __init__(self):
        self.sample_options = (
            # use the entire original input image
            None,
            # sample a patch with a minimum IoU of .1, .3, .7 or .9
            (0.1, None),
            (0.3, None),
            (0.7, None),
            (0.9, None),
            # sample a patch without any IoU constraint
            (None, None),
        )

    def __call__(self, image, boxes=None, labels=None):
        # Nothing to constrain the crop with: hand the sample back unchanged
        # instead of failing on an empty reduction further down.
        if boxes is None or len(boxes) == 0:
            return image, boxes, labels

        height, width, _ = image.shape
        while True:
            # Pick the mode by index. random.choice() would first convert the
            # ragged tuple into an array, which recent NumPy versions reject.
            mode = self.sample_options[random.randint(len(self.sample_options))]
            if mode is None:
                return image, boxes, labels

            min_iou, max_iou = mode
            if min_iou is None:
                min_iou = float('-inf')
            if max_iou is None:
                max_iou = float('inf')

            # at most 50 attempts per mode, then a new mode is drawn
            for _ in range(50):
                w = random.uniform(0.3 * width, width)
                h = random.uniform(0.3 * height, height)

                # aspect ratio must stay between 0.5 and 2
                if h / w < 0.5 or h / w > 2:
                    continue

                # Explicit lower bound: numpy's uniform(x) means
                # uniform(low=x, high=1.0), not uniform(0, x).
                left = random.uniform(0, width - w)
                top = random.uniform(0, height - h)

                # integer rect as x1, y1, x2, y2
                rect = np.array([int(left), int(top), int(left + w), int(top + h)])

                # IoU between every ground-truth box and the patch
                overlap = jaccard_numpy(boxes, rect)

                # retry unless some box satisfies min_iou <= IoU <= max_iou
                if not ((overlap >= min_iou) & (overlap <= max_iou)).any():
                    continue

                # keep only the boxes whose centre falls inside the patch
                centers = (boxes[:, :2] + boxes[:, 2:]) / 2.0
                m1 = (rect[0] < centers[:, 0]) * (rect[1] < centers[:, 1])
                m2 = (rect[2] > centers[:, 0]) * (rect[3] > centers[:, 1])
                mask = m1 * m2
                if not mask.any():
                    continue

                current_image = image[rect[1]:rect[3], rect[0]:rect[2], :]
                current_boxes = boxes[mask, :].copy()
                current_labels = labels[mask]

                # clip the boxes to the patch, then express them relative to
                # the patch's top-left corner
                current_boxes[:, :2] = np.maximum(current_boxes[:, :2], rect[:2])
                current_boxes[:, :2] -= rect[:2]
                current_boxes[:, 2:] = np.minimum(current_boxes[:, 2:], rect[2:])
                current_boxes[:, 2:] -= rect[:2]

                return current_image, current_boxes, current_labels
class Expand(object):
    """With probability 1/2, paste the image at a random position on a larger
    canvas filled with ``mean`` and shift the boxes by the same offset.

    The canvas is ``ratio`` times the image size, with ``ratio`` drawn
    uniformly from [1, 4]. The input boxes are not modified; a shifted copy
    is returned.
    """

    def __init__(self, mean):
        self.mean = mean

    def __call__(self, image, boxes, labels):
        if random.randint(2):
            return image, boxes, labels

        height, width, depth = image.shape
        ratio = random.uniform(1, 4)
        left = random.uniform(0, width * ratio - width)
        top = random.uniform(0, height * ratio - height)
        x0, y0 = int(left), int(top)

        # every pixel is overwritten by the fill colour before the paste
        canvas = np.empty(
            (int(height * ratio), int(width * ratio), depth),
            dtype=image.dtype)
        canvas[:, :, :] = self.mean
        canvas[y0:int(top + height), x0:int(left + width)] = image

        shifted = boxes.copy()
        shifted[:, :2] += (x0, y0)
        shifted[:, 2:] += (x0, y0)
        return canvas, shifted, labels
class RandomMirror(object):
    """With probability 1/2, flip the image left-right and mirror the boxes.

    The input boxes are not modified; a mirrored copy is returned.
    """

    def __call__(self, image, boxes, classes):
        _, width, _ = image.shape
        if not random.randint(2):
            return image, boxes, classes
        flipped = image[:, ::-1]
        mirrored = boxes.copy()
        # new x1 = width - old x2, new x2 = width - old x1
        mirrored[:, 0::2] = width - boxes[:, 2::-2]
        return flipped, mirrored, classes
class SwapChannels(object):
    """Reorder the last axis of an image according to a fixed index tuple.

    Args:
        swaps (int triple): final order of channels
            eg: (2, 1, 0)
    """

    def __init__(self, swaps):
        self.swaps = swaps

    def __call__(self, image):
        """
        Args:
            image: image indexed as [row, column, channel]
        Return:
            the image with its channels taken in the order given by ``swaps``
        """
        order = self.swaps
        return image[:, :, order]
class PhotometricDistort(object):
    """Apply random brightness, then a random colour-distortion chain.

    The chain is contrast -> HSV -> saturation -> hue -> BGR -> contrast with
    one of the two contrast steps dropped: a coin flip decides whether
    contrast runs before or after the HSV adjustments. The input image is
    copied first, so the caller's array is left untouched.
    """

    def __init__(self):
        self.pd = [
            RandomContrast(),
            ConvertColor(transform='HSV'),
            RandomSaturation(),
            RandomHue(),
            ConvertColor(current='HSV', transform='BGR'),
            RandomContrast()
        ]
        self.rand_brightness = RandomBrightness()

    def __call__(self, image, boxes, labels):
        im = image.copy()
        im, boxes, labels = self.rand_brightness(im, boxes, labels)
        contrast_first = random.randint(2)
        # drop the trailing contrast step, or the leading one
        stages = self.pd[:-1] if contrast_first else self.pd[1:]
        im, boxes, labels = Compose(stages)(im, boxes, labels)
        return im, boxes, labels
class SSDAugmentation(object):
    """Training-time augmentation pipeline for SSD-style detectors.

    Steps, in order: cast to float32, boxes to absolute pixels, photometric
    distortion, random expand, random crop, random mirror, boxes back to
    relative coordinates, resize to ``size`` x ``size``, normalisation.

    Args:
        size (int): side length of the square output image.
        mean: per-channel mean in [0, 1] units (as consumed by Normalize).
        std: per-channel standard deviation in [0, 1] units.
    """

    def __init__(self, size=300, mean=(0.406, 0.456, 0.485), std=(0.225, 0.224, 0.229)):
        self.mean = mean
        self.size = size
        self.std = std
        # Expand runs before Normalize divides the image by 255, so its fill
        # colour has to be the mean expressed on the same 0-255 scale;
        # otherwise the padding would be almost black instead of mean-coloured.
        fill = tuple(255.0 * channel for channel in self.mean)
        self.augment = Compose([
            ConvertFromInts(),
            ToAbsoluteCoords(),
            PhotometricDistort(),
            Expand(fill),
            RandomSampleCrop(),
            RandomMirror(),
            ToPercentCoords(),
            Resize(self.size),
            Normalize(self.mean, self.std)
        ])

    def __call__(self, img, boxes, labels):
        return self.augment(img, boxes, labels)
def debug_random_crop():
    """Run RandomSampleCrop once on ./test.jpg and save a visualisation.

    Prints the image shape and the crop results, draws the surviving boxes
    on the cropped image and writes it to ./draw_current_image.jpg.

    Raises:
        FileNotFoundError: if ./test.jpg cannot be read by OpenCV.
    """
    import cv2

    random_crop = RandomSampleCrop()
    path = './test.jpg'
    img = cv2.imread(path)
    # cv2.imread signals failure by returning None rather than raising
    if img is None:
        raise FileNotFoundError('cannot read image: ' + path)
    print(img.shape)
    boxes = np.array([[68, 62, 311, 523],
                      [276, 235, 498, 535],
                      [480, 160, 701, 510]])
    labels = np.array([[1],
                       [1],
                       [1]])
    current_image, current_boxes, current_labels = random_crop(img, boxes, labels)
    print('==current_image.shape:', current_image.shape)
    print('==current_boxes:', current_boxes)
    print('==current_labels:', current_labels)
    for box in current_boxes:
        # plain Python ints: OpenCV point arguments may reject numpy scalars
        x1, y1, x2, y2 = (int(v) for v in box)
        cv2.rectangle(current_image, (x1, y1), (x2, y2), color=(0, 0, 255), thickness=2)
    cv2.imwrite('./draw_current_image.jpg', current_image)


if __name__ == '__main__':
    debug_random_crop()
3.旋转
import os
import cv2
import numpy as np
import json
import imgaug as ia
from imgaug import augmenters as iaa
def may_augment_poly(aug, img_shape, poly):
    """Apply the augmenter ``aug`` to the vertices of one polygon.

    Each point of ``poly`` is wrapped in an imgaug Keypoint, the keypoints
    are passed through ``aug.augment_keypoints`` for an image of shape
    ``img_shape``, and the transformed points are returned as a list of
    ``(x, y)`` tuples.
    """
    source = ia.KeypointsOnImage(
        [ia.Keypoint(pt[0], pt[1]) for pt in poly], shape=img_shape)
    moved = aug.augment_keypoints([source])[0]
    return [(kp.x, kp.y) for kp in moved.keypoints]
def get_express_code_txt():
    """Augment the first annotated image of the data folder and save a preview.

    For the first ``.jpg`` found in the data folder, the polygons of the
    matching ``.json`` annotation file are loaded, the image is randomly
    flipped / rotated / scaled with imgaug, the same (deterministic)
    transform is applied to the polygon vertices, and the result is drawn
    and written to ./image_aug.jpg.

    Raises:
        FileNotFoundError: if the image cannot be read by OpenCV.
    """
    path = './标好快递单二维码数据'
    imgs_list_path = [os.path.join(path, i) for i in os.listdir(path) if '.jpg' in i]
    for i, img_list_path in enumerate(imgs_list_path):
        # NOTE(review): only the first image is processed; the nesting below
        # assumes everything belongs under this debug limit -- confirm.
        if i < 1:
            print('==img_list_path:', img_list_path)
            img = cv2.imread(img_list_path)
            # cv2.imread signals failure by returning None rather than raising
            if img is None:
                raise FileNotFoundError('cannot read image: ' + img_list_path)
            json_list_path = img_list_path.replace('.jpg', '.json')
            with open(json_list_path, 'r') as file:
                json_info = json.load(file)
            shapes = json_info['shapes']
            output_points = []
            for shape in shapes:
                # builtin int: the np.int alias no longer exists in NumPy >= 1.24
                points = np.array(shape['points']).astype(int)
                # NOTE(review): cal_stand_points / polygon_area1 are not
                # defined in this snippet; they must be provided elsewhere.
                points = cal_stand_points(points)
                points = polygon_area1(points)
                output_points.append(list(map(int, (points.reshape(-1).tolist()))))
            print('==output_points:', output_points)
            seq = iaa.Sequential([
                iaa.Fliplr(0.5),
                iaa.Affine(
                    rotate=(0, 360),  # random rotation between 0 and 360 degrees
                    scale=(0.7, 1.0),  # scale to 70-100% (shrinks the image, adding black borders)
                ),  # affects keypoints
            ])
            # freeze the random parameters so image and polygons get the same transform
            seq_def = seq.to_deterministic()
            image_aug = seq_def.augment_image(img)
            print('==image_aug.shape:', image_aug.shape)
            line_polys = []
            polys = np.array(output_points).reshape(-1, 4, 2).astype(int)
            print('==polys:', polys.shape)
            for poly in polys:
                new_poly = may_augment_poly(seq_def, img.shape, poly)
                line_polys.append(new_poly)
            print('=line_polys:', line_polys)
            # debug: draw the augmented polygons
            for line_poly in line_polys:
                # cv2.polylines expects 32-bit integer points
                cv2.polylines(image_aug, [np.array(line_poly).reshape(-1, 1, 2).astype(np.int32)], True, (0, 0, 255), thickness=2)
            cv2.imwrite('./image_aug.jpg', image_aug)


if __name__ == '__main__':
    get_express_code_txt()
json文件:
{
"version": "4.2.10",
"shapes": [
{
"shape_type": "polygon",
"group_id": null,
"label": "code",
"points": [
[
207.6190476190476,
689.2857142857143
],
[
613.5714285714286,
545.2380952380953
],
[
654.047619047619,
635.7142857142858
],
[
254.04761904761904,
777.3809523809524
]
],
"flags": {}
},
{
"shape_type": "polygon",
"group_id": null,
"label": "code",
"points": [
[
500.4761904761905,
883.3333333333334
],
[
858.8095238095239,
757.1428571428572
],
[
881.4285714285716,
796.4285714285714
],
[
513.5714285714286,
925.0
]
],
"flags": {}
},
{
"shape_type": "polygon",
"group_id": null,
"label": "code",
"points": [
[
595.7142857142858,
1059.5238095238096
],
[
960.0,
933.3333333333334
],
[
981.4285714285716,
973.8095238095239
],
[
606.4285714285714,
1101.1904761904761
]
],
"flags": {}
}
],
"lineColor": [
0,
255,
0,
128
],
"fillColor": [
255,
0,
0,
128
],
"imageHeight": 1422,
"imageData": null,
"imageWidth": 1152,
"imagePath": "72.jpg",
"flags": {}
}
原图 增强图