Tensorflow2.0 YOLO篇之提取图像信息预处理
在上一篇博文中,我们已经将图片的路径和boxex信息分别存储在imgs和boxes的变量当中,现在我们就把他们转化为YOLO网络可以用的形式,本文中还是使用过vscode支持的jupyter的分布执行。有一说一,这个功能是真的好用.这里的代码是接着上一篇的,等到整个功能完全完成了,我会在最后一篇中留下完整代码的下载链接
转化为张量类型
使用tf.data.Dataset.from_tensor_slices将数据加载为tensor张量的形式
def preprocess(img,img_boxes):
# img :string path
# img_boxes: [40,5]
x = tf.io.read_file(img)
x = tf.image.decode_png(x,channels=3)
x =tf.image.convert_image_dtype(x,tf.float32)
return x ,img_boxes
def get_dataset(img_dir,ann_dir,batchsz):
# return tf dataset
# [b] ,boxes [b,40,5]
imgs,boxes = parse_annotation('data/train/image','data/train/annotation',obj_names)
db = tf.data.Dataset.from_tensor_slices((imgs,boxes))
db = db.shuffle(1000).map(preprocess).batch(batchsz).repeat()
print('db Images',len(imgs))
return db
可视化图片
我们将刚才记载好的信息可视化进行查看
# visual the db
from matplotlib import pyplot as plt
from matplotlib import patches
def db_visualize(db):
# imgs: [b,512,512,3]
# imgs_boxes: [b,40,5]
imgs,imgs_boxes = next(iter(db))
img,img_boxes = imgs[0],imgs_boxes[0]
f,ax1 = plt.subplots(1,figsize=(10,10))
# display the image ,[512,512,3]
ax1.imshow(img)
for x1,y1,x2,y2,l in img_boxes: # [40,5]
x1,y1,x2,y2 = float(x1),float(y1),float(x2),float(y2)
w =x2-x1
h = y2-y1
if l == 1: # green for sugarweet
color = (0,1,0)
elif l ==2: # red for weed
color = (1,0,0)
else: # ignore invalid boxes
break
rect = patches.Rectangle((x1,y1),w,h,linewidth=2,edgecolor=color,
facecolor='none')
ax1.add_patch(rect)
# %%
db_visualize(train_db)
效果如下图
图片增强
通过图片裁剪翻转等操作增加图片的数量,需要注意的在操作图片的时候图片的位置信息也会跟着改变
# %%
# data augementation
import imgaug as ia
from imgaug import augmenters as iaa
def augmentation_generator(yolo_dataset):
'''
Augmented batch generator from a yolo dataset
Parameters
----------
- YOLO dataset
Returns
-------
- augmented batch : tensor (shape : batch_size, IMAGE_W, IMAGE_H, 3)
batch : tupple(images, annotations)
batch[0] : images : tensor (shape : batch_size, IMAGE_W, IMAGE_H, 3)
batch[1] : annotations : tensor (shape : batch_size, max annot, 5)
'''
for batch in yolo_dataset:
# conversion tensor->numpy
img = batch[0].numpy()
boxes = batch[1].numpy()
# conversion bbox numpy->ia object
ia_boxes = []
for i in range(img.shape[0]):
ia_bbs = [ia.BoundingBox(x1=bb[0],
y1=bb[1],
x2=bb[2],
y2=bb[3]) for bb in boxes[i]
if (bb[0] + bb[1] +bb[2] + bb[3] > 0)]
ia_boxes.append(ia.BoundingBoxesOnImage(ia_bbs, shape=(512, 512)))
# data augmentation
seq = iaa.Sequential([
iaa.Fliplr(0.5),
iaa.Flipud(0.5),
iaa.Multiply((0.4, 1.6)), # change brightness
#iaa.ContrastNormalization((0.5, 1.5)),
#iaa.Affine(translate_px={"x": (-100,100), "y": (-100,100)}, scale=(0.7, 1.30))
])
#seq = iaa.Sequential([])
seq_det = seq.to_deterministic()
img_aug = seq_det.augment_images(img)
img_aug = np.clip(img_aug, 0, 1)
boxes_aug = seq_det.augment_bounding_boxes(ia_boxes)
# conversion ia object -> bbox numpy
for i in range(img.shape[0]):
boxes_aug[i] = boxes_aug[i].remove_out_of_image().clip_out_of_image()
for j, bb in enumerate(boxes_aug[i].bounding_boxes):
boxes[i,j,0] = bb.x1
boxes[i,j,1] = bb.y1
boxes[i,j,2] = bb.x2
boxes[i,j,3] = bb.y2
# conversion numpy->tensor
batch = (tf.convert_to_tensor(img_aug), tf.convert_to_tensor(boxes))
#batch = (img_aug, boxes)
#%%
aug_train_db = augmentation_generator(train_db)
db_visualize(aug_train_db)
Compose labels
刚才我们加载的label信息的格式试试[b,40,5],YOLO网络需要的形式是[b,16,16,5,6] 接下来我们将label是信息转换成对应的格式
首先我们想从5锚点中分别计算iou,选取最佳的anchors
# %%
IMGSZ = 512
GEIDSZ = 16
ANCHORS = [0.57273, 0.677385, 1.87446, 2.06253, 3.33843, 5.47434, 7.88282, 3.52778, 9.77052, 9.16828]
# %%
def process_true_boxes(gt_boxes,anchors):
# gt_boxes: [40,5]
# 512//16 = 32
scale = IMGSZ // GEIDSZ
# [5,2]
anchors = np.array(anchors).reshape((5,2))
# mask for object
detector_mask = np.zeros([GEIDSZ,GEIDSZ,5,1])
# x-y-w-h-l
matching_gt_box = np.zeros([GEIDSZ,GEIDSZ,5,5])
# [N,5] x1-y1-x2-y2-l => x-y-w-h-l
gt_boxes_grid = np.zeros(gt_boxes.shape)
# DB: tensor => numpy
gt_boxes = gt_boxes.numpy()
for i,box in enumerate(gt_boxes):
# box: [5],x1-y1-x2-y2
# 512 => 16
x = ((box[0]+box[2])/2)/scale
y = ((box[1]+box[3])/2)/scale
w = (box[2]-box[0])/scale
h = (box[3]-box[1])/scale
# [40,5] x-y-w-h-i
gt_boxes_grid[i] = np.array([x,y,w,h,box[4]])
if w*h > 0: # valid box
# fine the best anchor
best_anchor = 0
for j in range(5):
# calcute IOU
interct = np.minimum(w,anchors[j,0])*np.minimum(h,anchors[j,1])
union = w*h + (anchors[j,1]*anchors[j,0]) - interct
iou = interct/union
if iou > best_anchor:
best_anchor = j
best_iou = iou
# found the best anchors
if best_iou > 0:
x_coord = np.floor(x).astype(np.int32)
y_coord = np.floor(y).astype(np.int32)
# [b,h,w,5,1]
detector_mask[y_coord,x_coord,best_anchor] = 1
# [b,h,w,5,x-y-w-h-l]
matching_gt_box[y_coord,x_coord,best_anchor] = \
np.array([x,y,w,h,box[4]])
# [40,5] => [16,16,5,5]
# [16,16,5,5]
# [16,16,5,1]
# [40,5]
return matching_gt_box,detector_mask,gt_boxes_grid
接下来调整格式:
def ground_truth_generator(db):
for imgs,imgs_boxes in db:
# imgs: [b,512,512,3]
# imgs_boxes: [b,40,5]
batch_matching_gt_boxes = []
batch_detector_mask = []
batch_gt_boxes_grid = []
b = imgs.shape[0]
for i in range(b): # for each image
matching_gt_box,detector_mask,gt_boxes_grid = \
process_true_boxes(imgs_boxes[i],ANCHORS)
batch_matching_gt_boxes.append(matching_gt_box)
batch_detector_mask.append(detector_mask)
batch_gt_boxes_grid.append(gt_boxes_grid)
# [b,16,16,5,1]
detector_mask = tf.cast(np.array(batch_detector_mask),dtype=tf.float32)
# [b,16,16,5,5] x-y-w-h-l
matching_gt_box = tf.cast(np.array(batch_matching_gt_boxes),dtype=tf.float32)
# [b,40,5] x-y-w-h-l
gt_boxes_grid = tf.cast(np.array(batch_gt_boxes_grid),dtype=tf.float32)
matching_classes = tf.cast(matching_gt_box[...,4],dtype=tf.int32)
matching_classes_oh = tf.one_hot(matching_classes,depth=3)
# x-y-w-h-conf-l1-l2
# [b,16,16,5,2]
matching_classes_oh = tf.cast(matching_classes_oh[...,1:],dtype=tf.float32)
# [b,512,512,3]
# [b,16,16,5,1]
# [b,16,16,5,5]
# [b,16,16,5,2]
# [b,40,5]
yield imgs,detector_mask,matching_gt_box,matching_classes_oh,gt_boxes_grid
可视化效果
接下来我们可视化一下看一下效果
# visualize object mask
# train_db -> aug_train_db -> train_gen
train_gen = ground_truth_generator(aug_train_db)
img,detector_mask,matching_gt_box,matching_classes_oh,gt_boxes_grid =\
next(train_gen)
img,detector_mask,matching_gt_box,matching_classes_oh,gt_boxes_grid=\
img[0],detector_mask[0],matching_gt_box[0],matching_classes_oh[0],gt_boxes_grid[0]
fig,(ax1,ax2) = plt.subplots(2,figsize=(5,10))
ax1.imshow(img)
# [b,16,16,5,1] => [16,16,1]
mask = tf.reduce_sum(detector_mask,axis=2)
ax2.matshow(mask[...,0]) # [16,16]
参考书籍: TensorFlow 深度学习 — 龙龙老师