YOLOv5:LoadImages类、letterbox函数
前言
- 由于水平有限,难免出现错漏,敬请批评改正。
- 更多精彩内容,可点击进入YOLO系列专栏或我的个人主页查看
前提条件
相关介绍
- Python是一种跨平台的计算机程序设计语言,是一种结合了解释性、编译性、互动性和面向对象的高层次脚本语言。最初被设计用于编写自动化脚本(shell),随着版本的不断更新和语言新功能的添加,越来越多地被用于独立的、大型项目的开发。
- Python OS模块是负责程序与操作系统的交互,提供了访问操作系统底层的接口和非常丰富的方法用来处理文件和目录。
letterbox()函数:自适应图片缩放
- letterbox的主要思想是尽可能地利用网络感受野的信息特征。在YOLOv5中,最后一层特征图中的每个点对应原图中32×32的区域信息;在保证图片变换比例一致的情况下,只要长宽均可以被32整除,就可以有效地利用感受野的信息。
- 假设原图尺寸为(720, 640)(高, 宽),目标缩放尺寸为(640, 640)。要想满足缩放的要求,应选取两个方向缩放比例中较小的一个:$\min(640 \div 720,\ 640 \div 640) = 640 \div 720 \approx 0.89$,则图片被缩放为(640, 569)。然后,要填充边界至可以被stride=32整除,则569填充至576,最终得到图片尺寸为(640, 576)。
import cv2
def letterbox(im, new_shape=(640, 640), color=(114, 114, 114), auto=True, scaleFill=False, scaleup=True, stride=32):
    """Resize ``im`` to fit inside ``new_shape`` keeping its aspect ratio, then pad the borders.

    Args:
        im: Image array; ``im.shape[:2]`` is read as (height, width) and the
            array is handed to ``cv2.resize`` / ``cv2.copyMakeBorder``.
        new_shape: Target (height, width), or a single int used for both sides.
        color: Constant border value passed to ``cv2.copyMakeBorder``.
        auto: When true, pad only up to the next multiple of ``stride`` rather
            than all the way to ``new_shape``.
        scaleFill: When true (and ``auto`` is false), stretch the image to
            exactly ``new_shape`` with no padding; the aspect ratio is not kept.
        scaleup: When false, the scale ratio is capped at 1.0 so the image is
            never enlarged.
        stride: Modulus applied to the padding when ``auto`` is true.

    Returns:
        A tuple ``(im, ratio, (dw, dh))``: the resized and padded image, the
        (width, height) scale ratios, and the per-side padding (half of the
        total padding along width and height).
    """
    shape = im.shape[:2]  # current (height, width)
    if isinstance(new_shape, int):
        new_shape = (new_shape, new_shape)

    # Use the smaller of the two ratios so the whole image fits in new_shape.
    r = min(new_shape[0] / shape[0], new_shape[1] / shape[1])
    if not scaleup:
        r = min(r, 1.0)

    ratio = r, r  # (width, height) ratios
    new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r))  # (width, height)
    dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1]  # total padding
    if auto:
        # Built-in % instead of np.mod: the operands are plain non-negative
        # ints, and this keeps the function free of a numpy dependency.
        dw, dh = dw % stride, dh % stride
    elif scaleFill:
        dw, dh = 0.0, 0.0
        new_unpad = (new_shape[1], new_shape[0])
        ratio = new_shape[1] / shape[1], new_shape[0] / shape[0]

    # Split the padding evenly between the two sides.
    dw /= 2
    dh /= 2

    if shape[::-1] != new_unpad:
        im = cv2.resize(im, new_unpad, interpolation=cv2.INTER_LINEAR)
    # The -0.1 / +0.1 offsets make an odd total padding split as n and n + 1
    # instead of both halves rounding the same way.
    top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1))
    left, right = int(round(dw - 0.1)), int(round(dw + 0.1))
    im = cv2.copyMakeBorder(im, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color)
    return im, ratio, (dw, dh)
if __name__ == "__main__":
    # Demo: letterbox a 720x640 (height x width) image with the default settings.
    source = '1.jpg'
    im = cv2.imread(source)
    # cv2.imread returns None instead of raising when the file is missing or
    # cannot be decoded; fail here with a clear message rather than in resize.
    if im is None:
        raise FileNotFoundError(f'Image Not Found {source}')
    im = cv2.resize(im, (640, 720))  # dsize is (width, height)
    print('原图大小:', im.shape)
    im, ratio, (dw, dh) = letterbox(im)
    print('缩放后大小:', im.shape)
原图大小: (720, 640, 3)
缩放后大小: (640, 576, 3)
LoadImages类:读取测试图片数据集
- LoadImages类实现涉及到两个魔法函数__iter__和__next__,这使其拥有迭代器的特性。
import os
import glob
import cv2
import numpy as np
from pathlib import Path
from utils.augmentations import letterbox
# Help text pointing at the YOLOv5 custom-data wiki page.
HELP_URL = 'See https://github.com/ultralytics/yolov5/wiki/Train-Custom-Data'

# File suffixes (lower-case, without the dot) treated as images.
IMG_FORMATS = (
    'bmp', 'dng', 'jpeg', 'jpg', 'mpo',
    'png', 'tif', 'tiff', 'webp', 'pfm',
)

# File suffixes (lower-case, without the dot) treated as videos.
VID_FORMATS = (
    'asf', 'avi', 'gif', 'm4v', 'mkv', 'mov',
    'mp4', 'mpeg', 'mpg', 'ts', 'wmv',
)
class LoadImages:
    """Iterator over image and video files that yields one frame per step.

    Images are listed before videos. Each step returns the source path, the
    preprocessed frame, the frame as read by OpenCV, the current
    ``cv2.VideoCapture`` (``None`` when no video was found) and a short
    progress string.
    """

    def __init__(self, path, img_size=640, stride=32, auto=True, transforms=None, vid_stride=1):
        """Collect the image and video files referred to by ``path``.

        Args:
            path: A file, a directory, a glob pattern containing ``*``, a list
                or tuple of those, or a ``.txt`` file whose whitespace-separated
                entries are such paths.
            img_size: Target size forwarded to ``letterbox``.
            stride: Stride forwarded to ``letterbox``.
            auto: ``auto`` flag forwarded to ``letterbox``.
            transforms: Optional callable applied to the raw frame instead of
                the letterbox / transpose preprocessing.
            vid_stride: Number of frames grabbed per step when reading a video.

        Raises:
            FileNotFoundError: If an entry is neither a glob pattern, a
                directory, nor an existing file.
            AssertionError: If no file with a supported suffix was found.
        """
        print("__init__")
        if isinstance(path, str) and Path(path).suffix == ".txt":
            path = Path(path).read_text().rsplit()
        files = []
        # Pre-bind p so the "nothing found" message below is still well-defined
        # when `path` is an empty list and the loop body never runs.
        p = path
        for p in sorted(path) if isinstance(path, (list, tuple)) else [path]:
            p = str(Path(p).resolve())
            if '*' in p:
                files.extend(sorted(glob.glob(p, recursive=True)))
            elif os.path.isdir(p):
                files.extend(sorted(glob.glob(os.path.join(p, '*.*'))))
            elif os.path.isfile(p):
                files.append(p)
            else:
                raise FileNotFoundError(f'{p} does not exist')

        # Classify by file suffix; files with any other suffix are dropped.
        images = [x for x in files if x.split('.')[-1].lower() in IMG_FORMATS]
        videos = [x for x in files if x.split('.')[-1].lower() in VID_FORMATS]
        ni, nv = len(images), len(videos)

        self.img_size = img_size
        self.stride = stride
        self.files = images + videos
        self.nf = ni + nv
        self.video_flag = [False] * ni + [True] * nv  # parallel to self.files
        self.mode = 'image'
        self.auto = auto
        self.transforms = transforms
        self.vid_stride = vid_stride
        if any(videos):
            self._new_video(videos[0])
        else:
            self.cap = None
        assert self.nf > 0, f'No images or videos found in {p}. ' \
                            f'Supported formats are:\nimages: {IMG_FORMATS}\nvideos: {VID_FORMATS}'

    def __iter__(self):
        """Reset the file cursor and return the loader itself as the iterator."""
        print("__iter__")
        self.count = 0
        return self

    def __next__(self):
        """Return ``(path, im, im0, cap, s)`` for the next image or video frame.

        Raises:
            StopIteration: When every file has been consumed.
            AssertionError: If an image file cannot be read by ``cv2.imread``.
        """
        print("__next__")
        if self.count == self.nf:
            raise StopIteration
        path = self.files[self.count]

        if self.video_flag[self.count]:
            self.mode = 'video'
            # Grab vid_stride frames but decode only the last one.
            for _ in range(self.vid_stride):
                self.cap.grab()
            ret_val, im0 = self.cap.retrieve()
            # Current video exhausted: move on to the next file until a frame
            # can be read or the file list ends.
            while not ret_val:
                self.count += 1
                self.cap.release()
                if self.count == self.nf:
                    raise StopIteration
                path = self.files[self.count]
                self._new_video(path)
                ret_val, im0 = self.cap.read()
            self.frame += 1
            s = f'video {self.count + 1}/{self.nf} ({self.frame}/{self.frames}) {path}: '
        else:
            self.count += 1
            im0 = cv2.imread(path)
            assert im0 is not None, f'Image Not Found {path}'
            s = f'image {self.count}/{self.nf} {path}: '

        if self.transforms:
            im = self.transforms(im0)
        else:
            im = letterbox(im0, self.img_size, stride=self.stride, auto=self.auto)[0]
            # Move the last axis first, then reverse along it.
            # NOTE(review): presumably HWC -> CHW and BGR -> RGB; confirm the channel order.
            im = im.transpose((2, 0, 1))[::-1]
            im = np.ascontiguousarray(im)
        return path, im, im0, self.cap, s

    def _new_video(self, path):
        """Open ``path`` as the current video and reset the frame counters."""
        self.frame = 0
        self.cap = cv2.VideoCapture(path)
        # Number of frames that will be yielded, given the frame stride.
        self.frames = int(self.cap.get(cv2.CAP_PROP_FRAME_COUNT) / self.vid_stride)

    def __len__(self):
        """Return the number of files (images plus videos) in the loader."""
        return self.nf
if __name__ == "__main__":
    # Demo: iterate over a single test image and print each field of the
    # tuple that the loader yields.
    loader = LoadImages('1.jpg', img_size=640, stride=32, auto=True, vid_stride=1)
    for img_path, processed, original, capture, info in loader:
        print('img_path:', img_path)
        print('im0s.shape', original.shape)
        print('im.shape:', processed.shape)
        print('vid_cap:', capture)
        print('相关信息string:', info)
__init__
__iter__
__next__
img_path: 1.jpg
im0s.shape (416, 416, 3)
im.shape: (3, 640, 640)
vid_cap: None
相关信息string: image 1/1 1.jpg:
__next__