今天打算把之前沐神《动手学深度学习》中的 TinySSD 模型迁移到 PyTorch 上，用来做简单的目标检测。
import torch
import torchvision
from torch import nn
import numpy as np
import math
# Anchor scales for the five prediction levels, two scales per level.
sizes = [
    [0.2, 0.272],
    [0.37, 0.447],
    [0.54, 0.619],
    [0.71, 0.79],
    [0.88, 0.961],
]

# Every level uses the same aspect ratios; list repetition means all entries
# refer to one shared inner list.
ratios = [[1, 2, 0.5]] * len(sizes)

# MultiBoxPrior pairs the first size with every ratio and the first ratio
# with each remaining size, giving (#ratios + #sizes - 1) anchors per position.
num_anchors = len(ratios[0]) + len(sizes[0]) - 1
def cls_predictor(num_anchors: int, num_classes: int, input_size: int) -> nn.Conv2d:
    """Build the class-prediction convolution for one feature map.

    Args:
        num_anchors: Number of anchor boxes per spatial position.
        num_classes: Number of object classes. Each anchor gets
            ``num_classes + 1`` outputs (the extra one is presumably the
            background class).
        input_size: Number of input channels of the feature map.

    Returns:
        A 3x3 ``nn.Conv2d`` with padding 1 and
        ``num_anchors * (num_classes + 1)`` output channels.
    """
    out_channels = num_anchors * (num_classes + 1)
    return nn.Conv2d(input_size, out_channels, kernel_size=3, padding=1)
def bbox_predictor(num_anchors: int, input_size: int) -> nn.Conv2d:
    """Build the bounding-box prediction convolution for one feature map.

    Args:
        num_anchors: Number of anchor boxes per spatial position.
        input_size: Number of input channels of the feature map.

    Returns:
        A 3x3 ``nn.Conv2d`` with padding 1 and ``num_anchors * 4`` output
        channels (four values per anchor).
    """
    out_channels = num_anchors * 4
    return nn.Conv2d(input_size, out_channels, kernel_size=3, padding=1)
def flatten_pred(pred):
    """Move the channel axis last, then flatten all non-batch dimensions.

    Args:
        pred: 4-D tensor; axis 1 is moved to the end before flattening.

    Returns:
        A 2-D tensor whose first dimension is the original first dimension.
    """
    channels_last = pred.permute(0, 2, 3, 1)
    return channels_last.flatten(start_dim=1)
def concat_preds(preds):
    """Flatten each prediction with ``flatten_pred`` and join them on dim 1.

    Args:
        preds: Iterable of tensors accepted by ``flatten_pred``.

    Returns:
        A 2-D tensor: the flattened predictions concatenated along dim 1.
    """
    # torch.cat takes any sequence of tensors, so the list is passed directly.
    return torch.cat([flatten_pred(p) for p in preds], dim=1)
def MultiBoxPrior(feature_map, sizes=(0.75, 0.5, 0.25), ratios=(1, 2, 0.5)):
    """Generate anchor boxes for every spatial position of a feature map.

    Each position gets ``len(sizes) + len(ratios) - 1`` anchors: the first
    size combined with every ratio, followed by each remaining size combined
    with the first ratio. An anchor of size ``s`` and ratio ``r`` has width
    ``s * sqrt(r)`` and height ``s / sqrt(r)``.

    Args:
        feature_map: torch tensor, shape [N, C, H, W]. Only H and W are used.
        sizes: Sequence of anchor sizes (0~1).
        ratios: Sequence of aspect ratios (must be positive, since their
            square root is used as a divisor).

    Returns:
        float32 tensor of shape (1, H * W * anchors_per_position, 4) holding
        (x_min, y_min, x_max, y_max). The first dimension is 1 because the
        anchors are identical for every sample in the batch. The tensor is
        built from a NumPy array and is not moved to ``feature_map``'s device.
    """
    # (size, sqrt(ratio)) pairs; the order here fixes the anchor order.
    pairs = [[sizes[0], math.sqrt(r)] for r in ratios]
    pairs += [[s, math.sqrt(ratios[0])] for s in sizes[1:]]
    pairs = np.array(pairs)

    ss1 = pairs[:, 0] * pairs[:, 1]  # size * sqrt(ratio) -> width
    ss2 = pairs[:, 0] / pairs[:, 1]  # size / sqrt(ratio) -> height
    # Half extents around the reference point of a cell.
    base_anchors = np.stack([-ss1, -ss2, ss1, ss2], axis=1) / 2

    h, w = feature_map.shape[-2:]
    # Reference points are i / w and j / h, i.e. no half-cell offset is added.
    shifts_x = np.arange(0, w) / w
    shifts_y = np.arange(0, h) / h
    shift_x, shift_y = np.meshgrid(shifts_x, shifts_y)
    shift_x = shift_x.reshape(-1)
    shift_y = shift_y.reshape(-1)
    shifts = np.stack((shift_x, shift_y, shift_x, shift_y), axis=1)

    # Broadcast (H*W, 1, 4) + (1, A, 4) -> (H*W, A, 4).
    anchors = shifts.reshape((-1, 1, 4)) + base_anchors.reshape((1, -1, 4))
    return torch.tensor(anchors, dtype=torch.float32).view(1, -1, 4)
def _down_sample_layers(in_channels, out_channels):
    """Return two conv-BN-ReLU groups followed by a 2x2 max-pool, as a flat list."""
    return [
        nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1),
        nn.BatchNorm2d(out_channels),
        nn.ReLU(),
        nn.Conv2d(out_channels, out_channels, kernel_size=3, padding=1),
        nn.BatchNorm2d(out_channels),
        nn.ReLU(),
        nn.MaxPool2d(2),
    ]


class TinySSD(nn.Module):
    """Five-level single-shot detector.

    The input passes through ``blk1`` .. ``blk5`` in sequence. After each
    block a class head (``clsN``) and a box head (``bboxN``) are applied to
    the feature map, and anchors are generated for it with ``MultiBoxPrior``
    using the module-level ``sizes`` / ``ratios`` entry for that level. The
    heads are sized with the module-level ``num_anchors``.

    ``blk1`` halves the spatial size three times, ``blk2`` .. ``blk4`` halve
    it once each, and ``blk5`` is a fixed ``MaxPool2d(4)``.
    NOTE(review): ``blk5`` is not a global pool, so the last feature map is
    1x1 only when the map entering it is 4x4 to 7x7 - confirm the intended
    input resolution.
    """

    def __init__(self, num_classes, **kwargs):
        """Create the backbone blocks and the per-level prediction heads.

        Args:
            num_classes: Number of object classes; the class heads emit
                ``num_classes + 1`` scores per anchor.
            **kwargs: Forwarded unchanged to ``nn.Module.__init__``.
        """
        super(TinySSD, self).__init__(**kwargs)
        self.num_classes = num_classes

        # Level 1: base network, 3 -> 16 -> 32 -> 64 channels. The layers
        # stay in one flat Sequential so sub-module indices remain 0..20.
        self.blk1 = nn.Sequential(
            *_down_sample_layers(3, 16),
            *_down_sample_layers(16, 32),
            *_down_sample_layers(32, 64),
        )
        self.cls1 = cls_predictor(num_anchors, num_classes, 64)
        self.bbox1 = bbox_predictor(num_anchors, 64)

        # Levels 2-4: one down-sampling block each, 128 channels out.
        self.blk2 = nn.Sequential(*_down_sample_layers(64, 128))
        self.cls2 = cls_predictor(num_anchors, num_classes, 128)
        self.bbox2 = bbox_predictor(num_anchors, 128)

        self.blk3 = nn.Sequential(*_down_sample_layers(128, 128))
        self.cls3 = cls_predictor(num_anchors, num_classes, 128)
        self.bbox3 = bbox_predictor(num_anchors, 128)

        self.blk4 = nn.Sequential(*_down_sample_layers(128, 128))
        self.cls4 = cls_predictor(num_anchors, num_classes, 128)
        self.bbox4 = bbox_predictor(num_anchors, 128)

        # Level 5: pooling only, channel count stays at 128.
        self.blk5 = nn.MaxPool2d(4)
        self.cls5 = cls_predictor(num_anchors, num_classes, 128)
        self.bbox5 = bbox_predictor(num_anchors, 128)

    def forward(self, x):
        """Run all five levels and gather anchors and predictions.

        Args:
            x: Input batch; ``blk1`` starts with a 3-input-channel conv.

        Returns:
            A tuple ``(anchors, cls_preds, bbox_preds)``:
            anchors has shape (1, total_anchors, 4);
            cls_preds has shape (batch, -1, num_classes + 1);
            bbox_preds is the 2-D concatenation of the flattened box outputs.
        """
        stages = (
            (self.blk1, self.cls1, self.bbox1),
            (self.blk2, self.cls2, self.bbox2),
            (self.blk3, self.cls3, self.bbox3),
            (self.blk4, self.cls4, self.bbox4),
            (self.blk5, self.cls5, self.bbox5),
        )
        anchors, cls_preds, bbox_preds = [], [], []
        for level, (blk, cls_head, bbox_head) in enumerate(stages):
            x = blk(x)
            cls_preds.append(cls_head(x))
            bbox_preds.append(bbox_head(x))
            anchors.append(MultiBoxPrior(x, sizes[level], ratios[level]))

        # Concatenate the class predictions once and reuse the result for
        # both the batch size and the reshape.
        cls_flat = concat_preds(cls_preds)
        batch_size = cls_flat.shape[0]
        return (
            torch.cat(anchors, dim=1),
            cls_flat.reshape(batch_size, -1, self.num_classes + 1),
            concat_preds(bbox_preds),
        )
# Stand-alone base network: three (conv-BN-ReLU) x 2 + 2x2 max-pool groups
# taking the channels 3 -> 16 -> 32 -> 64, kept as one flat Sequential of
# 21 layers.
base_net = nn.Sequential(*(
    layer
    for in_channels, out_channels in ((3, 16), (16, 32), (32, 64))
    for layer in (
        nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1),
        nn.BatchNorm2d(out_channels),
        nn.ReLU(),
        nn.Conv2d(out_channels, out_channels, kernel_size=3, padding=1),
        nn.BatchNorm2d(out_channels),
        nn.ReLU(),
        nn.MaxPool2d(2),
    )
))
弄了一个下午，总算把 TinySSD 迁移到 PyTorch 框架里来了；之后的训练、损失函数和测试部分明天再弄。