PyTorch目标检测（三）

VGGNet

VGGNet共有六个版本，最常用的是VGG16，其采用五组卷积、三个全连接层，最后采用softmax进行分类。VGG利用池化层将特征图的宽高减半，并在池化后的卷积层中将通道数加倍（通道数达到512后不再增加）。

VGG采用3×3的卷积核，两个3×3卷积层叠加可以使感受野达到5×5，同时两层卷积拥有两个激活函数，也增加了非线性度。

在这里插入图片描述
PyTorch VGG16经典网络架构

from torch import nn
import torch
class VGG(nn.Module):
    """VGG16 classifier: 13 convolutions in five pooled groups plus three FC layers.

    Args:
        num_classes: Output size of the last fully connected layer.

    ``forward`` takes an ``(N, 3, H, W)`` tensor and returns ``(N, num_classes)``
    scores. The feature map is adaptively average-pooled to 7x7 before the
    classifier, so H and W do not have to be 224; for a 224x224 input the
    pooling is the identity. Inputs should be at least 32x32 so that the five
    stride-2 poolings leave a non-empty feature map.
    """

    def __init__(self, num_classes=1000):
        super(VGG, self).__init__()
        layers = []
        in_dim = 3
        out_dim = 64
        # Zero-based indices of the convolutions that close a group
        # (the 2nd, 4th, 7th, 10th and 13th convolution).
        pool_after = (1, 3, 6, 9, 12)
        # Build the 13 convolution layers, each followed by a ReLU.
        for i in range(13):
            layers += [nn.Conv2d(in_dim, out_dim, 3, 1, 1), nn.ReLU(inplace=True)]
            in_dim = out_dim
            if i in pool_after:
                layers += [nn.MaxPool2d(2, 2)]
                # The width doubles after each of the first three groups
                # (64 -> 128 -> 256 -> 512) and then stays at 512.
                if out_dim < 512:
                    out_dim *= 2
        self.features = nn.Sequential(*layers)
        # Parameter-free pooling to the 7x7 grid the classifier expects; it
        # adds no state_dict entries and lets non-224 inputs pass through.
        self.avgpool = nn.AdaptiveAvgPool2d((7, 7))
        # Three fully connected layers with ReLU and Dropout in between.
        self.classifier = nn.Sequential(
            nn.Linear(512 * 7 * 7, 4096),
            nn.ReLU(True),
            nn.Dropout(),
            nn.Linear(4096, 4096),
            nn.ReLU(True),
            nn.Dropout(),
            nn.Linear(4096, num_classes),
        )

    def forward(self, x):
        x = self.features(x)
        x = self.avgpool(x)
        # Flatten everything except the batch dimension.
        x = torch.flatten(x, 1)
        x = self.classifier(x)
        return x

# Use the GPU when one is available and fall back to the CPU otherwise, so
# the demo also runs on machines without CUDA.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
vgg = VGG(21).to(device)
inputs = torch.randn(1, 3, 224, 224).to(device)
print(inputs.shape)
score = vgg(inputs)
print(score)

Inception

V1版本

Inception v1由22层卷积网络加上池化运算拼接而成，卷积运算的卷积核大小也各不相同
在这里插入图片描述
为了进一步降低网络参数的数量，Inception增加了多个1×1的卷积模块来实现降维。

Inception v1网络一共有9个这样的模块，总共22层。在最后使用了全局平均池化。在第3个和第6个模块输出后执行softmax并计算损失。Inception的参数量很少，适合处理大规模数据。

PyTorch Inception v1模块

import torch
from torch import nn
import torch.nn.functional as F
#定义包含Conv和ReLU的基础卷积类
class BasicConv2d(nn.Module):
    """A 2-D convolution followed by an in-place ReLU.

    Args:
        in_channels: Number of channels of the input tensor.
        out_channels: Number of channels produced by the convolution.
        kernel_size: Kernel size passed to ``nn.Conv2d``.
        padding: Padding passed to ``nn.Conv2d`` (defaults to 0).
    """

    def __init__(self, in_channels, out_channels, kernel_size, padding=0):
        super().__init__()
        self.conv = nn.Conv2d(
            in_channels,
            out_channels,
            kernel_size,
            padding=padding,
        )

    def forward(self, x):
        # The activation overwrites the convolution output in place.
        return F.relu(self.conv(x), inplace=True)
#Inception v1类，初始化需要提供各个子模块的通道数
class Inceptionv1(nn.Module):
    """Inception v1 module: four parallel branches concatenated along channels.

    Args:
        in_dim: Number of input channels.
        hid_1_1: Output channels of the 1x1 branch.
        hid_2_1: Channels after the 1x1 reduction in the 3x3 branch.
        hid_2_3: Output channels of the 3x3 branch.
        hid_3_1: Channels after the 1x1 reduction in the 5x5 branch.
        out_3_5: Output channels of the 5x5 branch.
        out_4_1: Output channels of the 1x1 convolution in the pooling branch.

    The output has ``hid_1_1 + hid_2_3 + out_3_5 + out_4_1`` channels. The
    paddings are chosen so that every branch keeps the input's spatial size.
    """

    def __init__(self, in_dim, hid_1_1, hid_2_1, hid_2_3, hid_3_1, out_3_5, out_4_1):
        super().__init__()

        # Branch 1: a single 1x1 convolution.
        self.branch1x1 = BasicConv2d(in_dim, hid_1_1, 1)

        # Branch 2: 1x1 reduction, then a 3x3 convolution.
        reduce_3x3 = BasicConv2d(in_dim, hid_2_1, 1)
        conv_3x3 = BasicConv2d(hid_2_1, hid_2_3, 3, padding=1)
        self.branch3x3 = nn.Sequential(reduce_3x3, conv_3x3)

        # Branch 3: 1x1 reduction, then a 5x5 convolution.
        reduce_5x5 = BasicConv2d(in_dim, hid_3_1, 1)
        conv_5x5 = BasicConv2d(hid_3_1, out_3_5, 5, padding=2)
        self.branch5x5 = nn.Sequential(reduce_5x5, conv_5x5)

        # Branch 4: 3x3 max pooling (stride 1), then a 1x1 convolution.
        pool = nn.MaxPool2d(3, stride=1, padding=1)
        pool_proj = BasicConv2d(in_dim, out_4_1, 1)
        self.branch_pool = nn.Sequential(pool, pool_proj)

    def forward(self, x):
        branches = (self.branch1x1, self.branch3x3, self.branch5x5, self.branch_pool)
        # Run every branch on the same input and join the results channel-wise.
        return torch.cat([branch(x) for branch in branches], dim=1)

#实例化测试
# Use the GPU when one is available and fall back to the CPU otherwise, so
# the demo also runs on machines without CUDA.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
net_Inceptionv1 = Inceptionv1(3, 64, 32, 64, 64, 96, 32).to(device)
print(net_Inceptionv1)
inputs = torch.randn(1, 3, 256, 256).to(device)
print(inputs.shape)
output = net_Inceptionv1(inputs)
print(output.shape)
print(output)

V2版本

V2在V1的基础上进一步通过卷积分解和正则化使运算更高效，利用两个3×3的卷积层替代了5×5的卷积层，并且增加了BN层。V2减少了卷积参数量，也增加了网络的非线性度。

import torch
from torch import nn
import torch.nn.functional as F
# Basic convolution block: Conv + BatchNorm + ReLU
class BasicConv2d(nn.Module):
    """A 2-D convolution, then batch normalization, then an in-place ReLU.

    Args:
        in_channels: Number of channels of the input tensor.
        out_channels: Number of channels produced by the convolution; also
            the number of features normalized by the batch-norm layer.
        kernel_size: Kernel size passed to ``nn.Conv2d``.
        padding: Padding passed to ``nn.Conv2d`` (defaults to 0).
    """

    def __init__(self, in_channels, out_channels, kernel_size, padding=0):
        super().__init__()
        self.conv = nn.Conv2d(
            in_channels,
            out_channels,
            kernel_size,
            padding=padding,
        )
        self.bn = nn.BatchNorm2d(out_channels, eps=0.001)

    def forward(self, x):
        normalized = self.bn(self.conv(x))
        # The activation overwrites the normalized tensor in place.
        return F.relu(normalized, inplace=True)
# Inception v2 module; the constructor takes the channel counts of each branch
class Inceptionv2(nn.Module):
    """Inception v2 module: four parallel branches concatenated along channels.

    Compared with the 5x5 branch of v1, the third branch here stacks two 3x3
    convolutions, and the pooling branch uses average pooling.

    Args:
        in_dim: Number of input channels.
        hid_1_1: Output channels of the 1x1 branch.
        hid_2_1: Channels after the 1x1 reduction in the 3x3 branch.
        hid_2_3: Output channels of the 3x3 branch.
        hid_3_1: Channels after the 1x1 reduction in the double-3x3 branch.
        out_3_5: Output channels of both 3x3 convolutions in the double-3x3
            branch.
        out_4_1: Output channels of the 1x1 convolution in the pooling branch.

    The output has ``hid_1_1 + hid_2_3 + out_3_5 + out_4_1`` channels. The
    paddings are chosen so that every branch keeps the input's spatial size.
    """

    def __init__(self, in_dim, hid_1_1, hid_2_1, hid_2_3, hid_3_1, out_3_5, out_4_1):
        super().__init__()

        # Branch 1: a single 1x1 convolution.
        self.branch1x1 = BasicConv2d(in_dim, hid_1_1, 1, 0)

        # Branch 2: 1x1 reduction, then a 3x3 convolution.
        reduce_3x3 = BasicConv2d(in_dim, hid_2_1, 1, 0)
        conv_3x3 = BasicConv2d(hid_2_1, hid_2_3, 3, padding=1)
        self.branch3x3 = nn.Sequential(reduce_3x3, conv_3x3)

        # Branch 3: 1x1 reduction, then two stacked 3x3 convolutions.
        reduce_dbl = BasicConv2d(in_dim, hid_3_1, 1, 0)
        conv_dbl_a = BasicConv2d(hid_3_1, out_3_5, 3, padding=1)
        conv_dbl_b = BasicConv2d(out_3_5, out_3_5, 3, padding=1)
        self.branch3x3x2 = nn.Sequential(reduce_dbl, conv_dbl_a, conv_dbl_b)

        # Branch 4: 3x3 average pooling (stride 1, padded cells excluded from
        # the average), then a 1x1 convolution.
        pool = nn.AvgPool2d(3, stride=1, padding=1, count_include_pad=False)
        pool_proj = BasicConv2d(in_dim, out_4_1, 1, 0)
        self.branch_pool = nn.Sequential(pool, pool_proj)

    def forward(self, x):
        branches = (self.branch1x1, self.branch3x3, self.branch3x3x2, self.branch_pool)
        # Run every branch on the same input and join the results channel-wise.
        return torch.cat([branch(x) for branch in branches], dim=1)

#实例化测试
# Use the GPU when one is available and fall back to the CPU otherwise, so
# the demo also runs on machines without CUDA.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
net_Inceptionv2 = Inceptionv2(192, 96, 48, 64, 64, 96, 64).to(device)
print(net_Inceptionv2)
inputs = torch.randn(1, 192, 32, 32).to(device)
print(inputs.shape)
output = net_Inceptionv2(inputs)
print(output.shape)
print(output)

V2的n×n卷积运算还可以分解为1×n与n×1两次卷积，这可以使计算成本再减少三分之一。

V3

V3在V2的基础上又使用了RMSProp优化器，在辅助的分类器部分添加了7×7的卷积，并且使用了标签平滑技术。

V4

结合了残差网络，显著提升了训练速度和模型准确率

资料参考《深度学习之PyTorch物体检测实战》

czkjmohzy

发布了25 篇原创文章 · 获赞 2 · 访问量 2107

私信关注