1.目标
上一篇讲了怎么训练yolov8,
训练yolov8
但是如果只满足于此,我们就真的只是调参侠了……
所以,为了更深入地理解大神的想法,也为了提升我们自己的代码能力、方便以后对深度学习模型进行改造,我觉得应该把代码分解一下,这样可以更好地食用。
1.前向网络
一般我们说的网络就是指前向网络。网络怎么反向训练,我们一般不说,因为这个过程通常是框架自动完成的。但是训练不仅包括网络,还包括数据和loss,所以我们把yolov8分成3部分,第一部分就是前向网络。前向网络又可以分成3个部分:backbone、neck、head。
假设我们的输入是一个512*640的图片,数据集只有一个class。
0.basemodules
一些yolov8用到的模块,后面我们也可以增加自己的模块
# encoding=utf-8
import math
import torch.nn as nn
import torch
from utils.module_utils import autopad
class CBA(nn.Module):
    """Convolution + BatchNorm + Activation building block.

    Args:
        input_channel: Number of channels of the incoming tensor.
        output_channel: Number of channels produced by the convolution.
        k: Kernel size forwarded to ``nn.Conv2d``.
        s: Stride forwarded to ``nn.Conv2d``.
        p: Padding hint handed to the project helper ``autopad`` together
            with ``k`` and ``d`` (presumably ``None`` means "same"-style
            padding -- confirm against ``utils.module_utils.autopad``).
        g: Number of convolution groups.
        d: Convolution dilation.
        act: ``True`` selects the shared ``default_act``; an ``nn.Module``
            instance is used as-is; anything else disables the activation.
    """

    # A single SiLU instance shared by every CBA that uses the default.
    default_act = nn.SiLU()

    def __init__(self, input_channel, output_channel, k=1, s=1, p=None, g=1, d=1, act=True):
        super().__init__()
        padding = autopad(k, p, d)
        # bias is disabled because the BatchNorm that follows has its own shift.
        self.conv = nn.Conv2d(
            input_channel,
            output_channel,
            kernel_size=k,
            stride=s,
            padding=padding,
            dilation=d,
            groups=g,
            bias=False,
        )
        self.bn = nn.BatchNorm2d(output_channel)

        if act is True:
            activation = self.default_act
        elif isinstance(act, nn.Module):
            activation = act
        else:
            activation = nn.Identity()
        self.act = activation

    def forward(self, x):
        """Apply convolution, batch normalisation and activation in that order."""
        x = self.conv(x)
        x = self.bn(x)
        return self.act(x)
class DWCBA(CBA):
    """CBA whose group count is ``gcd(input_channel, output_channel)``.

    With equal input and output channel counts this yields one group per
    channel. Padding is always left to the parent's default (``p=None``).
    """

    def __init__(self, input_channel, output_channel, k=1, s=1, d=1, act=True):
        groups = math.gcd(input_channel, output_channel)
        super().__init__(
            input_channel,
            output_channel,
            k=k,
            s=s,
            g=groups,
            d=d,
            act=act,
        )
class Bottleneck(nn.Module):
    """Two stacked CBA layers with an optional residual connection.

    Args:
        input_channel: Channels of the incoming tensor.
        output_channel: Channels of the returned tensor.
        shortcut: Request a residual add; it is only applied when the input
            and output channel counts are equal.
        g: Group count used by the second convolution.
        k: Pair of kernel sizes, one per convolution.
        e: Ratio of hidden channels to ``output_channel``.
    """

    def __init__(self, input_channel, output_channel, shortcut=True, g=1, k=(3, 3), e=0.5):
        super().__init__()
        hidden_channel = int(output_channel * e)
        self.cv1 = CBA(input_channel, hidden_channel, k[0], 1)
        self.cv2 = CBA(hidden_channel, output_channel, k[1], 1, g=g)
        # The residual add needs matching channel counts.
        self.add = shortcut and input_channel == output_channel

    def forward(self, x):
        """Return ``cv2(cv1(x))``, plus ``x`` when the shortcut is active."""
        if self.add:
            return x + self.cv2(self.cv1(x))
        return self.cv2(self.cv1(x))
class BottleneckCSP(nn.Module):
    """CSP block: a bottleneck branch and a 1x1 bypass branch, fused by concat.

    Args:
        input_channel: Channels of the incoming tensor.
        output_channel: Channels of the returned tensor.
        n: Number of stacked ``Bottleneck`` modules in the main branch.
        shortcut: Forwarded to every ``Bottleneck``.
        g: Group count forwarded to every ``Bottleneck``.
        e: Ratio of hidden channels to ``output_channel``.
    """

    def __init__(self, input_channel, output_channel, n=1, shortcut=True, g=1, e=0.5):
        super().__init__()
        c_ = int(output_channel * e)  # hidden channels of each branch
        self.cv1 = CBA(input_channel, c_, 1, 1)
        # cv2 / cv3 are plain convolutions; normalisation and activation are
        # applied once, after the two branches have been concatenated.
        self.cv2 = nn.Conv2d(input_channel, c_, 1, 1, bias=False)
        self.cv3 = nn.Conv2d(c_, c_, 1, 1, bias=False)
        self.cv4 = CBA(2 * c_, output_channel, 1, 1)
        self.bn = nn.BatchNorm2d(2 * c_)
        self.act = nn.SiLU()
        self.m = nn.Sequential(*(Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n)))

    def forward(self, x):
        """Run both branches, concatenate on the channel axis and fuse."""
        y1 = self.cv3(self.m(self.cv1(x)))
        y2 = self.cv2(x)
        # torch.cat takes the tensors as ONE sequence argument followed by the
        # dimension; the earlier form passed y2 as `dim` and 1 to self.bn.
        merged = torch.cat((y1, y2), 1)
        return self.cv4(self.act(self.bn(merged)))
class C3(nn.Module):
    """CSP block built from three CBA layers and ``n`` Bottlenecks.

    Args:
        input_channel: Channels of the incoming tensor.
        output_channel: Channels of the returned tensor.
        n: Number of stacked ``Bottleneck`` modules in the main branch.
        shortcut: Forwarded to every ``Bottleneck``.
        g: Group count forwarded to every ``Bottleneck``.
        e: Ratio of hidden channels to ``output_channel``.
    """

    def __init__(self, input_channel, output_channel, n=1, shortcut=True, g=1, e=0.5):
        super().__init__()
        c_ = int(output_channel * e)  # hidden channels of each branch
        self.cv1 = CBA(input_channel, c_, 1, 1)
        self.cv2 = CBA(input_channel, c_, 1, 1)
        self.cv3 = CBA(2 * c_, output_channel, 1)
        self.m = nn.Sequential(*(Bottleneck(c_, c_, shortcut, g, k=((1, 1), (3, 3)), e=1.0) for _ in range(n)))

    def forward(self, x):
        """Concatenate the bottleneck branch with the bypass branch and fuse."""
        main = self.m(self.cv1(x))
        bypass = self.cv2(x)
        # The tensors go to torch.cat as a single tuple with dim=1; the earlier
        # form handed the `1` to self.cv3 instead of torch.cat.
        return self.cv3(torch.cat((main, bypass), 1))
class C2(nn.Module):
    """CSP block that splits one projection into a processed and a bypass half.

    Args:
        input_channel: Channels of the incoming tensor.
        output_channel: Channels of the returned tensor.
        n: Number of stacked ``Bottleneck`` modules applied to the first half.
        shortcut: Forwarded to every ``Bottleneck``.
        g: Group count forwarded to every ``Bottleneck``.
        e: Ratio of per-half hidden channels to ``output_channel``.
    """

    def __init__(self, input_channel, output_channel, n=1, shortcut=True, g=1, e=0.5):
        super().__init__()
        self.c = int(output_channel * e)  # channels of each half
        self.cv1 = CBA(input_channel, 2 * self.c, 1, 1)
        self.cv2 = CBA(2 * self.c, output_channel, 1)
        blocks = (
            Bottleneck(self.c, self.c, shortcut, g, k=((3, 3), (3, 3)), e=1.0)
            for _ in range(n)
        )
        self.m = nn.Sequential(*blocks)

    def forward(self, x):
        """Split the ``cv1`` output on dim 1, process one half, re-join and fuse."""
        halves = self.cv1(x).split((self.c, self.c), 1)
        processed, bypass = halves
        joined = torch.cat((self.m(processed), bypass), 1)
        return self.cv2(joined)
class C2f(nn.Module):
    """C2 variant that keeps every intermediate Bottleneck output.

    The ``cv1`` projection is split into two ``self.c``-channel tensors; each
    Bottleneck is applied to the most recently produced tensor and its result
    is kept, so ``cv2`` fuses ``(2 + n) * self.c`` channels.

    Args:
        input_channel: Channels of the incoming tensor.
        output_channel: Channels of the returned tensor.
        n: Number of chained ``Bottleneck`` modules.
        shortcut: Forwarded to every ``Bottleneck``.
        g: Group count forwarded to every ``Bottleneck``.
        e: Ratio of per-split hidden channels to ``output_channel``.
    """

    def __init__(self, input_channel, output_channel, n=1, shortcut=True, g=1, e=0.5):
        super().__init__()
        self.c = int(output_channel * e)
        self.cv1 = CBA(input_channel, 2 * self.c, 1, 1)
        self.cv2 = CBA((2 + n) * self.c, output_channel, 1)
        self.m = nn.ModuleList(
            Bottleneck(self.c, self.c, shortcut, g, k=((3, 3), (3, 3)), e=1.0)
            for _ in range(n)
        )

    def forward(self, x):
        """Chain the Bottlenecks, collecting every stage, then concat and fuse."""
        collected = list(self.cv1(x).split((self.c, self.c), 1))
        for block in self.m:
            # Each block consumes the newest tensor in the list.
            collected.append(block(collected[-1]))
        return self.cv2(torch.cat(collected, 1))
class C1(nn.Module):
    """1x1 projection followed by ``n`` 3x3 CBA layers with a residual add.

    Args:
        input_channel: Channels of the incoming tensor.
        output_channel: Channels of the returned tensor.
        n: Number of stacked 3x3 CBA layers.
    """

    def __init__(self, input_channel, output_channel, n=1):
        super().__init__()
        self.cv1 = CBA(input_channel, output_channel, 1, 1)
        stacked = [CBA(output_channel, output_channel, 3) for _ in range(n)]
        self.m = nn.Sequential(*stacked)

    def forward(self, x):
        """Return ``m(cv1(x)) + cv1(x)``."""
        projected = self.cv1(x)
        refined = self.m(projected)
        return refined + projected
class C3x(C3):
    """C3 whose Bottlenecks use a (1, 3) kernel followed by a (3, 1) kernel.

    Takes the same arguments as ``C3``; only the main-branch module ``m`` is
    replaced after the parent constructor has run.
    """

    def __init__(self, input_channel, output_channel, n=1, shortcut=True, g=1, e=0.5):
        super().__init__(input_channel, output_channel, n, shortcut, g, e)
        self.c_ = int(output_channel * e)
        cross_kernels = ((1, 3), (3, 1))
        blocks = [
            Bottleneck(self.c_, self.c_, shortcut, g, k=cross_kernels, e=1)
            for _ in range(n)
        ]
        self.m = nn.Sequential(*blocks)
class C3Ghost(C3):
    """C3 whose main branch is a stack of ``GhostBottleneck`` modules.

    Takes the same arguments as ``C3``; ``shortcut`` and ``g`` only affect the
    parent constructor, because the replacement blocks do not receive them.
    """

    def __init__(self, input_channel, output_channel, n=1, shortcut=True, g=1, e=0.5):
        super().__init__(input_channel, output_channel, n, shortcut, g, e)
        hidden_channel = int(output_channel * e)
        ghost_blocks = [GhostBottleneck(hidden_channel, hidden_channel) for _ in range(n)]
        self.m = nn.Sequential(*ghost_blocks)
class GhostCBA(nn.Module):
    """Ghost convolution: a primary CBA plus a cheap 5x5 grouped CBA, concatenated.

    Half of the output channels come from ``cv1``; the other half come from
    ``cv2``, a 5x5 convolution with one group per channel applied to the
    ``cv1`` output. The result therefore has ``2 * (output_channel // 2)``
    channels.

    Args:
        input_channel: Channels of the incoming tensor.
        output_channel: Requested number of output channels.
        k: Kernel size of the primary convolution.
        s: Stride of the primary convolution.
        g: Group count of the primary convolution.
        act: Activation selector forwarded to both CBA layers.
    """

    def __init__(self, input_channel, output_channel, k=1, s=1, g=1, act=True):
        super().__init__()
        c_ = output_channel // 2  # channels produced by each half
        # `act` must be passed by keyword: the 7th positional parameter of CBA
        # is the dilation `d`, so a positional `act` would be used as dilation
        # and the activation choice would be silently ignored.
        self.cv1 = CBA(input_channel, c_, k, s, None, g, act=act)
        self.cv2 = CBA(c_, c_, 5, 1, None, c_, act=act)

    def forward(self, x):
        """Return the primary features concatenated with their cheap transform."""
        y = self.cv1(x)
        return torch.cat((y, self.cv2(y)), 1)
class GhostBottleneck(nn.Module):
    """Residual block built from two ``GhostCBA`` layers.

    When ``s == 2`` a ``DWCBA`` layer is inserted between the two Ghost layers
    and the shortcut becomes ``DWCBA`` followed by a 1x1 ``CBA``; otherwise both
    are ``nn.Identity``. With the identity shortcut the final addition needs
    the input to already have the shape of the main-branch output.

    Args:
        input_channel: Channels of the incoming tensor.
        output_channel: Channels requested from the main branch.
        k: Kernel size of the ``DWCBA`` layers.
        s: Stride; only the value 2 enables the ``DWCBA`` layers.
    """

    def __init__(self, input_channel, output_channel, k=3, s=1):
        super().__init__()
        hidden_channel = output_channel // 2
        downsample = s == 2

        # Layers are created in the order they run so that the sequence of
        # random weight initialisations stays unchanged.
        main_layers = [GhostCBA(input_channel, hidden_channel, 1, 1)]
        if downsample:
            main_layers.append(DWCBA(hidden_channel, hidden_channel, k, s, act=False))
        else:
            main_layers.append(nn.Identity())
        main_layers.append(GhostCBA(hidden_channel, output_channel, 1, 1, act=False))
        self.conv = nn.Sequential(*main_layers)

        if downsample:
            self.shortcut = nn.Sequential(
                DWCBA(input_channel, input_channel, k, s, act=False),
                CBA(input_channel, output_channel, 1, 1, act=False),
            )
        else:
            self.shortcut = nn.Identity()

    def forward(self, x):
        """Return the sum of the main branch and the shortcut branch."""
        main = self.conv(x)
        return main + self.shortcut(x)
1.1backbone
代码参考了paddleyolo
# encoding=utf-8
import torch.nn as nn
import torch
from backbone.basemodules import CBA,C2f
from enhance.other import SPPCSPC
class YOLOv8CSPDarkNet(nn.Module):
    """YOLOv8-style CSP-DarkNet backbone returning multi-scale feature maps.

    The network is a stride-2 stem followed by four stages. Each stage is a
    stride-2 ``CBA`` followed by a ``C2f`` block; the last stage additionally
    ends with an ``SPPCSPC`` block.

    Args:
        return_idx: Indices of the feature levels to return, where level 0 is
            the stem output and level ``i`` (1..4) is the output of stage
            ``i``. The stem itself is never returned by ``forward``.

    Attributes:
        _output_channels: Channel count of each returned level.
        strides: Down-sampling factor of each returned level.
    """

    def __init__(self, return_idx=(2, 3, 4)):
        super().__init__()
        # Copy so the instance never shares state with the caller's sequence.
        self.return_idx = list(return_idx)
        # (input_channel, output_channel, num_blocks, shortcut, use_sppf)
        arch_setting = [
            [64, 128, 3, True, False],
            [128, 256, 6, True, False],
            [256, 512, 6, True, False],
            [512, 1024, 3, True, True],
        ]
        base_channels = arch_setting[0][0]
        self.stem = CBA(3, base_channels, k=3, s=2)
        _output_channels = [base_channels]
        # nn.ModuleList registers the stages as sub-modules. A plain Python
        # list would hide their parameters from .parameters(), .to(),
        # state_dict() and train()/eval().
        self.csp_dark_blocks = nn.ModuleList()
        for input_channel, output_channel, num_blocks, shortcut, use_sppf in arch_setting:
            _output_channels.append(output_channel)
            stage = [
                CBA(input_channel, output_channel, 3, 2),
                C2f(output_channel, output_channel, num_blocks, shortcut),
            ]
            if use_sppf:
                stage.append(SPPCSPC(output_channel, output_channel))
            self.csp_dark_blocks.append(nn.Sequential(*stage))
        self._output_channels = [_output_channels[i] for i in self.return_idx]
        # The stem and every stage halve the resolution, so level i has
        # stride 2 ** (i + 1).
        self.strides = [[2, 4, 8, 16, 32, 64][i] for i in self.return_idx]

    def forward(self, x):
        """Return the list of feature maps whose level is in ``return_idx``."""
        outputs = []
        x = self.stem(x)
        # Stage levels are 1-based because level 0 is the stem.
        for level, layer in enumerate(self.csp_dark_blocks, start=1):
            x = layer(x)
            if level in self.return_idx:
                outputs.append(x)
        return outputs
# Smoke test: push one random batch of shape (1, 3, 512, 640) through the
# backbone and print the shape of the first returned feature map.
# NOTE(review): `input` shadows the Python builtin of the same name.
input=torch.randn(1,3,512,640)
model=YOLOv8CSPDarkNet()
output=model(input)
print(output[0].shape)
1.2pafpn
neck模块,其实就是做一些特征融合
# encoding=utf-8
import torch.nn as nn
import torch
from backbone.basemodules import C2f,CBA
from utils.module_utils import Concat
class YOLOV8C2FPAN(nn.Module):
    """YOLOv8-style PAFPN neck: a top-down FPN pass then a bottom-up PAN pass.

    Args:
        n: Number of Bottleneck blocks inside every ``C2f`` module.
        input_chaneels: Channel counts of the three incoming feature maps,
            ordered from the highest-resolution level to the lowest. The
            three outputs have the same channel counts.
    """

    def __init__(self, n=3, input_chaneels=(256, 512, 1024)):
        super().__init__()
        # Take private copies so that instances share no state with each other
        # or with the sequence passed by the caller.
        channels = list(input_chaneels)
        self.input_channels = channels
        self._output_channels = list(channels)
        self.concat = Concat(1)
        self.upsample = nn.Upsample(scale_factor=2, mode='nearest')
        # FPN (top-down): fuse the upsampled deeper level with the next one.
        self.fpn_p4 = C2f(int(channels[2] + channels[1]), channels[1], n)
        self.fpn_p3 = C2f(int(channels[1] + channels[0]), channels[0], n)
        # PAN (bottom-up): downsample with a stride-2 conv, then fuse.
        self.down_conv2 = CBA(channels[0], channels[0], k=3, s=2)
        self.pan_n3 = C2f(int(channels[0] + channels[1]), channels[1], n)
        self.down_conv1 = CBA(channels[1], channels[1], k=3, s=2)
        self.pan_n4 = C2f(int(channels[1] + channels[2]), channels[2], n)

    def forward(self, x):
        """Fuse three feature maps and return three maps in the same order.

        Args:
            x: Sequence ``[c3, c4, c5]`` of feature maps, where each map is
                expected to have half the spatial size of the previous one
                (required by the 2x upsampling and stride-2 convolutions).

        Returns:
            ``[f_out0, pan_out1, pan_out0]``, from highest to lowest resolution.
        """
        c3, c4, c5 = x
        # FPN: top-down pathway.
        up_x1 = self.upsample(c5)
        f_out1 = self.fpn_p4(self.concat((up_x1, c4)))
        up_x2 = self.upsample(f_out1)
        f_out0 = self.fpn_p3(self.concat((up_x2, c3)))
        # PAN: bottom-up pathway.
        down_x1 = self.down_conv2(f_out0)
        pan_out1 = self.pan_n3(self.concat((down_x1, f_out1)))
        down_x2 = self.down_conv1(pan_out1)
        pan_out0 = self.pan_n4(self.concat((down_x2, c5)))
        return [f_out0, pan_out1, pan_out0]
# Smoke test: three random feature maps with 256 / 512 / 1024 channels whose
# spatial sizes (64x80, 32x40, 16x20) halve from one level to the next.
c3=torch.randn(1,256,64,80)
c4=torch.randn(1,512,32,40)
c5=torch.randn(1,1024,16,20)
# NOTE(review): `input` shadows the Python builtin of the same name.
input=[c3,c4,c5]
m=YOLOV8C2FPAN()
output=m(input)
# Print the shape of each of the three fused outputs.
print(output[0].shape)
print(output[1].shape)
print(output[2].shape)
1.3 head
yolov8的检测头模块,这里只写了训练的部分,推理部分下次补上
# encoding=utf-8
import torch.nn as nn
import torch
from backbone.basemodules import CBA
class Detect(nn.Module):
    """YOLOv8 detection head (training path only).

    For every input level two branches are built: ``cv2`` predicts
    ``4 * reg_max`` box-distribution channels and ``cv3`` predicts ``nc``
    class channels. ``forward`` concatenates the two on the channel axis.

    Args:
        nc: Number of classes.
        ch: Channel count of each incoming feature level.
    """

    def __init__(self, nc=1, ch=()):
        super().__init__()
        self.nc = nc
        self.nl = len(ch)  # number of detection levels
        # Author's note (translated): the left/right/top/bottom distances
        # divided by the stride are expected to fall within [0, 16]; use a
        # larger value for large images containing large objects.
        self.reg_max = 16
        self.no = nc + self.reg_max * 4  # output channels per level
        self.stride = torch.zeros(self.nl)

        box_hidden = max((16, ch[0] // 4, self.reg_max * 4))
        cls_hidden = max(ch[0], self.nc)

        self.cv2 = nn.ModuleList(
            nn.Sequential(
                CBA(level_channel, box_hidden, 3),
                CBA(box_hidden, box_hidden, 3),
                CBA(box_hidden, 4 * self.reg_max, 1),
            )
            for level_channel in ch
        )
        self.cv3 = nn.ModuleList(
            nn.Sequential(
                CBA(level_channel, cls_hidden, 3),
                CBA(cls_hidden, cls_hidden, 3),
                nn.Conv2d(cls_hidden, self.nc, 1),
            )
            for level_channel in ch
        )

    def forward(self, x):
        """Replace every ``x[i]`` with its box/class prediction map.

        The input list is modified in place. In training mode the list is
        returned.
        NOTE(review): outside training mode nothing is returned (``None``);
        the inference path has not been written yet.
        """
        for i, (box_branch, cls_branch) in enumerate(zip(self.cv2, self.cv3)):
            feature = x[i]
            x[i] = torch.cat((box_branch(feature), cls_branch(feature)), 1)
        return x if self.training else None
from necks.yolov8_pafpn import YOLOV8C2FPAN
# Smoke test: feed three random feature maps through the neck and then the
# detection head, and print the shape of the first prediction map.
c3=torch.randn(1,256,64,80)
c4=torch.randn(1,512,32,40)
c5=torch.randn(1,1024,16,20)
m=YOLOV8C2FPAN()
# The head's channel list matches the channel counts of c3 / c4 / c5.
m1=Detect(ch=([256,512,1024]))
output=m([c3,c4,c5])
output=m1(output)
print(output[0].shape)
1.4 yolov8
把前面的3个部分组合一下就是我们的前向推理网络了
class Yolov8(nn.Module):
    """Forward network that chains a backbone, a neck and a head.

    Args:
        backbone: Module applied to the network input.
        neck: Module applied to the backbone output.
        head: Module applied to the neck output.
    """

    def __init__(self, backbone, neck, head):
        super().__init__()
        self.backbone = backbone
        self.neck = neck
        self.head = head

    def forward(self, x):
        """Return ``head(neck(backbone(x)))``."""
        for stage in (self.backbone, self.neck, self.head):
            x = stage(x)
        return x
# Smoke test: assemble the full forward network for a single class and run
# one random batch of shape (1, 3, 512, 640) through it.
backbone = YOLOv8CSPDarkNet()
neck = YOLOV8C2FPAN()
# The head's channel list matches the neck's default channel counts.
head = Detect(nc=1, ch=[256, 512, 1024])
model = Yolov8(backbone, neck, head)
# NOTE(review): `input` shadows the Python builtin of the same name.
input=torch.randn(1,3,512,640)
output=model(input)
# Print the shape of the first level's prediction map.
print(output[0].shape)