MobileNet v1是谷歌提出的轻量级卷积神经网络(相对于VGG等网络而言)。它主要采用深度可分离卷积(depthwise separable convolution),从而大大降低了参数数目和网络的计算量。深度可分离卷积包括两个部分,分别是Depthwise卷积和Pointwise卷积。
Depthwise卷积
主要进行特征的提取(filtering)。输入特征图为(D,D,C),则卷积核为(K,K,C)。在进行卷积运算时,每个通道是独立的,即输入特征图的第i个通道(D,D)只与卷积核的第i个通道(K,K)进行卷积,结果直接作为输出特征图的第i个通道。
Pointwise卷积
主要进行特征的组合(combining),可以理解为1x1的标准卷积,即对Depthwise卷积输出的各个通道做线性组合,得到新的输出通道。
MobileNet v1结构
如下图所示(各层的具体配置见下方代码中self.model的定义)。
PyTorch代码
import torch
from torch import nn
from torch import optim
from PIL import Image
import numpy as np
# Report whether a CUDA device is visible, then pick the device accordingly
# so the script does not name a GPU that is not there.
print(torch.cuda.is_available())
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
path = "/content/drive/My Drive/Colab Notebooks/data/dog_vs_cat/"

# train_X stores the decoded images channels-last, (N, H, W, C), which is the
# layout np.array() produces from a PIL image. train_Y stores one integer
# label per image: 0 for "cat.*.jpg", 1 for "dog.*.jpg".
train_X = np.empty((2000, 224, 224, 3), dtype="float32")
train_Y = np.empty((2000,), dtype="int")

# Cats fill rows 0..999 and dogs fill rows 1000..1999.
for label, prefix in enumerate(("cat.", "dog.")):
    for i in range(1000):
        file_path = path + prefix + str(i) + ".jpg"
        # The context manager closes the file handle once the pixels are read.
        with Image.open(file_path) as image:
            # convert("RGB") guarantees three channels even for grayscale,
            # palette or RGBA files. Image.LANCZOS is the filter formerly
            # exposed as Image.ANTIALIAS, which Pillow 10 removed.
            resized_image = image.convert("RGB").resize((224, 224), Image.LANCZOS)
        img = np.array(resized_image)
        row = label * 1000 + i
        train_X[row, :, :, :] = img
        train_Y[row] = label

# Scale 8-bit pixel values into [0, 1].
train_X /= 255

# Shuffle images and labels with the same permutation so pairs stay aligned.
index = np.arange(2000)
np.random.shuffle(index)
train_X = train_X[index, :, :, :]
train_Y = train_Y[index]

# Conv2d consumes channels-first data, so reorder (N, H, W, C) -> (N, C, H, W).
# ascontiguousarray materialises the transposed view as its own float32 buffer.
train_XX = np.ascontiguousarray(train_X.transpose(0, 3, 1, 2))
class Net(nn.Module):
    """MobileNet v1 style classifier built from depthwise-separable convolutions.

    The feature extractor is one standard 3x3 convolution followed by thirteen
    depthwise-separable blocks, then global average pooling and a single
    linear layer.

    Args:
        num_classes: Number of output logits produced by the final linear
            layer. Defaults to 2.
    """

    def __init__(self, num_classes=2):
        super(Net, self).__init__()

        self.model = nn.Sequential(
            self._conv_bn(3, 32, 2),        # first layer: standard convolution
            self._conv_dw(32, 64, 1),       # remaining layers: depthwise-separable
            self._conv_dw(64, 128, 2),
            self._conv_dw(128, 128, 1),
            self._conv_dw(128, 256, 2),
            self._conv_dw(256, 256, 1),
            self._conv_dw(256, 512, 2),
            self._conv_dw(512, 512, 1),
            self._conv_dw(512, 512, 1),
            self._conv_dw(512, 512, 1),
            self._conv_dw(512, 512, 1),
            self._conv_dw(512, 512, 1),
            self._conv_dw(512, 1024, 2),
            self._conv_dw(1024, 1024, 1),
            # Global average pooling. For a 7x7 feature map (224x224 input)
            # this is the same mean that AvgPool2d(7) computes, but it also
            # yields a 1x1 map for any other input resolution.
            nn.AdaptiveAvgPool2d(1),
        )
        self.fc = nn.Linear(1024, num_classes)  # fully connected classifier

    @staticmethod
    def _conv_bn(inp, oup, stride):
        """Return a standard block: 3x3 conv -> BatchNorm -> ReLU."""
        return nn.Sequential(
            nn.Conv2d(inp, oup, 3, stride, 1, bias=False),
            nn.BatchNorm2d(oup),
            nn.ReLU(inplace=True),
        )

    @staticmethod
    def _conv_dw(inp, oup, stride):
        """Return a depthwise-separable block.

        Layout: 3x3 depthwise conv -> BN -> ReLU -> 1x1 pointwise conv -> BN -> ReLU.
        """
        return nn.Sequential(
            # groups=inp makes every input channel use its own 3x3 filter.
            nn.Conv2d(inp, inp, 3, stride, 1, groups=inp, bias=False),
            nn.BatchNorm2d(inp),
            nn.ReLU(inplace=True),
            # The 1x1 convolution mixes the channels produced above.
            nn.Conv2d(inp, oup, 1, 1, 0, bias=False),
            nn.BatchNorm2d(oup),
            nn.ReLU(inplace=True),
        )

    def forward(self, x):
        """Map a batch of 3-channel images (N, 3, H, W) to logits (N, num_classes)."""
        x = self.model(x)
        # Flatten everything except the batch axis. Unlike view(-1, 1024),
        # this can never fold spatial positions into the batch dimension.
        x = x.flatten(1)
        return self.fc(x)
# ---------------------------------------------------------------- training
batch_size = 16
net = Net().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(net.parameters(), lr=0.0005)

# Derive loop bounds from the data instead of repeating its size as a literal.
num_samples = len(train_XX)

train_loss = []   # losses gathered since the last log line
precision = 0     # correct predictions in the most recent batch
net.train()       # BatchNorm layers use batch statistics while training
for epoch in range(50):
    # Integer division: a trailing partial batch, if any, is skipped.
    for i in range(num_samples // batch_size):
        start = i * batch_size
        x = torch.from_numpy(train_XX[start:start + batch_size])  # (batch, 3, 224, 224)
        y = torch.from_numpy(train_Y[start:start + batch_size])   # (batch,) class indices
        # Move the batch to wherever the model lives rather than assuming CUDA.
        x = x.to(device)
        y = y.long().to(device)

        out = net(x)
        pred = torch.argmax(out, dim=1)
        precision = y.eq(pred).sum().float().item()

        loss = criterion(out, y)   # cross-entropy between logits and labels
        optimizer.zero_grad()      # drop gradients left over from the previous step
        loss.backward()            # back-propagate to compute parameter gradients
        optimizer.step()           # apply the update to the parameters of net

        # One log line per step: epoch, sample offset, loss, batch accuracy.
        train_loss.append(loss.item())
        print(epoch, start, np.mean(train_loss), precision / batch_size)
        train_loss = []
        precision = 0

# -------------------------------------------------------------- evaluation
# NOTE: this scores the model on the same arrays it was trained on, so the
# number printed below is training-set accuracy, not held-out accuracy.
eval_batch_size = 10
total_correct = 0
net.eval()                 # BatchNorm switches to its running statistics
with torch.no_grad():      # no autograd graph is needed for scoring
    for start in range(0, num_samples, eval_batch_size):
        x = torch.from_numpy(train_XX[start:start + eval_batch_size])
        y = torch.from_numpy(train_Y[start:start + eval_batch_size])
        x = x.to(device)
        y = y.long().to(device)
        out = net(x)
        pred = torch.argmax(out, dim=1)
        total_correct += y.eq(pred).sum().float().item()

acc = total_correct / num_samples
print('test acc:', acc)
torch.cuda.empty_cache()