一、须知
1.在基础框架上加入激活函数和标准化,基础框架参考:
基于Pytorch的卷积神经网络代码(CIFAR图像分类)及基本构架_百炼成丹的博客-CSDN博客
2.数据集读取路径更改为本地运行,故暂不支持kaggle直接训练,若需要Kaggle服务器run,则要自行更改读取路径,读取方式在基础框架中。
3.本轮优化只对比加入激活函数和标准化后的提升
4.学习率均设置为0.001对比不同优化方案效果
5.优化前,基础框架中,测试集准确率68.5,epoch = 165 ,学习率 = 0.001
二、优化过程
方案一:在池化后添加标准化并加入relu激活函数
网络构建代码:
class Model(nn.Module):
    """Scheme 1: BatchNorm + ReLU inserted AFTER each max-pooling layer.

    Input: (N, 3, 32, 32) CIFAR-10 images; output: (N, 10) class logits.
    """

    def __init__(self):
        # Fix: the original called super(Tudui, self).__init__() — 'Tudui' is
        # the class name from the base tutorial and is undefined in this
        # snippet, so it crashed with a NameError before training started.
        super(Model, self).__init__()
        self.model1 = nn.Sequential(
            nn.Conv2d(3, 32, 5, padding=2),
            nn.MaxPool2d(2), nn.BatchNorm2d(32), nn.ReLU(),
            nn.Conv2d(32, 32, 5, padding=2),
            nn.MaxPool2d(2), nn.BatchNorm2d(32), nn.ReLU(),
            nn.Conv2d(32, 64, 5, padding=2),
            nn.MaxPool2d(2), nn.BatchNorm2d(64), nn.ReLU(),
            nn.Flatten(),
            nn.Linear(1024, 64),  # 64 channels * 4 * 4 spatial after 3 poolings = 1024
            nn.Linear(64, 10),
        )

    def forward(self, x):
        # Added so the snippet is runnable; mirrors the full script's forward.
        return self.model1(x)
效果:
结论:训练集准确率在100次epoch时,已经达到了99.8%,但此时测试集的正确率已经出现了下降。十代时间为79.76s
在41次epoch时,测试集正确率达到了72%的最高值,但训练集仅有84.6%
分析:100次时出现了明显的过拟合现象,训练后大致稳定在0.7附近
方案二,在池化前添加标准化,加入relu激活函数
网络构建代码:
class Model(nn.Module):
    """Scheme 2: BatchNorm placed BEFORE each max-pooling layer, ReLU after.

    Input: (N, 3, 32, 32) CIFAR-10 images; output: (N, 10) class logits.
    """

    def __init__(self):
        # Fix: the original called super(Tudui, self).__init__() — 'Tudui' is
        # the class name from the base tutorial and is undefined in this
        # snippet, so it crashed with a NameError before training started.
        super(Model, self).__init__()
        self.model1 = nn.Sequential(
            nn.Conv2d(3, 32, 5, padding=2), nn.BatchNorm2d(32),
            nn.MaxPool2d(2), nn.ReLU(),
            nn.Conv2d(32, 32, 5, padding=2), nn.BatchNorm2d(32),
            nn.MaxPool2d(2), nn.ReLU(),
            nn.Conv2d(32, 64, 5, padding=2), nn.BatchNorm2d(64),
            nn.MaxPool2d(2), nn.ReLU(),
            nn.Flatten(),
            nn.Linear(1024, 64),  # 64 channels * 4 * 4 spatial after 3 poolings = 1024
            nn.Linear(64, 10),
        )

    def forward(self, x):
        # Added so the snippet is runnable; mirrors the full script's forward.
        return self.model1(x)
效果
结论:由上可看出当把标准化BN层加在池化前面,则会有更好的表现效果,峰值73.1%,相较方案一多1%左右的准确率。测试集损失函数先降低后升高,方案二十代epoch时间为80.8s,方案一为79.76s
分析:仍然出现了过拟合现象,虽在池化前就对数据进行标准化,但时间几乎没有明显增长
方案三、在方案二的基础上将relu改为prelu激活
网络构建代码
class Model(nn.Module):
    """Scheme 3: same layout as scheme 2 but with PReLU instead of ReLU.

    Input: (N, 3, 32, 32) CIFAR-10 images; output: (N, 10) class logits.
    """

    def __init__(self):
        # Fix: the original called super(Tudui, self).__init__() — 'Tudui' is
        # the class name from the base tutorial and is undefined in this
        # snippet, so it crashed with a NameError before training started.
        super(Model, self).__init__()
        self.model1 = nn.Sequential(
            nn.Conv2d(3, 32, 5, padding=2), nn.BatchNorm2d(32),
            nn.MaxPool2d(2), nn.PReLU(),
            nn.Conv2d(32, 32, 5, padding=2), nn.BatchNorm2d(32),
            nn.MaxPool2d(2), nn.PReLU(),
            nn.Conv2d(32, 64, 5, padding=2), nn.BatchNorm2d(64),
            nn.MaxPool2d(2), nn.PReLU(),
            nn.Flatten(),
            nn.Linear(1024, 64),  # 64 channels * 4 * 4 spatial after 3 poolings = 1024
            nn.Linear(64, 10),
        )

    def forward(self, x):
        # Added so the snippet is runnable; mirrors the full script's forward.
        return self.model1(x)
效果:峰值71.6%,收敛慢于方案一和方案二。
结论:仍然出现过拟合现象,效果差于方案一和方案二
三、全代码(以方案二为例)
import torch
import torchvision
from torch import nn
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter
import time
# Alternative dataset root when running on a Kaggle kernel: ../input/cifar10-python
# NOTE(review): download=True is not passed, so this assumes the CIFAR-10
# archive already exists under ../dataset — confirm before running locally.
train_data = torchvision.datasets.CIFAR10("../dataset", train=True, transform=torchvision.transforms.ToTensor())
test_data = torchvision.datasets.CIFAR10("../dataset", train=False, transform=torchvision.transforms.ToTensor())
# drop_last=True: every batch has exactly 64 samples; any trailing partial
# batch is discarded, which the size computations below rely on.
train_dataloader = DataLoader(train_data, batch_size=64, drop_last=True)
test_dataloader = DataLoader(test_data, batch_size=64, drop_last=True)
# print(len(train_dataloader)) #781  (number of full 64-sample train batches)
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# Effective sample counts = batches * batch size; slightly undercounts the
# real dataset whenever drop_last discarded a partial batch.
test_data_size = len(test_dataloader) * 64
train_data_size = len(train_dataloader) * 64
print(f'测试集大小为:{test_data_size}')
print(f'训练集大小为:{train_data_size}')
# TensorBoard event files go to ../model_logs.
writer = SummaryWriter("../model_logs")
# reduction='mean': the loss returned per batch is already averaged over it.
loss_fn = nn.CrossEntropyLoss(reduction='mean')
loss_fn = loss_fn.to(device)
# Flip to True to print elapsed wall-clock time every 200 training steps.
time_able = False # True
class Model(nn.Module):
    """CIFAR-10 classifier (scheme 2): Conv -> BatchNorm -> MaxPool -> ReLU
    repeated three times, then two linear layers producing 10 class logits.
    """

    def __init__(self):
        super().__init__()
        # Assemble the stack programmatically; layer order and count are
        # identical to writing the Sequential out by hand.
        stack = []
        for in_ch, out_ch in ((3, 32), (32, 32), (32, 64)):
            stack.append(nn.Conv2d(in_ch, out_ch, 5, padding=2))
            stack.append(nn.BatchNorm2d(out_ch))
            stack.append(nn.MaxPool2d(2))
            stack.append(nn.ReLU())
        # 64 channels * 4 * 4 spatial after three 2x poolings = 1024 features.
        stack.extend([nn.Flatten(), nn.Linear(1024, 64), nn.Linear(64, 10)])
        self.model1 = nn.Sequential(*stack)

    def forward(self, x):
        return self.model1(x)
# ---------------------------------------------------------------------------
# Training / evaluation driver. Uses the module-level objects defined above:
# train_dataloader, test_dataloader, device, loss_fn, writer, time_able,
# train_data_size, test_data_size.
# ---------------------------------------------------------------------------
model = Model()
model = model.to(device)
# Plain SGD with lr fixed at 0.001 so results stay comparable across schemes.
optimizer = torch.optim.SGD(model.parameters(), lr=0.001)
epoch = 100
running_loss = 0  # NOTE(review): never read afterwards; kept for compatibility
total_train_step = 0
total_test_step = 0  # bumped once per epoch; x-axis for the per-epoch scalars
if time_able:
    str_time = time.time()
for i in range(epoch):
    print(f'第{i + 1}次epoch')
    total_accuracy1 = 0
    # Fix: return BatchNorm to training mode each epoch. Evaluation below
    # switches to eval mode, so without this every epoch after the first
    # would train with frozen running statistics.
    model.train()
    for data in train_dataloader:
        imgs, targets = data
        imgs = imgs.to(device)
        targets = targets.to(device)
        output = model(imgs)
        loss = loss_fn(output, targets)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        total_train_step += 1
        if total_train_step % 200 == 0:
            if time_able:
                end_time = time.time()
                print(f'{end_time-str_time}')
            print(f'第{total_train_step}次训练,loss = {loss.item()}')
            writer.add_scalar("train_loss", loss.item(), total_train_step)
        # Per-batch count of correct predictions, accumulated over the epoch.
        accuracy1 = (output.argmax(1) == targets).sum()
        total_accuracy1 += accuracy1
    # ---- evaluation ----
    # Fix: the original evaluated with the model still in training mode, so
    # BatchNorm normalised with test-batch statistics AND updated its running
    # stats from test data — leaking the test set into the model and skewing
    # the reported accuracy.
    model.eval()
    total_test_loss = 0
    total_accuracy = 0
    with torch.no_grad():
        for data in test_dataloader:
            imgs, targets = data
            imgs = imgs.to(device)
            targets = targets.to(device)
            outputs = model(imgs)
            loss = loss_fn(outputs, targets)
            # Fix: accumulate a plain float rather than keeping loss tensors.
            total_test_loss = total_test_loss + loss.item()
            accuracy = (outputs.argmax(1) == targets).sum()
            total_accuracy += accuracy
    # Fix: loss_fn uses reduction='mean', so each addend is already a
    # per-sample average over its batch; the mean test loss is therefore the
    # sum divided by the number of batches, not by the number of samples.
    total_test_loss = total_test_loss / len(test_dataloader)
    print(f'整体测试集上的loss = {total_test_loss}')
    print(f'整体测试集正确率 = {total_accuracy / test_data_size}')
    print(f'整体训练集正确率 = {total_accuracy1 / train_data_size}')
    writer.add_scalar("test_loss", total_test_loss, total_test_step)
    writer.add_scalar("test_accuracy", total_accuracy / test_data_size, total_test_step)
    writer.add_scalar("train_accuracy", total_accuracy1 / train_data_size, total_test_step)  # test_step == epoch
    total_test_step += 1
writer.close()