目录
1、CIFAR数据集准备
Step1:CIFAR-10数据集下载
在GitHub上找的压缩包:-VGG19-CIFAR10-/数据集/CIFAR10/cifar-10-batches-py at master · shiyadong123/-VGG19-CIFAR10- · GitHub
Step2:解压
我的了解:data_batch_1到data_batch_5以及test_batch文件都包含一万张分辨率为32 * 32的图片信息
- data_batch为字典,包含四个字典键:
- b'batch_label'
- b'labels' 标签
- b'data' 图片像素值
- b'filenames'
如果希望观察其具体内容及形式,可在下面代码中加个print(dict)
试试。
Step3:提前准备好存放图片的文件夹
类似这样↓
分为test和train两个文件夹,每个文件夹里各包含十个子文件夹,命名为0~9(与CIFAR-10的整数标签一一对应)。
(养成文件名和文件夹名不含中文而且取怪一点的好习惯,否则可能会喜提ERROR)
Step4:文件转图片
把文件夹里的六个文件转化为图片↓
import numpy as np
import cv2
import matplotlib.pyplot as plt
def unpickle(file):
    """Load one pickled CIFAR-10 batch file and return its raw dict.

    The batch files live under ``cifar-10-batches-py/``; each batch is a
    pickle whose keys come back as byte strings (b'data', b'labels', ...).
    """
    import pickle
    # 'rb' is required: the batch files are binary pickles.
    with open("cifar-10-batches-py/" + file, 'rb') as fo:
        # Renamed from `dict` so the builtin is not shadowed.
        batch = pickle.load(fo, encoding='bytes')
    return batch
# NOTE: re-run this once per batch file (data_batch_1 .. data_batch_5 and
# test_batch), changing the file name below each time.
data_batch = unpickle("data_batch_1")  # open CIFAR-10 data_batch_1
cifar_data = data_batch[b'data']       # dict keys are byte strings, hence the b prefix
cifar_label = data_batch[b'labels']
cifar_data = np.array(cifar_data)      # convert to ndarray for easy slicing
print(cifar_data.shape)                # (10000, 3072)
cifar_label = np.array(cifar_label)
print(cifar_label.shape)               # (10000,)
# CIFAR-10 class names, index-aligned with integer labels 0-9.
# Bug fix: 'brid' -> 'bird' (the typo was baked into every saved filename).
label_name = ['airplane', 'automobile', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck']
# Convert the flat RGB rows into images and save them to disk.
def imwrite_images(k, offset=40000):  # k may be any value in 1-10000
    """Save the first ``k`` CIFAR rows as JPEG files.

    Each 3072-value row is laid out as 1024 red, 1024 green, 1024 blue
    values.  ``offset`` is added to the running index in the filename so
    images from different batch files do not collide (generalised from the
    hard-coded +40000; change it per batch, e.g. 0 for data_batch_1).

    Bug fix: ``cv2.imwrite`` interprets channel 0 as BLUE.  The original
    assigned R to channel 0 and B to channel 2, so every saved image had
    its red and blue channels swapped.
    """
    for i in range(k):
        image = cifar_data[i]
        image = image.reshape(-1, 1024)
        r = image[0, :].reshape(32, 32)  # red component
        g = image[1, :].reshape(32, 32)  # green component
        b = image[2, :].reshape(32, 32)  # blue component
        img = np.zeros((32, 32, 3))
        # Assemble in BGR order, which is what OpenCV expects on write.
        img[:, :, 0] = b
        img[:, :, 1] = g
        img[:, :, 2] = r
        # plt.imshow(img.astype("uint8"))
        # plt.imshow(img/255) #plt.show()
        print(i)
        cv2.imwrite("picture//train//" + str(cifar_label[i]) + "//" +
                    str(label_name[cifar_label[i]]) + "_" + str(i + offset) + ".jpg", img)
    print("第%d张图片保存完毕" % k)
imwrite_images(10000)
欧了.
2、AlexNet实现
Step1:训练模型
根据自己条件设置参数
我先尝试用的BATCH_SIZE=50,好像有点小:得到五个模型,最高准确率是第五次的53%。
后来BATCH_SIZE=1000时显示内存不足,就用的500。
用了四个小时,得到20个model
最后准确率稳定在79%左右,最高的是第十九次,但结果没保存,具体数值忘了。
import torch
#import os
from torch import nn
from torch.nn import functional as F
#from torch.autograd import Variable
import matplotlib.pyplot as plt
from torchvision.datasets import ImageFolder
import torch.optim as optim
import torch.utils.data
#from PIL import Image
import torchvision.transforms as transforms
# Hyperparameter settings.
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')  # use the GPU when available
EPOCH = 20  # number of training epochs (the training loop below runs epochs 6-25)
BATCH_SIZE = 500  # author's note: 1000 ran out of memory, 50 trained poorly
# 网络模型构建
class AlexNet(nn.Module):
    """AlexNet-style CNN sized for 65x65 RGB inputs.

    The three max-pools reduce a 65x65 input to a 6x6x128 feature map,
    which the fully-connected classifier maps to ``num_classes`` logits.
    """

    def __init__(self, num_classes=10):
        super(AlexNet, self).__init__()
        # Convolutional feature extractor.
        conv_stack = [
            nn.Conv2d(3, 48, kernel_size=11),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Conv2d(48, 128, kernel_size=5, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Conv2d(128, 192, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(192, 192, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(192, 128, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
        ]
        self.features = nn.Sequential(*conv_stack)
        # Fully-connected head with dropout regularisation.
        fc_stack = [
            nn.Linear(6 * 6 * 128, 2048),
            nn.ReLU(inplace=True),
            nn.Dropout(0.5),
            nn.Linear(2048, 2048),
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(2048, num_classes),
        ]
        self.classifier = nn.Sequential(*fc_stack)

    def forward(self, x):
        feature_map = self.features(x)
        flat = torch.flatten(feature_map, start_dim=1)
        return self.classifier(flat)
# Normalisation with the standard ImageNet channel means / stds.
normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
# Training set.
path_1 = r'D://code//5000//ML//picture//train//'
trans_1 = transforms.Compose([
    transforms.Resize((65, 65)),  # the model expects 65x65 inputs
    transforms.ToTensor(),
    normalize,
])
train_set = ImageFolder(root=path_1, transform=trans_1)
train_loader = torch.utils.data.DataLoader(train_set, batch_size=BATCH_SIZE,
                                           shuffle=True, num_workers=0)
# Test set (same resize/normalise pipeline, different directory).
path_2 = r'D://code//5000//ML//picture//test//'
trans_2 = transforms.Compose([
    transforms.Resize((65, 65)),
    transforms.ToTensor(),
    normalize,
])
test_data = ImageFolder(root=path_2, transform=trans_2)
test_loader = torch.utils.data.DataLoader(test_data, batch_size=BATCH_SIZE,
                                          shuffle=True, num_workers=0)
# Validation set.
# NOTE(review): this points at the same directory as the test set, so the
# "validation" numbers are really test numbers; a held-out split would be
# methodologically sounder.
path_3 = r'D://code//5000//ML//picture//test//'
# Bug fix: the original built valid_data from path_2, leaving path_3 unused.
valid_data = ImageFolder(root=path_3, transform=trans_2)
valid_loader = torch.utils.data.DataLoader(valid_data, batch_size=BATCH_SIZE,
                                           shuffle=True, num_workers=0)
# Model and optimiser.
model = AlexNet().to(DEVICE)
# Classic SGD with momentum and weight decay; lr is the learning rate.
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9, weight_decay=0.0005)
# 训练过程
def train_model(model, device, train_loader, optimizer, epoch):
    """Train ``model`` for one epoch and return the average batch loss.

    Bug fix: the original returned only the loss of the most recent batch
    whose index was a multiple of 300, so the plotted training-loss curve
    reflected a single arbitrary batch.  Averaging over all batches gives a
    representative per-epoch value (logging cadence is unchanged).
    """
    model.train()
    total_loss = 0.0
    num_batches = 0
    for batch_index, (data, label) in enumerate(train_loader):
        data, label = data.to(device), label.to(device)
        optimizer.zero_grad()
        output = model(data)
        loss = F.cross_entropy(output, label)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
        num_batches += 1
        if batch_index % 300 == 0:
            print('Train Epoch:{}\ttrain loss:{:.6f}'.format(epoch, loss.item()))
    # Guard against an empty loader so we never divide by zero.
    return total_loss / num_batches if num_batches else 0.0
# 测试部分的函数
def test_model(model, device, test_loader):
model.eval()
correct = 0.0
test_loss = 0.0
# 不需要梯度的记录
with torch.no_grad():
for data, label in test_loader:
data, label = data.to(device), label.to(device)
output = model(data)
test_loss += F.cross_entropy(output, label).item()
pred = output.argmax(dim=1)
correct += pred.eq(label.view_as(pred)).sum().item()
test_loss /= len(test_loader.dataset)
print('Test_average_loss:{:.4f},Accuracy:{:3f}\n'.format(
test_loss, 100 * correct / len(test_loader.dataset)
))
acc = 100 * correct / len(test_loader.dataset)
return test_loss, acc
# ---- Training driver ----
valid_losses = []  # renamed from `list`, which shadowed the builtin
Train_Loss_list = []
Valid_Loss_list = []
Valid_Accuracy_list = []
# Epochs are numbered 6..25: a previous run already produced models 1-5.
START_EPOCH = 6
for epoch in range(START_EPOCH, START_EPOCH + EPOCH):
    # Train on the training set.
    train_loss = train_model(model, DEVICE, train_loader, optimizer, epoch)
    Train_Loss_list.append(train_loss)
    torch.save(model, r'save_model\model%s.pth' % epoch)
    # Evaluate on the validation set.
    test_loss, acc = test_model(model, DEVICE, valid_loader)
    Valid_Loss_list.append(test_loss)
    Valid_Accuracy_list.append(acc)
    valid_losses.append(test_loss)
# Pick the epoch with the lowest validation loss.
min_num = min(valid_losses)
min_index = valid_losses.index(min_num)
# Bug fix: list index 0 corresponds to epoch START_EPOCH (model6), so the
# original `min_index + 1` pointed at the wrong (possibly missing) checkpoint.
best_epoch = min_index + START_EPOCH
print('model%s' % best_epoch)
# NOTE(review): this prints the accuracy at the lowest-loss epoch, which is
# not necessarily the highest accuracy overall.
print('验证集最高准确率: ')
print('{}'.format(Valid_Accuracy_list[min_index]))
# Reload the best checkpoint and evaluate it on the test set.
# NOTE(review): torch.load unpickles arbitrary code - only load trusted files.
model = torch.load(r'save_model\model%s.pth' % best_epoch)
model.eval()
# Bug fix: test_model returns (loss, accuracy); the original printed the tuple.
final_loss, accuracy = test_model(model, DEVICE, test_loader)
print('测试集准确率')
print('{}%'.format(accuracy))
# ---- Plotting ----
# Font settings so the Chinese axis labels render.
plt.rcParams['font.sans-serif'] = ['SimHei']
plt.rcParams['axes.unicode_minus'] = False
# One x value per recorded epoch.
x1 = range(0, EPOCH)
y1 = Train_Loss_list
y2 = Valid_Loss_list
y3 = Valid_Accuracy_list
plt.subplot(221)
plt.plot(x1, y1, '-o')
plt.ylabel('训练集损失')
plt.xlabel('轮数')
plt.subplot(222)
plt.plot(x1, y2, '-o')
plt.ylabel('验证集损失')
plt.xlabel('轮数')
plt.subplot(212)
plt.plot(x1, y3, '-o')
plt.ylabel('验证集准确率')
plt.xlabel('轮数')
plt.show()
Step2:pth转onnx
将准确率最高的model24.pth
转化为model24.onnx
import torch
import torch.nn as nn
import torchvision
class AlexNet(nn.Module):
    """AlexNet-style CNN for 65x65 RGB inputs (10 classes by default).

    Must mirror the architecture used at training time so the pickled
    checkpoint loads and exports correctly.
    """

    def __init__(self, num_classes=10):
        super(AlexNet, self).__init__()
        # Feature extractor: five convolutions interleaved with three pools,
        # shrinking 65x65 down to a 6x6x128 map.
        backbone = [
            nn.Conv2d(3, 48, kernel_size=11),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Conv2d(48, 128, kernel_size=5, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Conv2d(128, 192, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(192, 192, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(192, 128, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
        ]
        self.features = nn.Sequential(*backbone)
        # Classifier head mapping the flattened features to class logits.
        head = [
            nn.Linear(6 * 6 * 128, 2048),
            nn.ReLU(inplace=True),
            nn.Dropout(0.5),
            nn.Linear(2048, 2048),
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(2048, num_classes),
        ]
        self.classifier = nn.Sequential(*head)

    def forward(self, x):
        extracted = self.features(x)
        flattened = torch.flatten(extracted, start_dim=1)
        return self.classifier(flattened)
# Export the trained checkpoint to ONNX.
# A fixed-size dummy input defines the graph's input shape (1x3x65x65,
# matching the 65x65 resize used at training time).
dummy_input = torch.randn(1, 3, 65,65)
# map_location='cpu' lets a GPU-trained checkpoint load on a CPU-only box.
# NOTE(review): torch.load unpickles arbitrary code - only load trusted files.
model = torch.load(r'D:\code\5000\Alexnet\save_model\model24.pth',map_location='cpu')
model.eval()  # disable dropout so the exported graph is deterministic
# Names assigned to the graph's input/output tensors; inference code must
# use the same "output" name when calling net.forward().
input_names = ["input"]
output_names = ["output"]
torch.onnx.export(model,
dummy_input,
"model24.onnx",
verbose=True,
input_names=input_names,
output_names=output_names)
Step3:调用onnx预测
Example 1
尝试输入一张图片进行预测,随便选用的airplane_44.jpg
import cv2 as cv
import numpy as np
def img_process(image):
    """Normalise a BGR uint8 image to float32 for the exported model.

    Bug fix: training normalised with the ImageNet statistics
    (mean [0.485, 0.456, 0.406], std [0.229, 0.224, 0.225] in RGB order),
    but inference used 0.5/0.5, so the network saw differently scaled
    inputs.  OpenCV loads images in BGR order, so the constants below are
    the same statistics reversed into BGR; the later
    ``blobFromImage(swapRB=True)`` call converts the channels to RGB.
    """
    mean = np.array([0.406, 0.456, 0.485], dtype=np.float32).reshape(1, 1, 3)  # BGR order
    std = np.array([0.225, 0.224, 0.229], dtype=np.float32).reshape(1, 1, 3)   # BGR order
    new_img = ((image / 255. - mean) / std).astype(np.float32)
    return new_img
# Load the sample image (OpenCV always reads as BGR uint8).
img = cv.imread("D://code//5000//ML//picture//test//0//airplane_44.jpg")
cv.imshow("img",img)
img_t = cv.resize(img,(65,65)) # resize to the input size the model expects
img_t = img_process(img_t)
#img_t = np.transpose(img_t,[2,0,1])
#img_t = img_t[np.newaxis,:] # add a batch dimension
layerNames = ["output"] # must match the output name used when exporting the ONNX model
blob=cv.dnn.blobFromImage(img_t,scalefactor=1.0,swapRB=True,crop=False) # pack the image into a 1x3x65x65 NCHW blob (swapRB converts BGR -> RGB)
net = cv.dnn.readNetFromONNX("model24.onnx")
net.setInput(blob)
outs = net.forward(layerNames)
print(outs)
输出
C:\Users\acer\.conda\envs\pytorch\python.exe D:/code/5000/Alexnet/predict.py
(array([[ 4.9695697 , -2.0295908 , 1.7662119 , -1.8614476 , 1.3672656 ,
-3.35308 , -1.7841126 , -1.9462793 , 2.6365623 , 0.49384755]],
dtype=float32),)
进程已结束,退出代码0
最大的就是第一个,标签为0,就对了嘛。
Example 2
再换成5号文件夹里的dog_9997.jpg
得到
C:\Users\acer\.conda\envs\pytorch\python.exe D:/code/5000/Alexnet/predict.py
(array([[-1.675911 , -3.8933187 , 3.03588 , 2.0604815 , 0.62322545,
4.3882637 , -0.34412646, 1.3543239 , -2.5193105 , -2.8624332 ]],
dtype=float32),)
进程已结束,退出代码0
第六个最大,标签为5,欧了。
至少说明模型能用,至于为什么准确率只有百分之八十……有待探究。