【Kaggle-MNIST之路】自定义程序结构(七)

版权声明:本文为博主原创文章,未经博主允许不得转载。 https://blog.csdn.net/a19990412/article/details/84302885

简述

这一篇跟这个系列的其他文章不一样,这个是重新安排下程序结构

  • 结构如下:
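    (原文此处是一张目录结构截图,图片已缺失。根据下文代码推断,结构大致如下:
      model/CNN.py                —— 模型定义
      Kaggle-MNIST-classify.py    —— 训练脚本,生成 divided-net.pkl
      预测脚本(原文未给出文件名) —— 读取 divided-net.pkl,输出 newfile.csv
      all/                        —— train.csv、test.csv、sample_submission.csv)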

其中 model 这个文件夹专门用来放模型定义就好了(下面是 model/CNN.py):

import torch.nn as nn


class CNN(nn.Module):
    """Convolutional classifier for single-channel 28x28 images.

    Three convolutional stages turn a ``(N, 1, 28, 28)`` batch into one
    128-value feature vector per sample; a final linear layer maps that
    vector to 10 class scores. The scores are raw logits (no softmax is
    applied inside the model).
    """

    @staticmethod
    def _conv_relu_bn(in_channels, out_channels, kernel_size, stride, padding=0):
        """Return the ``[Conv2d, ReLU, BatchNorm2d]`` triple used by every stage."""
        return [
            nn.Conv2d(
                in_channels=in_channels,
                out_channels=out_channels,
                kernel_size=kernel_size,  # side length of the convolution filter
                stride=stride,  # how far the filter moves per step
                padding=padding,
            ),
            nn.ReLU(),
            nn.BatchNorm2d(out_channels),
        ]

    def __init__(self):
        super().__init__()
        block = self._conv_relu_bn

        # Stage 1: (1, 28, 28) -> (32, 26, 26) -> (32, 24, 24) -> (32, 12, 12)
        self.layer1 = nn.Sequential(
            *block(1, 32, kernel_size=3, stride=1),
            *block(32, 32, kernel_size=3, stride=1),
            *block(32, 32, kernel_size=5, stride=2, padding=2),
            nn.Dropout(0.4),
        )

        # Stage 2: (32, 12, 12) -> (64, 10, 10) -> (64, 8, 8) -> (64, 4, 4)
        self.layer2 = nn.Sequential(
            *block(32, 64, kernel_size=3, stride=1),
            *block(64, 64, kernel_size=3, stride=1),
            *block(64, 64, kernel_size=5, stride=2, padding=2),
            nn.Dropout(0.4),
        )

        # Stage 3: a 4x4 filter collapses the 4x4 map to (128, 1, 1).
        self.layer3 = nn.Sequential(*block(64, 128, kernel_size=4, stride=1))

        # Classifier head: 128 features -> 10 class scores.
        self.layer4 = nn.Linear(128, 10)

    def forward(self, x):
        """Return class scores of shape ``(N, 10)`` for the batch ``x``."""
        for stage in (self.layer1, self.layer2, self.layer3):
            x = stage(x)
        flat = x.view(x.size(0), -1)  # (N, 128, 1, 1) -> (N, 128)
        return self.layer4(flat)
  • 训练并生成模型文件(divided-net.pkl)的脚本

  • PyCharm 会提示找不到 model 这个包(import 那一行会被标出警告),这个比较烦人,但它只是 IDE 的检查提示,不影响运行。等有空的时候,再研究一下怎么消除这个警告。

  • Kaggle-MNIST-classify.py

import pandas as pd
import torch.utils.data as data
import torch
import torch.nn as nn
from model.CNN import CNN

# Path of the training CSV handed to MNISTCSVDataset in the training loop.
# NOTE: the name shadows nothing in Python 3, but it is kept as-is because
# the rest of the script refers to it.
file = './all/train.csv'
# Learning rate passed to the Adam optimizer.
LR = 0.01


class MNISTCSVDataset(data.Dataset):
    """Serve a labelled MNIST CSV as fixed-size chunks of rows.

    Item ``i`` is the ``i``-th run of ``chunk_size`` consecutive rows,
    returned as ``(labels, pixels)``. The CSV must have a ``label`` column
    as its first column; every remaining column is treated as a pixel.

    The file is loaded once and chunks are addressed by index, so the
    dataset can be iterated repeatedly and shuffled by a ``DataLoader``.

    Args:
        csv_file: Path (or file-like object) accepted by ``pandas.read_csv``.
        Train: Kept for backward compatibility and stored on the instance.
            It used to select a hard-coded length; the length is now
            derived from the file itself.
        chunk_size: Number of rows per item. Defaults to 100.

    Raises:
        ValueError: If ``chunk_size`` is smaller than 1.
    """

    def __init__(self, csv_file, Train=True, chunk_size=100):
        if chunk_size < 1:
            raise ValueError('chunk_size must be a positive integer')
        self.dataframe = pd.read_csv(csv_file)
        self.Train = Train
        self.chunk_size = chunk_size

    def __len__(self):
        """Return the number of chunks (the last one may be shorter)."""
        # Ceiling division without importing math.
        return -(-len(self.dataframe) // self.chunk_size)

    def __getitem__(self, idx):
        """Return ``(labels, pixels)`` for chunk ``idx`` as float64 arrays.

        ``labels`` has shape ``(rows,)`` and ``pixels`` has shape
        ``(rows, n_columns - 1)``.

        Raises:
            IndexError: If ``idx`` is outside ``[0, len(self))``.
        """
        if not 0 <= idx < len(self):
            raise IndexError('chunk index %d out of range' % idx)
        start = idx * self.chunk_size
        rows = self.dataframe.iloc[start:start + self.chunk_size]
        # `.values` / `.iloc` replace `as_matrix()` / `.ix`, which were
        # removed from pandas.
        ylabel = rows['label'].values.astype('float')
        xdata = rows.iloc[:, 1:].values.astype('float')
        return ylabel, xdata


# Train the CNN on the CSV chunks, then pickle the whole module to disk.
EPOCH = 10
net = CNN()
optimizer = torch.optim.Adam(net.parameters(), lr=LR)
loss_function = nn.CrossEntropyLoss()

for epoch in range(EPOCH):
    # The dataset is rebuilt for every epoch so each pass starts reading
    # the CSV from the beginning.
    mydataset = MNISTCSVDataset(file)
    # batch_size=1 because every dataset item already holds 100 samples.
    train_loader = data.DataLoader(mydataset, batch_size=1, shuffle=True)
    print('epoch %d' % epoch)
    for step, batch in enumerate(train_loader):
        labels, pixels = batch
        # (1, 100, 784) -> (100, 1, 28, 28): one grey channel per image.
        inputs = pixels.float().reshape(100, 1, 28, 28)
        # (1, 100) -> (100,) class indices, as CrossEntropyLoss expects.
        targets = labels.squeeze().long()

        optimizer.zero_grad()
        loss = loss_function(net(inputs), targets)
        loss.backward()
        optimizer.step()

        if step % 40:
            continue
        print('step %d' % step, loss)

torch.save(net, 'divided-net.pkl')
import torch
import torch.utils.data as data
import pandas as pd
import csv
from model.CNN import CNN

# Path of the unlabelled test CSV handed to MNISTCSVDataset for prediction.
file = './all/test.csv'


class MNISTCSVDataset(data.Dataset):
    """Serve an unlabelled MNIST CSV as fixed-size chunks of rows.

    Item ``i`` is the ``i``-th run of ``chunk_size`` consecutive rows,
    returned as a single float64 array holding every column of the file.

    The file is loaded once and chunks are addressed by index, so the
    dataset can be iterated repeatedly and in any order.

    Args:
        csv_file: Path (or file-like object) accepted by ``pandas.read_csv``.
        Train: Kept for backward compatibility and stored on the instance.
            It used to select a hard-coded length; the length is now
            derived from the file itself.
        chunk_size: Number of rows per item. Defaults to 100.

    Raises:
        ValueError: If ``chunk_size`` is smaller than 1.
    """

    def __init__(self, csv_file, Train=False, chunk_size=100):
        if chunk_size < 1:
            raise ValueError('chunk_size must be a positive integer')
        self.dataframe = pd.read_csv(csv_file)
        self.Train = Train
        self.chunk_size = chunk_size

    def __len__(self):
        """Return the number of chunks (the last one may be shorter)."""
        # Ceiling division without importing math.
        return -(-len(self.dataframe) // self.chunk_size)

    def __getitem__(self, idx):
        """Return chunk ``idx`` as a ``(rows, n_columns)`` float64 array.

        Raises:
            IndexError: If ``idx`` is outside ``[0, len(self))``.
        """
        if not 0 <= idx < len(self):
            raise IndexError('chunk index %d out of range' % idx)
        start = idx * self.chunk_size
        rows = self.dataframe.iloc[start:start + self.chunk_size]
        # `.values` replaces `as_matrix()`, which was removed from pandas.
        xdata = rows.values.astype('float')
        return xdata


# Load the trained model, predict a digit for every test row, and write the
# predictions into a copy of the sample submission file.

# SECURITY NOTE: torch.load unpickles the file, which can execute arbitrary
# code. Only load 'divided-net.pkl' if you produced it yourself.
# NOTE(review): recent PyTorch releases restrict torch.load to plain weights
# by default, so loading a whole pickled module may need an explicit opt-in
# there -- confirm against the installed version.
net = torch.load('divided-net.pkl')
# Inference mode: disables Dropout and makes BatchNorm use its running
# statistics. Without this the predictions are randomly perturbed.
net.eval()

myMnist = MNISTCSVDataset(file)
# batch_size=1 because every dataset item already holds a chunk of samples;
# shuffle=False keeps predictions in file order.
test_loader = torch.utils.data.DataLoader(myMnist, batch_size=1, shuffle=False)

values = []
# No gradients are needed for prediction; skip building the autograd graph.
with torch.no_grad():
    for xd in test_loader:
        # (1, rows, 784) -> (rows, 1, 28, 28); -1 avoids hard-coding the
        # chunk size.
        xd = xd.reshape(-1, 1, 28, 28).float()
        output = net(xd)
        # extend() appends in place instead of copying the list each time.
        values.extend(output.argmax(dim=1).tolist())

with open('./all/sample_submission.csv', 'r', newline='') as fp_in, \
        open('newfile.csv', 'w', newline='') as fp_out:
    reader = csv.reader(fp_in)
    writer = csv.writer(fp_out)
    for i, row in enumerate(reader):
        if i > 0:
            # Data rows: overwrite the last column with the prediction for
            # this row (row i of the file is prediction i - 1).
            row[-1] = str(values[i - 1])
        # The header row (i == 0) is copied through unchanged.
        writer.writerow(row)

这就是整个项目的文件结构啦~

猜你喜欢

转载自blog.csdn.net/a19990412/article/details/84302885