import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from torch.autograd import Variable
import torch.utils.data as Data
import matplotlib.pyplot as plt
# Hyper Parameters
EOPCH = 20 # number of training epochs (name is a typo for "EPOCH"; kept as-is since later code uses it)
BATCH_SIZE = 50
# LR = 0.01 # learning rate
# LR_list = [0.001*(i) for i in range(1,15)] # grid for searching the best learning rate
LR_list = [0.006]
# Load the training set: column 0 is the label, the remaining 784 columns are pixel values.
train= pd.read_csv('../input//train.csv')
train_labels__ = torch.from_numpy(np.array(train.label[:]))
# Reshape pixels to (N, 1, 28, 28) and scale from [0, 255] into [0, 1].
train_data__ = torch.FloatTensor(np.array(train.iloc[:,1:]).reshape((-1,1,28,28)))/255
# NOTE: inputs to the conv layers must be float, not LONG dtype — hence FloatTensor above.
print(train_data__.type())
test= pd.read_csv('../input/test.csv')
test_data = torch.FloatTensor(np.array(test).reshape((-1,1,28,28)))/255
# Wrap the tensors in a dataset/loader that yields shuffled mini-batches.
train_data = Data.TensorDataset(train_data__,train_labels__)
train_loader = Data.DataLoader(
dataset=train_data,
batch_size=BATCH_SIZE,
shuffle=True
)
'''
# plot one example
plt.imshow(train_data__[1].squeeze().numpy(),cmap='gray')
plt.title('%i' % train_labels__[1])
plt.show()
'''
print('load data is over')
# start build CNN
class CNN(nn.Module):
    """Two convolutional blocks followed by a linear classifier for 28x28 digit images."""

    def __init__(self):
        super(CNN, self).__init__()
        # Block 1: (1, 28, 28) -> conv(5x5, pad 2) -> (16, 28, 28) -> maxpool -> (16, 14, 14)
        self.conv1 = nn.Sequential(
            nn.Conv2d(in_channels=1, out_channels=16,
                      kernel_size=5, stride=1, padding=2),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2),
        )
        # Block 2: (16, 14, 14) -> conv(5x5, pad 2) -> (32, 14, 14) -> maxpool -> (32, 7, 7)
        self.conv2 = nn.Sequential(
            nn.Conv2d(16, 32, 5, 1, 2),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2),
        )
        # Flattened feature map (32*7*7) mapped to the 10 digit classes.
        self.out = nn.Linear(32 * 7 * 7, 10)

    def forward(self, x):
        """Map a (batch, 1, 28, 28) image tensor to (batch, 10) class scores."""
        features = self.conv2(self.conv1(x))        # (batch, 32, 7, 7)
        flat = features.view(features.size(0), -1)  # flatten everything but batch
        return self.out(flat)
# Instantiate the network, the loss, and the optimizer.
cnn = CNN()
loss_func = nn.CrossEntropyLoss()  # cross-entropy loss for 10-class digit classification
# BUG FIX: the original used `lr=LR`, but the `LR = 0.01` assignment above is
# commented out, so this line raised NameError. Use the first (and only)
# learning rate from LR_list instead. The misspelled name `optimzer` is kept
# because the training loop below refers to it.
optimzer = torch.optim.Adam(cnn.parameters(), lr=LR_list[0])
# Training (and optional held-out scoring). With LR_list holding a single
# value this trains once for EOPCH epochs; the list form is left over from a
# learning-rate search (see the commented LR_list grid above).
score_list = list()
for LR in LR_list:
    score = list()
    print('当前的学习率为:', LR)
    for epoch in range(EOPCH):
        print('epoch is :', epoch + 1)
        # optimzer = torch.optim.Adam(cnn.parameters(), lr=LR)  # re-create per LR when searching
        for step, (x, y) in enumerate(train_loader):  # gives batch data
            b_x = Variable(x)
            b_y = Variable(y)
            # Threshold above len(train_loader) means EVERY batch is used for
            # training; lower it (e.g. *0.7) to hold out batches for scoring.
            if (step + 1) <= len(train_loader) * 1.1:
                output = cnn(b_x)                 # cnn output
                loss = loss_func(output, b_y)     # cross-entropy loss
                # update weights
                optimzer.zero_grad()
                loss.backward()
                optimzer.step()
            else:
                # Score held-out batches on the final epoch only.
                if (epoch + 1) == EOPCH:
                    output = cnn(b_x)
                    pred_y = torch.max(output, 1)[1].squeeze()
                    # BUG FIX: the original iterated `for i in pred_y` and then
                    # indexed with the predicted VALUE (`pred_y[i] == b_y[i]`),
                    # which compares the wrong positions. Compare prediction and
                    # label elementwise instead.
                    for p, t in zip(pred_y, b_y):
                        score.append(1 if int(p) == int(t) else 0)
# Predict labels for the test set and write a Kaggle submission file.
with torch.no_grad():  # inference only: skip gradient bookkeeping
    # FIX: the original stored this in `test`, shadowing the test DataFrame
    # read from test.csv above; use a distinct name.
    test_output = cnn(test_data)
# torch.max returns (values, indices); the indices are the predicted digits.
pred_test = torch.max(test_output, 1)[1].squeeze()
# Rows are indexed 1..N as Kaggle's ImageId convention expects.
out = pd.DataFrame(np.array(pred_test), index=range(1, 1 + len(pred_test)))
out.to_csv('./kaggle.csv', header=None)
print("结束了")
# --- The lines below are web-page residue from the blog this script was
# --- scraped from; commented out so the file remains valid Python. ---
# 手写数字全部代码
# 猜你喜欢
# 转载自www.cnblogs.com/liu247/p/11298063.html
# 今日推荐
# 周排行