本文已参与「新人创作礼」活动,一起开启掘金创作之路。
前言
刚开始学习LSTM的一些理解
torch版本
print(torch.__version__)
1.10.2
原数据下载
raw.githubusercontent.com/jbrownlee/Datasets/master/airline-passengers.csv 注意:下载好数据后我手动把第一列的月份全都换成1-12的循环了(如下图),这个案例中原文件不能直接用
代码步步解析
加载包们
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import torch
import torch.nn as nn
from sklearn.preprocessing import MinMaxScaler
原始数据导入与绘制
# The CSV file lives in the same folder as this script.
# pd.read_csv() returns a DataFrame object.
# A DataFrame's dimensions can be inspected with .shape
training_set = pd.read_csv('airline-passengers.csv')
# .iloc selects by integer position: [:, 1] keeps every row of the second column.
training_set.iloc[:, 1].plot()
# .values converts the DataFrame into a numpy ndarray for the later numeric steps.
# print(training_set.shape) reports (144, 2) for this dataset.
training_set = training_set.values
CSV相关知识 Dataframe相关知识 .iloc相关知识 .values相关知识
数据处理
定义滑窗
滑窗用于数据集选取
# Cut a 2-D time series into fixed-length training windows plus their targets.
def sliding_windows(set, seq_length, predict_len, batch_size):
    """Slice ``set`` into ``batch_size`` (window, target) pairs.

    Sample ``k`` uses rows ``[k, k + seq_length)`` as its input window and the
    single row at index ``k + seq_length + predict_len`` as its target.  All
    columns are kept, so windows have shape ``(seq_length, n_features)`` and
    targets ``(n_features,)``.
    """
    # Stride between consecutive windows; 1 keeps maximal overlap.
    step = 1
    windows = [set[k * step:k * step + seq_length, :] for k in range(batch_size)]
    targets = [set[k * step + seq_length + predict_len, :] for k in range(batch_size)]
    return np.array(windows), np.array(targets)
seq_length为每单个训练sample取点个数 predict_len为预测多久以后的数据 batch_size为batch的数量
数据归一化与训练数据划分
# MinMaxScaler rescales each column of training_set to [0, 1] independently.
sc = MinMaxScaler()
training_data = sc.fit_transform(training_set[:, :])
seq_length = 4    # number of points in each input window
predict_len = 1   # how far beyond the window the target lies
batch_size = 128  # number of (window, target) samples to extract
x, y = sliding_windows(training_data, seq_length, predict_len, batch_size)
# First 67% of the samples train the model; the remainder is held out for testing.
train_size = int(len(y) * 0.67)
test_size = len(y) - train_size
# Wrap the numpy arrays as torch Tensor objects.
dataX = torch.Tensor(np.array(x))
dataY = torch.Tensor(np.array(y))
trainX = torch.Tensor(np.array(x[0:train_size]))
trainY = torch.Tensor(np.array(y[0:train_size]))
testX = torch.Tensor(np.array(x[train_size:len(x)]))
testY = torch.Tensor(np.array(y[train_size:len(y)]))
print(x.shape)
print(y.shape)
print(trainX.size())
print(trainY.size())
(128, 4, 2)
(128, 2) torch.Size([85, 4, 2]) torch.Size([85, 2])
训练数据集中 batch_size为85 seq_length为4 input_size为2
搭建LSTM
class LSTM(nn.Module):
    """LSTM regressor: maps a (batch, seq_length, input_size) sequence to a
    (batch, num_classes) prediction taken from the last time step.

    Args:
        input_size: number of features per time step.
        hidden_size: LSTM hidden state width.
        num_classes: output dimension (here, the dimension of y).
        num_layers: number of stacked LSTM layers.
        bidirectional: if True, forward and backward hidden states are
            concatenated, doubling the fc layer's input width.
    """

    def __init__(self, input_size, hidden_size, num_classes, num_layers, bidirectional=False):
        super(LSTM, self).__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.num_classes = num_classes
        self.num_layers = num_layers
        self.bidirectional = bidirectional
        self.lstm = nn.LSTM(input_size=self.input_size, hidden_size=self.hidden_size, batch_first=True,
                            num_layers=self.num_layers, bidirectional=self.bidirectional)
        # A bidirectional LSTM concatenates both directions' hidden states.
        num_directions = 2 if self.bidirectional else 1
        self.fc = nn.Linear(hidden_size * num_directions, num_classes)

    def forward(self, x):
        num_directions = 2 if self.bidirectional else 1
        # BUG FIX: the original drew h0/c0 from torch.randn on every call, which
        # injected fresh noise into each forward pass and made even eval-mode
        # predictions non-deterministic. Zeros are nn.LSTM's documented default.
        h0 = torch.zeros(self.num_layers * num_directions, x.size(0), self.hidden_size,
                         device=x.device, dtype=x.dtype)
        c0 = torch.zeros(self.num_layers * num_directions, x.size(0), self.hidden_size,
                         device=x.device, dtype=x.dtype)
        out, _ = self.lstm(x, (h0, c0))
        # out holds all seq_length time steps; keep only the last one.
        # BUG FIX: the original torch.squeeze() here collapsed the batch axis
        # whenever the batch size was 1, breaking single-sample prediction.
        # out[:, -1, :] already has shape (batch, hidden_size * num_directions).
        output = self.fc(out[:, -1, :])
        return output
大部分结构可以通过LSTM官网进行了解
num_classes代表y的维度,本案例中y的维度是2 网络中fc全连接层的作用就是把(squeeze后的)output: (85, 16)的维度匹配成trainY: (85, 2)的维度,这样一来,最终return的output的尺寸就和trainY一致了
训练
# Hyper-parameter settings
num_epochs = 2000
learning_rate = 0.01
input_size = 2     # features per time step (both CSV columns)
hidden_size = 16
num_layers = 1
num_classes = 2    # output dimension, matching trainY's second axis
lstm = LSTM(input_size, hidden_size, num_classes, num_layers, bidirectional=False)
# Loss function and optimizer
criterion = torch.nn.MSELoss()
optimizer = torch.optim.Adam(lstm.parameters(), lr=learning_rate)
# Training loop: full-batch gradient descent over all training windows.
for epoch in range(num_epochs):
    outputs = lstm(trainX)
    # Gradients accumulate across backward() calls, so clear them every step.
    optimizer.zero_grad()
    # Mean-squared error between predictions and targets
    loss = criterion(outputs, trainY)
    # Backprop: compute d(loss)/d(weight) into each parameter's .grad
    loss.backward()
    # Adam update of the LSTM's parameters
    optimizer.step()
    if epoch % 100 == 0:
        print("Epoch: %d, loss: %1.5f" % (epoch, loss.item()))
Epoch: 0, loss: 0.11872
Epoch: 100, loss: 0.03787 Epoch: 200, loss: 0.00968 Epoch: 300, loss: 0.00569 Epoch: 400, loss: 0.00100 Epoch: 500, loss: 0.00041 Epoch: 600, loss: 0.00027 Epoch: 700, loss: 0.00025 Epoch: 800, loss: 0.00023 Epoch: 900, loss: 0.00019 Epoch: 1000, loss: 0.00019 Epoch: 1100, loss: 0.00019 Epoch: 1200, loss: 0.00020 Epoch: 1300, loss: 0.00016 Epoch: 1400, loss: 0.00016 Epoch: 1500, loss: 0.00018 Epoch: 1600, loss: 0.00015 Epoch: 1700, loss: 0.00016 Epoch: 1800, loss: 0.00015 Epoch: 1900, loss: 0.00014
测试
# Switch the model to evaluation mode and predict with the trained weights.
lstm.eval()
# IMPROVED: run inference under no_grad() so autograd skips graph bookkeeping,
# and use .numpy() instead of the deprecated/unsafe .data.numpy() access.
with torch.no_grad():
    train_predict = lstm(dataX)
data_predict = train_predict.numpy()
dataY_plot = dataY.numpy()
# Undo the earlier min-max normalisation to recover the original value range.
data_predict = sc.inverse_transform(data_predict)
dataY_plot = sc.inverse_transform(dataY_plot)
# Keep only the second column for plotting (presumably the passenger counts —
# TODO confirm against the edited CSV's column order).
dataY_plot_1 = dataY_plot[:, 1:]
data_predict_1 = data_predict[:, 1:]
# Red dashed vertical line marks the train/test split point.
plt.axvline(x=train_size, c='r', linestyle='--')
plt.plot(dataY_plot_1)
plt.plot(data_predict_1)
plt.suptitle('Time-Series Prediction')
plt.show()
结语
目前对LSTM函数中的全连接层仍有疑惑 更多的参考资料在我的收藏里