图神经网络(五)基于GNN的图表示学习(4)基于图自编码器的推荐系统实战代码
基于图自编码器的推荐系统实战代码
代码说明
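本次实验在 PyCharm 上完成。将下面三个 Python 文件(autoencoder.py、dataset.py、main.py)放入名为 Autoencoder_Recommendation_System 的包目录中(main.py 通过该包名导入另外两个文件,目录结构如下图所示),然后直接运行 main.py 即可。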
autoencoder.py
import torch
import torch.nn as nn
import torch.nn.functional as F
import scipy.sparse as sp
import numpy as np
import torch.nn.init as init
class InputDropout(nn.Module):
    """Dropout that removes whole rows (entries along dim 0) of the input.

    In training mode each row is kept with probability ``keep_prob``; dropped
    rows are set to zero and the result is divided by ``keep_prob``. In eval
    mode an unmodified copy of the input is returned.
    """

    def __init__(self, keep_prob):
        """
        Args:
            keep_prob (float): probability of keeping a row, in (0, 1].

        Raises:
            ValueError: if ``keep_prob`` is outside (0, 1]; a value of 0
                would make ``forward`` divide by zero.
        """
        super(InputDropout, self).__init__()
        if not 0. < keep_prob <= 1.:
            raise ValueError(
                "keep_prob must be in (0, 1], got {}".format(keep_prob))
        self.p = keep_prob

    def forward(self, inputs):
        """Apply row-wise dropout.

        Args:
            inputs (torch.Tensor): tensor whose first dimension indexes rows.

        Returns:
            torch.Tensor: a new tensor; ``inputs`` itself is never modified.
        """
        x = inputs.clone()
        if not self.training:
            return x
        # Draw the mask on the same device as the input so that no
        # cross-device indexing is needed when the model lives on a GPU.
        random_tensor = self.p + torch.rand((inputs.size(0),),
                                            device=inputs.device)
        # floor(p + U[0, 1)) equals 1 with probability p and 0 otherwise.
        keep_mask = torch.floor(random_tensor).bool()
        x[~keep_mask] = 0.
        return x / self.p
class StackGCNEncoder(nn.Module):
    """Graph-convolution encoder that concatenates per-rating messages."""

    def __init__(self, input_dim, output_dim, num_support,
                 dropout=0.,
                 use_bias=False, activation=F.relu):
        """Aggregate the messages of every rating level by concatenation.

        Args:
            input_dim (int): input feature dimension.
            output_dim (int): output feature dimension; must satisfy
                ``output_dim % num_support == 0`` because each rating level
                gets an equal slice of the output.
            num_support (int): number of rating levels, e.g. 5 for 1~5 stars.
            dropout (float, optional): drop probability passed to
                ``InputDropout`` as ``1 - dropout``. Defaults to 0.
            use_bias (bool, optional): whether to add a bias. Defaults to False.
            activation (optional): activation function. Defaults to F.relu.
        """
        super(StackGCNEncoder, self).__init__()
        self.input_dim = input_dim
        self.output_dim = output_dim
        self.num_support = num_support
        self.use_bias = use_bias
        self.activation = activation
        assert output_dim % num_support == 0
        self.weight = nn.Parameter(torch.Tensor(input_dim, output_dim))
        if self.use_bias:
            self.bias_user = nn.Parameter(torch.Tensor(output_dim, ))
            self.bias_item = nn.Parameter(torch.Tensor(output_dim, ))
        self.dropout = InputDropout(1 - dropout)
        self.reset_parameters()

    def reset_parameters(self):
        """Xavier-initialise the weight and zero the biases (if present)."""
        init.xavier_uniform_(self.weight)
        if self.use_bias:
            init.zeros_(self.bias_user)
            init.zeros_(self.bias_item)

    def forward(self, user_supports, item_supports, user_inputs, item_inputs):
        """Compute the hidden features of users and items.

        Args:
            user_supports (list of sparse torch tensors): normalised
                user-to-item adjacency matrix for each rating level.
            item_supports (list of sparse torch tensors): normalised
                item-to-user adjacency matrix for each rating level.
            user_inputs (torch.Tensor): user input features.
            item_inputs (torch.Tensor): item input features.

        Returns:
            torch.Tensor: user hidden features.
            torch.Tensor: item hidden features.
        """
        assert len(user_supports) == len(item_supports) == self.num_support
        user_inputs = self.dropout(user_inputs)
        item_inputs = self.dropout(item_inputs)

        # One column slice of the weight per rating level.
        weights = torch.split(
            self.weight, self.output_dim // self.num_support, dim=1)
        user_hidden = []
        item_hidden = []
        for user_adj, item_adj, w in zip(user_supports, item_supports, weights):
            tmp_u = torch.matmul(user_inputs, w)
            tmp_v = torch.matmul(item_inputs, w)
            # Users aggregate transformed item features and vice versa.
            user_hidden.append(torch.sparse.mm(user_adj, tmp_v))
            item_hidden.append(torch.sparse.mm(item_adj, tmp_u))

        user_outputs = self.activation(torch.cat(user_hidden, dim=1))
        item_outputs = self.activation(torch.cat(item_hidden, dim=1))

        if self.use_bias:
            # Out-of-place add: an in-place "+=" would overwrite the
            # activation output that autograd may need for the backward pass.
            user_outputs = user_outputs + self.bias_user
            item_outputs = item_outputs + self.bias_item
        return user_outputs, item_outputs
class SumGCNEncoder(nn.Module):
    """Graph-convolution encoder that sums per-rating messages."""

    def __init__(self, input_dim, output_dim, num_support,
                 dropout=0.,
                 use_bias=False, activation=F.relu):
        """Aggregate the messages of every rating level by summation.

        Args:
            input_dim (int): input feature dimension.
            output_dim (int): output feature dimension.
            num_support (int): number of rating levels, e.g. 5 for 1~5 stars.
            dropout (float, optional): drop probability passed to
                ``InputDropout`` as ``1 - dropout``. Defaults to 0.
            use_bias (bool, optional): whether to add a bias. Defaults to False.
            activation (optional): activation function. Defaults to F.relu.
        """
        super(SumGCNEncoder, self).__init__()
        self.input_dim = input_dim
        self.output_dim = output_dim
        self.num_support = num_support
        self.use_bias = use_bias
        self.activation = activation
        # One (input_dim, output_dim) block per rating level, stored side by side.
        self.weight = nn.Parameter(torch.Tensor(
            input_dim, output_dim * num_support))
        if self.use_bias:
            self.bias_user = nn.Parameter(torch.Tensor(output_dim, ))
            self.bias_item = nn.Parameter(torch.Tensor(output_dim, ))
        self.dropout = InputDropout(1 - dropout)
        self.reset_parameters()

    def reset_parameters(self):
        """Xavier-initialise the weight and zero the biases (if present)."""
        init.xavier_uniform_(self.weight)
        if self.use_bias:
            init.zeros_(self.bias_user)
            init.zeros_(self.bias_item)

    def forward(self, user_supports, item_supports, user_inputs, item_inputs):
        """Compute the hidden features of users and items.

        Args:
            user_supports (list of sparse torch tensors): normalised
                user-to-item adjacency matrix for each rating level.
            item_supports (list of sparse torch tensors): normalised
                item-to-user adjacency matrix for each rating level.
            user_inputs (torch.Tensor): user input features.
            item_inputs (torch.Tensor): item input features.

        Returns:
            torch.Tensor: user hidden features.
            torch.Tensor: item hidden features.
        """
        assert len(user_supports) == len(item_supports) == self.num_support
        user_inputs = self.dropout(user_inputs)
        item_inputs = self.dropout(item_inputs)

        weights = torch.split(self.weight, self.output_dim, dim=1)
        user_hidden = []
        item_hidden = []
        w = None
        for i in range(self.num_support):
            # Level i uses the sum of weight blocks 0..i; keep a running sum
            # instead of re-adding the whole prefix on every iteration.
            w = weights[i] if w is None else w + weights[i]
            tmp_u = torch.matmul(user_inputs, w)
            tmp_v = torch.matmul(item_inputs, w)
            user_hidden.append(torch.sparse.mm(user_supports[i], tmp_v))
            item_hidden.append(torch.sparse.mm(item_supports[i], tmp_u))

        user_hidden, item_hidden = sum(user_hidden), sum(item_hidden)
        user_outputs = self.activation(user_hidden)
        item_outputs = self.activation(item_hidden)

        if self.use_bias:
            # Out-of-place add: an in-place "+=" would overwrite the
            # activation output that autograd may need for the backward pass.
            user_outputs = user_outputs + self.bias_user
            item_outputs = item_outputs + self.bias_item
        return user_outputs, item_outputs
class FullyConnected(nn.Module):
    """Dense layer applied separately to user and item features."""

    def __init__(self, input_dim, output_dim, dropout=0.,
                 use_bias=False, activation=F.relu,
                 share_weights=False):
        """Non-linear transformation layer.

        Args:
            input_dim (int): input feature dimension.
            output_dim (int): output feature dimension.
            dropout (float, optional): drop probability of the ``nn.Dropout``
                applied to the inputs. Defaults to 0.
            use_bias (bool, optional): whether to add a bias. Defaults to False.
            activation (optional): activation function. Defaults to F.relu.
            share_weights (bool, optional): whether users and items share the
                same weight (and bias). Defaults to False.
        """
        super(FullyConnected, self).__init__()
        self.input_dim = input_dim
        self.output_dim = output_dim
        self.use_bias = use_bias
        self.activation = activation
        self.share_weights = share_weights
        if not share_weights:
            self.weights_u = nn.Parameter(torch.Tensor(input_dim, output_dim))
            self.weights_v = nn.Parameter(torch.Tensor(input_dim, output_dim))
            if use_bias:
                self.user_bias = nn.Parameter(torch.Tensor(output_dim))
                self.item_bias = nn.Parameter(torch.Tensor(output_dim))
        else:
            # Both names refer to the very same Parameter objects.
            self.weights_u = nn.Parameter(torch.Tensor(input_dim, output_dim))
            self.weights_v = self.weights_u
            if use_bias:
                self.user_bias = nn.Parameter(torch.Tensor(output_dim))
                self.item_bias = self.user_bias
        self.dropout = nn.Dropout(dropout)
        self.reset_parameters()

    def reset_parameters(self):
        """Xavier-initialise the weights; draw biases from N(0, 0.5^2)."""
        if not self.share_weights:
            init.xavier_uniform_(self.weights_u)
            init.xavier_uniform_(self.weights_v)
            if self.use_bias:
                init.normal_(self.user_bias, std=0.5)
                init.normal_(self.item_bias, std=0.5)
        else:
            # Shared tensors only need to be initialised once.
            init.xavier_uniform_(self.weights_u)
            if self.use_bias:
                init.normal_(self.user_bias, std=0.5)

    def forward(self, user_inputs, item_inputs):
        """Forward pass.

        Args:
            user_inputs (torch.Tensor): input user features.
            item_inputs (torch.Tensor): input item features.

        Returns:
            torch.Tensor: output user features.
            torch.Tensor: output item features.
        """
        x_u = self.dropout(user_inputs)
        x_u = torch.matmul(x_u, self.weights_u)
        x_v = self.dropout(item_inputs)
        x_v = torch.matmul(x_v, self.weights_v)

        u_outputs = self.activation(x_u)
        v_outputs = self.activation(x_v)

        if self.use_bias:
            # Out-of-place add: ReLU keeps its output for the backward pass,
            # so modifying that tensor in place makes ``backward()`` fail.
            u_outputs = u_outputs + self.user_bias
            v_outputs = v_outputs + self.item_bias
        return u_outputs, v_outputs
class Decoder(nn.Module):
    """Bilinear decoder producing per-edge rating logits."""

    def __init__(self, input_dim, num_weights, num_classes, dropout=0., activation=F.relu):
        """Build the decoder.

        Args:
            input_dim (int): dimension of the user/item embeddings.
            num_weights (int): number of basis weight matrices.
            num_classes (int): number of rating levels, e.g. 5.
            dropout (float, optional): drop probability of the ``nn.Dropout``
                applied to both embedding tables. Defaults to 0.
            activation (optional): function applied to the final output.
                Defaults to F.relu.
        """
        super(Decoder, self).__init__()
        self.input_dim = input_dim
        self.num_weights = num_weights
        self.num_classes = num_classes
        self.activation = activation

        basis_matrices = [nn.Parameter(torch.Tensor(input_dim, input_dim))
                          for _ in range(num_weights)]
        self.weight = nn.ParameterList(basis_matrices)
        # Mixes the num_weights basis scores into num_classes outputs.
        self.weight_classifier = nn.Parameter(torch.Tensor(num_weights, num_classes))
        self.dropout = nn.Dropout(dropout)
        self.reset_parameters()

    def reset_parameters(self):
        """Orthogonal init (gain 1.1) for each basis, Xavier for the mixer."""
        for basis_matrix in self.weight:
            init.orthogonal_(basis_matrix, gain=1.1)
        init.xavier_uniform_(self.weight_classifier)

    def forward(self, user_inputs, item_inputs, user_indices, item_indices):
        """Compute un-normalised class outputs for every edge.

        Args:
            user_inputs (torch.Tensor): user hidden features.
            item_inputs (torch.Tensor): item hidden features.
            user_indices (torch.LongTensor): user index of every interaction;
                paired element-wise with ``item_indices`` to form an edge,
                shape=(num_edges, ).
            item_indices (torch.LongTensor): item index of every interaction,
                shape=(num_edges, ).

        Returns:
            torch.Tensor: un-normalised class outputs,
                shape=(num_edges, num_classes).
        """
        # Dropout is applied to the full tables first, then rows are gathered
        # per edge (user table first, matching the random-number order).
        edge_users = self.dropout(user_inputs)[user_indices]
        edge_items = self.dropout(item_inputs)[item_indices]

        # One bilinear score u^T Q_k v per basis matrix Q_k and edge.
        per_basis = [
            torch.sum(torch.matmul(edge_users, self.weight[k]) * edge_items,
                      dim=1, keepdim=True)
            for k in range(self.num_weights)
        ]
        basis_scores = torch.cat(per_basis, dim=1)

        mixed = torch.matmul(basis_scores, self.weight_classifier)
        return self.activation(mixed)
dataset.py
import os
import urllib.request
from zipfile import ZipFile
from io import StringIO
import numpy as np
import pandas as pd
import scipy.sparse as sp
def globally_normalize_bipartite_adjacency(adjacencies, symmetric=True):
    """Normalise a set of bipartite adjacency matrices by global degree.

    Degrees are computed from the sum of all matrices in ``adjacencies``.
    With ``symmetric=True`` every matrix A becomes D_u^{-1/2} A D_v^{-1/2};
    otherwise it becomes D_u^{-1} A. Rows/columns with zero degree are scaled
    by zero.

    Args:
        adjacencies: list of scipy sparse matrices sharing one shape.
        symmetric (bool): choose symmetric or row-only normalisation.

    Returns:
        list: the normalised matrices, in the same order as the input.
    """
    mode_names = ('Asymmetrically', 'Symmetrically')
    print('{} normalizing bipartite adj'.format(mode_names[symmetric]))

    total = np.sum(list(adjacencies))

    def inverse_sqrt_degree_matrix(axis_sum):
        # Diagonal matrix of 1/sqrt(degree); zero degrees are mapped to
        # infinity first so their scale factor becomes 0 instead of inf.
        degree = np.asarray(axis_sum).flatten()
        degree[degree == 0.] = np.inf
        return sp.diags([1. / np.sqrt(degree)], [0])

    row_scale = inverse_sqrt_degree_matrix(total.sum(1))
    col_scale = inverse_sqrt_degree_matrix(total.sum(0))

    if symmetric:
        return [row_scale.dot(adj).dot(col_scale) for adj in adjacencies]

    # D_u^{-1} is the square of D_u^{-1/2}.
    row_inverse = row_scale.dot(row_scale)
    return [row_inverse.dot(adj) for adj in adjacencies]
def get_adjacency(edge_df, num_user, num_movie, symmetric_normalization,
                  num_ratings=5):
    """Build normalised per-rating adjacency matrices from training edges.

    Only rows of ``edge_df`` whose ``usage`` column equals ``'train'`` are
    used. One ``(num_user, num_movie)`` matrix is built per rating level
    ``0 .. num_ratings - 1`` (matched against the ``ratings`` column), along
    with its transpose; both lists are then globally normalised.

    Args:
        edge_df (pd.DataFrame): edges with columns ``usage``, ``ratings``,
            ``user_node_id`` and ``movie_node_id``.
        num_user (int): number of user nodes.
        num_movie (int): number of movie nodes.
        symmetric_normalization (bool): forwarded to
            ``globally_normalize_bipartite_adjacency`` as ``symmetric``.
        num_ratings (int, optional): number of distinct rating levels.
            Defaults to 5, the value that was previously hard-coded.

    Returns:
        tuple: ``(user2movie_adjacencies, movie2user_adjacencies)``, two
            lists with ``num_ratings`` sparse matrices each.
    """
    user2movie_adjacencies = []
    movie2user_adjacencies = []
    train_edge_df = edge_df.loc[edge_df['usage'] == 'train']
    for rating in range(num_ratings):
        edge_index = train_edge_df.loc[
            train_edge_df.ratings == rating,
            ['user_node_id', 'movie_node_id']].to_numpy()
        # A 1 at (user, movie) marks an edge with this rating level.
        support = sp.csr_matrix(
            (np.ones(len(edge_index)), (edge_index[:, 0], edge_index[:, 1])),
            shape=(num_user, num_movie), dtype=np.float32)
        user2movie_adjacencies.append(support)
        movie2user_adjacencies.append(support.T)

    user2movie_adjacencies = globally_normalize_bipartite_adjacency(
        user2movie_adjacencies, symmetric=symmetric_normalization)
    movie2user_adjacencies = globally_normalize_bipartite_adjacency(
        movie2user_adjacencies, symmetric=symmetric_normalization)
    return user2movie_adjacencies, movie2user_adjacencies
def get_node_identity_feature(num_user, num_movie):
    """Return one-hot node encodings for users and movies.

    A single identity matrix of size ``num_user + num_movie`` is built; its
    first ``num_user`` rows are returned for users and the remaining rows for
    movies, so both outputs have ``num_user + num_movie`` columns.
    """
    total_nodes = num_user + num_movie
    one_hot = np.identity(total_nodes, dtype=np.float32)
    user_rows = one_hot[:num_user]
    movie_rows = one_hot[num_user:]
    return user_rows, movie_rows
def get_user_side_feature(node_user: pd.DataFrame):
    """Build user attribute features from age, gender and occupation.

    Columns of the result, in order: age divided by the maximum age, the
    integer code of ``gender`` from ``pd.factorize``, and one indicator
    column per distinct ``occupation`` from ``pd.get_dummies``.
    """
    raw_age = node_user['age'].to_numpy().astype('float32')
    scaled_age = (raw_age / raw_age.max()).reshape((-1, 1))

    gender_codes = pd.factorize(node_user['gender'])[0]
    gender_column = gender_codes.reshape((-1, 1))

    occupation_one_hot = pd.get_dummies(node_user['occupation']).to_numpy()

    return np.concatenate(
        [scaled_age, gender_column, occupation_one_hot], axis=1)
def get_movie_side_feature(node_movie: pd.DataFrame):
    """Build movie attribute features from the genre indicator columns.

    Returns the 18 genre columns listed below as a float32 array, in that
    column order.
    """
    genre_columns = [
        'Action', 'Adventure', 'Animation', 'Childrens', 'Comedy', 'Crime',
        'Documentary', 'Drama', 'Fantasy', 'Film-Noir', 'Horror', 'Musical',
        'Mystery', 'Romance', 'Sci-Fi', 'Thriller', 'War', 'Western',
    ]
    genre_frame = node_movie.loc[:, genre_columns]
    return genre_frame.to_numpy().astype('float32')
def convert_to_homogeneous(user_feature: np.ndarray, movie_feature: np.ndarray):
    """Zero-pad user and movie features to one shared feature dimension.

    User rows keep their features in the leading columns and get zeros in the
    trailing ``movie_feature_dim`` columns; movie rows get zeros in the
    leading ``user_feature_dim`` columns followed by their own features.
    """
    user_count, user_dim = user_feature.shape
    movie_count, movie_dim = movie_feature.shape

    user_padding = np.zeros((user_count, movie_dim))
    movie_padding = np.zeros((movie_count, user_dim))

    padded_user = np.concatenate([user_feature, user_padding], axis=1)
    padded_movie = np.concatenate([movie_padding, movie_feature], axis=1)
    return padded_user, padded_movie
def normalize_feature(feature):
    """Scale every row of ``feature`` so that it sums to 1.

    Rows whose sum is 0 are left as all zeros. Integer and boolean inputs are
    supported: the row sums are promoted to floating point before the zero
    sums are replaced by infinity (``np.inf`` cannot be stored in an integer
    array).

    Args:
        feature (np.ndarray): 2-D feature matrix, one row per node.

    Returns:
        np.ndarray: the row-normalised features.
    """
    row_sum = feature.sum(1)
    if not np.issubdtype(row_sum.dtype, np.floating):
        row_sum = row_sum.astype(np.float64)
    # Dividing by infinity yields 0, which avoids a division by zero.
    row_sum[row_sum == 0] = np.inf
    normalized_feat = feature / row_sum.reshape(-1, 1)
    return normalized_feat
class MovielensDataset(object):
    """Loader for the MovieLens-100K data set (split ``u1.base``/``u1.test``)."""

    url = "http://files.grouplens.org/datasets/movielens/ml-100k.zip"

    def __init__(self, data_root="data"):
        """
        Args:
            data_root (str): directory that holds (or will hold) the
                ``ml-100k.zip`` archive and the extracted ``ml-100k`` folder.
        """
        self.data_root = data_root
        self.maybe_download()

    @staticmethod
    def build_graph(edge_df: pd.DataFrame, user_df: pd.DataFrame,
                    movie_df: pd.DataFrame, symmetric_normalization=False):
        """Turn the raw tables into adjacency matrices, features and labels.

        Args:
            edge_df: edges with columns ``user_node``, ``movie_node``,
                ``ratings`` and ``usage``.
            user_df: user attributes keyed by ``user_node``.
            movie_df: movie attributes keyed by ``movie_node``.
            symmetric_normalization (bool): forwarded to ``get_adjacency``.

        Returns:
            tuple: ``(user2movie_adjacencies, movie2user_adjacencies,
            user_side_feature, movie_side_feature, user_identity_feature,
            movie_identity_feature, user_indices, movie_indices, labels,
            train_mask)``.
        """
        # Re-index the users/movies that occur in the edges to contiguous
        # ids 0..n-1, ordered by their original id.
        node_user = edge_df[['user_node']
                            ].drop_duplicates().sort_values('user_node')
        node_movie = edge_df[['movie_node']
                             ].drop_duplicates().sort_values('movie_node')
        # ``assign`` returns a new frame, so no derived frame is written to
        # in place.
        node_user = node_user.assign(user_node_id=np.arange(len(node_user)))
        node_movie = node_movie.assign(
            movie_node_id=np.arange(len(node_movie)))

        edge_df = edge_df.merge(node_user, on='user_node', how='left')\
            .merge(node_movie, on='movie_node', how='left')

        node_user = node_user.merge(user_df, on='user_node', how='left')
        node_movie = node_movie.merge(movie_df, on='movie_node', how='left')
        num_user = len(node_user)
        num_movie = len(node_movie)

        # adjacency
        user2movie_adjacencies, movie2user_adjacencies = get_adjacency(
            edge_df, num_user, num_movie, symmetric_normalization)

        # node property feature
        user_side_feature = get_user_side_feature(node_user)
        movie_side_feature = get_movie_side_feature(node_movie)
        user_side_feature = normalize_feature(user_side_feature)
        movie_side_feature = normalize_feature(movie_side_feature)
        user_side_feature, movie_side_feature = convert_to_homogeneous(
            user_side_feature, movie_side_feature)

        # one-hot encoding for nodes
        user_identity_feature, movie_identity_feature = \
            get_node_identity_feature(num_user, num_movie)

        # user_indices, movie_indices, labels, train_mask
        user_indices, movie_indices, labels = edge_df[[
            'user_node_id', 'movie_node_id', 'ratings']].to_numpy().T
        train_mask = (edge_df['usage'] == 'train').to_numpy()

        return user2movie_adjacencies, movie2user_adjacencies, \
            user_side_feature, movie_side_feature, \
            user_identity_feature, movie_identity_feature, \
            user_indices, movie_indices, labels, train_mask

    def read_data(self):
        """Read the edge, user and movie tables from ``<data_root>/ml-100k``.

        Returns:
            tuple: ``(edge_df, users_df, movie_df)``. ``edge_df`` holds the
            train and test edges with a ``usage`` column set to ``'train'``
            or ``'test'``, no ``timestamp`` column, and ``ratings`` shifted
            down by 1.
        """
        data_dir = os.path.join(self.data_root, "ml-100k")
        edge_headers = ['user_node', 'movie_node', 'ratings', 'timestamp']

        # edge data
        edge_train = pd.read_csv(os.path.join(data_dir, 'u1.base'), sep='\t',
                                 header=None, names=edge_headers)
        edge_train['usage'] = 'train'
        edge_test = pd.read_csv(os.path.join(data_dir, 'u1.test'), sep='\t',
                                header=None, names=edge_headers)
        edge_test['usage'] = 'test'
        edge_df = pd.concat((edge_train, edge_test),
                            axis=0).drop(columns='timestamp')
        # Shift ratings down by one so they can be used as class labels.
        edge_df['ratings'] = edge_df['ratings'] - 1

        # item feature
        sep = r'|'
        movie_file = os.path.join(data_dir, 'u.item')
        movie_headers = ['movie_node', 'movie_title', 'release_date', 'video_release_date',
                         'IMDb_URL', 'unknown', 'Action', 'Adventure', 'Animation',
                         'Childrens', 'Comedy', 'Crime', 'Documentary', 'Drama', 'Fantasy',
                         'Film-Noir', 'Horror', 'Musical', 'Mystery', 'Romance', 'Sci-Fi',
                         'Thriller', 'War', 'Western']
        movie_df = pd.read_csv(movie_file, sep=sep, header=None,
                               names=movie_headers, encoding='latin1')

        # user feature
        users_file = os.path.join(data_dir, 'u.user')
        users_headers = ['user_node', 'age',
                         'gender', 'occupation', 'zip_code']
        users_df = pd.read_csv(users_file, sep=sep, header=None,
                               names=users_headers, encoding='latin1')
        return edge_df, users_df, movie_df

    def maybe_download(self):
        """Make sure ``<data_root>/ml-100k`` exists, fetching it if needed.

        Nothing happens when the extracted folder is already present.
        Otherwise the archive is downloaded unless ``ml-100k.zip`` already
        exists in ``data_root``, and then extracted. Checking for the archive
        itself (rather than only for ``data_root``) avoids a
        ``FileNotFoundError`` when the directory exists but is empty.
        """
        extracted_dir = os.path.join(self.data_root, "ml-100k")
        if os.path.exists(extracted_dir):
            return
        zipfilename = os.path.join(self.data_root, "ml-100k.zip")
        if not os.path.exists(zipfilename):
            self.download_data(self.url, self.data_root)
        with ZipFile(zipfilename, "r") as zipobj:
            zipobj.extractall(self.data_root)
            print("Extracting data from {}".format(zipfilename))

    @staticmethod
    def download_data(url, save_path):
        """Download ``url`` into the directory ``save_path``.

        The file keeps the base name of the URL. The directory is created
        when missing and the HTTP response is closed once it has been read.

        Returns:
            bool: always True.
        """
        print("Downloading data from {}".format(url))
        os.makedirs(save_path, exist_ok=True)
        filename = os.path.basename(url)
        with urllib.request.urlopen(url) as response, \
                open(os.path.join(save_path, filename), 'wb') as f:
            f.write(response.read())
        return True
if __name__ == "__main__":
    # Smoke test: load the data set and build every graph input once.
    data = MovielensDataset()
    graph = data.build_graph(*data.read_data())
    (user2movie_adjacencies, movie2user_adjacencies,
     user_side_feature, movie_side_feature,
     user_identity_feature, movie_indentity_feature,
     user_indices, movie_indices, labels, train_mask) = graph
main.py
"""基于 MovieLens-100K 数据的GraphAutoEncoder"""
import numpy as np
import torch
import torch.nn as nn
import scipy.sparse as sp
import torch.optim as optim
import torch.nn.functional as F
from Autoencoder_Recommendation_System.dataset import MovielensDataset
from Autoencoder_Recommendation_System.autoencoder import StackGCNEncoder, FullyConnected, Decoder
# ---------------------------- hyper-parameters ----------------------------
# Use the first GPU when CUDA is available, otherwise the CPU.
DEVICE = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

LEARNING_RATE = 0.015
EPOCHS = 1000
# presumably num_user + num_movie of ML-100K (943 + 1682) — TODO confirm
NODE_INPUT_DIM = 2625
# presumably the width of the zero-padded side features — TODO confirm
SIDE_FEATURE_DIM = 41
GCN_HIDDEN_DIM = 500
SIDE_HIDDEN_DIM = 10
ENCODE_HIDDEN_DIM = 75
NUM_BASIS = 4
DROPOUT_RATIO = 0.55
# (sic) the misspelled name is what the optimizer setup in this file reads.
WEIGHT_DACAY = 0.
# ---------------------------------------------------------------------------

# Rating value of every class, shape (1, 5); used to turn class
# probabilities into an expected rating.
SCORES = torch.arange(1, 6).unsqueeze(0).to(DEVICE)
def to_torch_sparse_tensor(x, device):
    """Convert a scipy sparse matrix (or array-like) to a torch sparse tensor.

    Args:
        x: a scipy sparse matrix, or anything ``scipy.sparse.coo_matrix``
            accepts.
        device (torch.device): device the resulting tensor is moved to.

    Returns:
        torch.Tensor: float32 sparse COO tensor with the shape of ``x``.
    """
    coo = x if sp.isspmatrix_coo(x) else sp.coo_matrix(x)
    # Row 0 holds the row indices, row 1 the column indices.
    indices = torch.from_numpy(
        np.vstack((coo.row, coo.col)).astype(np.int64))
    values = torch.from_numpy(coo.data.astype(np.float32))
    # torch.sparse_coo_tensor replaces the deprecated
    # torch.sparse.FloatTensor constructor.
    th_sparse_tensor = torch.sparse_coo_tensor(
        indices, values, coo.shape).to(device)
    return th_sparse_tensor
def tensor_from_numpy(x, device):
    """Wrap the numpy array ``x`` as a torch tensor and move it to ``device``."""
    host_tensor = torch.from_numpy(x)
    return host_tensor.to(device)
class GraphMatrixCompletion(nn.Module):
    """Graph auto-encoder for rating prediction.

    Combines a ``StackGCNEncoder`` over the identity features with a dense
    layer over the side features, fuses both with a second dense layer and
    decodes the resulting embeddings into per-edge rating logits.
    """

    def __init__(self, input_dim, side_feat_dim,
                 gcn_hidden_dim, side_hidden_dim,
                 encode_hidden_dim,
                 num_support=5, num_classes=5, num_basis=3):
        """
        Args:
            input_dim (int): dimension of the node input features.
            side_feat_dim (int): dimension of the side features.
            gcn_hidden_dim (int): output dimension of the GCN encoder.
            side_hidden_dim (int): output dimension of the side-feature layer.
            encode_hidden_dim (int): dimension of the final embeddings.
            num_support (int): number of rating-level adjacency matrices.
            num_classes (int): number of rating classes to predict.
            num_basis (int): number of basis matrices in the decoder.
        """
        super(GraphMatrixCompletion, self).__init__()

        def identity(x):
            # Used where no non-linearity is wanted.
            return x

        fused_dim = gcn_hidden_dim + side_hidden_dim
        self.encoder = StackGCNEncoder(
            input_dim, gcn_hidden_dim, num_support, DROPOUT_RATIO)
        self.dense1 = FullyConnected(
            side_feat_dim, side_hidden_dim, dropout=0., use_bias=True)
        self.dense2 = FullyConnected(
            fused_dim, encode_hidden_dim,
            dropout=DROPOUT_RATIO, activation=identity)
        self.decoder = Decoder(
            encode_hidden_dim, num_basis, num_classes,
            dropout=DROPOUT_RATIO, activation=identity)

    def forward(self, user_supports, item_supports,
                user_inputs, item_inputs,
                user_side_inputs, item_side_inputs,
                user_edge_idx, item_edge_idx):
        """Return the rating logits of every (user, item) edge.

        The edges are given by the paired entries of ``user_edge_idx`` and
        ``item_edge_idx``.
        """
        gcn_user, gcn_movie = self.encoder(
            user_supports, item_supports, user_inputs, item_inputs)
        side_user, side_movie = self.dense1(user_side_inputs, item_side_inputs)

        # Concatenate the graph-based and the attribute-based representation.
        fused_user = torch.cat((gcn_user, side_user), dim=1)
        fused_movie = torch.cat((gcn_movie, side_movie), dim=1)

        embed_user, embed_movie = self.dense2(fused_user, fused_movie)
        return self.decoder(embed_user, embed_movie,
                            user_edge_idx, item_edge_idx)
# Load MovieLens-100K (downloading/extracting it if necessary) and build the
# adjacency matrices, features, edge indices, labels and the train mask.
data = MovielensDataset()
user2movie_adjacencies, movie2user_adjacencies, \
user_side_feature, movie_side_feature, \
user_identity_feature, movie_identity_feature, \
user_indices, movie_indices, labels, train_mask = data.build_graph(
*data.read_data())
# Convert every scipy/numpy object to a torch tensor on DEVICE.
user2movie_adjacencies = [to_torch_sparse_tensor(adj, DEVICE) for adj in user2movie_adjacencies]
movie2user_adjacencies = [to_torch_sparse_tensor(adj, DEVICE) for adj in movie2user_adjacencies]
user_side_feature = tensor_from_numpy(user_side_feature, DEVICE).float()
movie_side_feature = tensor_from_numpy(movie_side_feature, DEVICE).float()
user_identity_feature = tensor_from_numpy(user_identity_feature, DEVICE).float()
movie_identity_feature = tensor_from_numpy(movie_identity_feature, DEVICE).float()
user_indices = tensor_from_numpy(user_indices, DEVICE).long()
movie_indices = tensor_from_numpy(movie_indices, DEVICE).long()
# labels and train_mask keep the dtype produced by build_graph.
labels = tensor_from_numpy(labels, DEVICE)
train_mask = tensor_from_numpy(train_mask, DEVICE)
# Model, loss and optimizer; these globals are read by train() and test().
model = GraphMatrixCompletion(NODE_INPUT_DIM, SIDE_FEATURE_DIM, GCN_HIDDEN_DIM,
SIDE_HIDDEN_DIM, ENCODE_HIDDEN_DIM, num_basis=NUM_BASIS).to(DEVICE)
criterion = nn.CrossEntropyLoss().to(DEVICE)
optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE, weight_decay=WEIGHT_DACAY)
# Positional arguments of model.forward, in order; the model always sees all
# edges and the train/test split is applied to its output via train_mask.
model_inputs = (user2movie_adjacencies, movie2user_adjacencies,
user_identity_feature, movie_identity_feature,
user_side_feature, movie_side_feature, user_indices, movie_indices)
def train():
    """Train the global ``model`` for ``EPOCHS`` full-batch steps.

    After every optimisation step the model is evaluated with ``test()`` and
    one line of metrics is printed. Once training ends, the epoch with the
    lowest test RMSE is reported.
    """
    test_result = []
    model.train()
    for e in range(EPOCHS):
        logits = model(*model_inputs)
        train_logits = logits[train_mask]
        train_labels = labels[train_mask]
        loss = criterion(train_logits, train_labels)
        rmse = expected_rmse(train_logits, train_labels)

        optimizer.zero_grad()
        loss.backward()   # back-propagate to obtain the parameter gradients
        optimizer.step()  # apply the gradient update

        tr = test()
        test_result.append(tr)
        # test() switches the model to eval mode; switch back for training.
        model.train()
        print(f"Epoch {e:04d}: TrainLoss: {loss.item():.4f}, TrainRMSE: {rmse.item():.4f}, "
              f"TestRMSE: {tr[0]:.4f}, TestLoss: {tr[1]:.4f}")

    # Each row is (test RMSE, test loss); pick the row with the lowest RMSE.
    test_result = np.asarray(test_result)
    idx = test_result[:, 0].argmin()
    print(f'test min rmse {test_result[idx]} on epoch {idx}')
def test():
    """Evaluate the global ``model`` on the edges outside ``train_mask``.

    Puts the model in eval mode and runs without gradient tracking.

    Returns:
        tuple: ``(rmse, loss)`` as Python floats.
    """
    model.eval()
    with torch.no_grad():
        held_out = ~train_mask
        logits = model(*model_inputs)
        held_out_logits = logits[held_out]
        held_out_labels = labels[held_out]
        loss = criterion(held_out_logits, held_out_labels)
        rmse = expected_rmse(held_out_logits, held_out_labels)
    return rmse.item(), loss.item()
def expected_rmse(logits, label):
    """RMSE between the true ratings and the expected predicted rating.

    The expected rating is the softmax probability of each class weighted by
    the rating values in ``SCORES``. ``label`` holds 0-based class indices,
    so 1 is added to recover the rating.
    """
    probabilities = F.softmax(logits, dim=1)
    expected_rating = (probabilities * SCORES).sum(dim=1)
    squared_error = ((label + 1) - expected_rating) ** 2
    return squared_error.mean().sqrt()
# Start training only when this file is executed as a script.
if __name__ == "__main__":
    train()
运行结果
注:篇幅所限,这里只列出了前后20轮的运行结果供大家参考
Epoch 0000: TrainLoss: 1.7052, TrainRMSE: 1.3093, TestRMSE: 1.1578, TestLoss: 1.4967
Epoch 0001: TrainLoss: 1.5941, TrainRMSE: 1.1776, TestRMSE: 1.1894, TestLoss: 1.5403
Epoch 0002: TrainLoss: 1.5997, TrainRMSE: 1.1835, TestRMSE: 1.1571, TestLoss: 1.5028
Epoch 0003: TrainLoss: 1.5458, TrainRMSE: 1.1499, TestRMSE: 1.1460, TestLoss: 1.4842
Epoch 0004: TrainLoss: 1.5458, TrainRMSE: 1.1472, TestRMSE: 1.1368, TestLoss: 1.4695
Epoch 0005: TrainLoss: 1.5165, TrainRMSE: 1.1284, TestRMSE: 1.1303, TestLoss: 1.4720
Epoch 0006: TrainLoss: 1.5071, TrainRMSE: 1.1231, TestRMSE: 1.1133, TestLoss: 1.4629
Epoch 0007: TrainLoss: 1.4849, TrainRMSE: 1.0998, TestRMSE: 1.0750, TestLoss: 1.4322
Epoch 0008: TrainLoss: 1.4682, TrainRMSE: 1.0764, TestRMSE: 1.0468, TestLoss: 1.4041
Epoch 0009: TrainLoss: 1.4471, TrainRMSE: 1.0439, TestRMSE: 1.0416, TestLoss: 1.4002
Epoch 0010: TrainLoss: 1.4428, TrainRMSE: 1.0359, TestRMSE: 1.0271, TestLoss: 1.3825
Epoch 0011: TrainLoss: 1.4186, TrainRMSE: 1.0240, TestRMSE: 1.0199, TestLoss: 1.3828
Epoch 0012: TrainLoss: 1.4095, TrainRMSE: 1.0141, TestRMSE: 1.0189, TestLoss: 1.3768
Epoch 0013: TrainLoss: 1.4111, TrainRMSE: 1.0171, TestRMSE: 1.0216, TestLoss: 1.3631
Epoch 0014: TrainLoss: 1.3891, TrainRMSE: 1.0082, TestRMSE: 1.0324, TestLoss: 1.3726
Epoch 0015: TrainLoss: 1.3936, TrainRMSE: 1.0145, TestRMSE: 1.0251, TestLoss: 1.3606
Epoch 0016: TrainLoss: 1.3946, TrainRMSE: 1.0070, TestRMSE: 1.0080, TestLoss: 1.3349
Epoch 0017: TrainLoss: 1.3565, TrainRMSE: 0.9945, TestRMSE: 1.0084, TestLoss: 1.3388
Epoch 0018: TrainLoss: 1.3468, TrainRMSE: 0.9892, TestRMSE: 1.0125, TestLoss: 1.3410
Epoch 0019: TrainLoss: 1.3519, TrainRMSE: 0.9961, TestRMSE: 1.0072, TestLoss: 1.3289
Epoch 0020: TrainLoss: 1.3598, TrainRMSE: 1.0132, TestRMSE: 0.9984, TestLoss: 1.3182
…
Epoch 0980: TrainLoss: 1.1613, TrainRMSE: 0.8841, TestRMSE: 0.9200, TestLoss: 1.2317
Epoch 0981: TrainLoss: 1.1614, TrainRMSE: 0.8861, TestRMSE: 0.9200, TestLoss: 1.2300
Epoch 0982: TrainLoss: 1.1685, TrainRMSE: 0.8927, TestRMSE: 0.9186, TestLoss: 1.2272
Epoch 0983: TrainLoss: 1.1560, TrainRMSE: 0.8842, TestRMSE: 0.9177, TestLoss: 1.2271
Epoch 0984: TrainLoss: 1.1618, TrainRMSE: 0.8844, TestRMSE: 0.9174, TestLoss: 1.2279
Epoch 0985: TrainLoss: 1.1654, TrainRMSE: 0.8898, TestRMSE: 0.9178, TestLoss: 1.2294
Epoch 0986: TrainLoss: 1.1657, TrainRMSE: 0.8869, TestRMSE: 0.9181, TestLoss: 1.2300
Epoch 0987: TrainLoss: 1.1573, TrainRMSE: 0.8800, TestRMSE: 0.9191, TestLoss: 1.2292
Epoch 0988: TrainLoss: 1.1560, TrainRMSE: 0.8812, TestRMSE: 0.9227, TestLoss: 1.2316
Epoch 0989: TrainLoss: 1.1607, TrainRMSE: 0.8891, TestRMSE: 0.9226, TestLoss: 1.2301
Epoch 0990: TrainLoss: 1.1671, TrainRMSE: 0.8919, TestRMSE: 0.9203, TestLoss: 1.2288
Epoch 0991: TrainLoss: 1.1538, TrainRMSE: 0.8811, TestRMSE: 0.9186, TestLoss: 1.2284
Epoch 0992: TrainLoss: 1.1585, TrainRMSE: 0.8839, TestRMSE: 0.9172, TestLoss: 1.2265
Epoch 0993: TrainLoss: 1.1628, TrainRMSE: 0.8848, TestRMSE: 0.9162, TestLoss: 1.2245
Epoch 0994: TrainLoss: 1.1622, TrainRMSE: 0.8859, TestRMSE: 0.9160, TestLoss: 1.2251
Epoch 0995: TrainLoss: 1.1659, TrainRMSE: 0.8870, TestRMSE: 0.9174, TestLoss: 1.2281
Epoch 0996: TrainLoss: 1.1630, TrainRMSE: 0.8855, TestRMSE: 0.9180, TestLoss: 1.2284
Epoch 0997: TrainLoss: 1.1649, TrainRMSE: 0.8871, TestRMSE: 0.9200, TestLoss: 1.2304
Epoch 0998: TrainLoss: 1.1707, TrainRMSE: 0.8937, TestRMSE: 0.9203, TestLoss: 1.2294
Epoch 0999: TrainLoss: 1.1621, TrainRMSE: 0.8875, TestRMSE: 0.9196, TestLoss: 1.2296
test min rmse [0.9158209 1.21602809] on epoch 275