文章目录
1 Softmax 回归
1.1 回归 VS 分类
回归估计一个连续值
分类预测一个离散类别
1.2 One-hot encoding
独热编码是一个向量，它的分量和分类类别一样多。类别对应的分量设置为 1，其他所有分量设置为 0。
1.3 softmax 函数
$$\begin{aligned} \hat{\mathbf{y}} &= \operatorname{softmax}(\mathbf{o}) \\ \hat{y}_{i} &= \frac{\exp\left(o_{i}\right)}{\sum_{k} \exp\left(o_{k}\right)} \end{aligned}$$
从上面的式子可以得出 $0 \le \hat{y}_j \le 1,\ \sum_{j=1}^{n}\hat{y}_j = 1$。
根据这个性质，可以知道，在多分类的问题中，样本属于各个类别的概率在 $[0, 1]$ 之间，并且属于所有类别的概率和为 $1$。
2 Softmax 回归从零开始
2.1 完整代码
因为我在写代码的时候是使用模块化的方式,每一个模块的功能是独立的,每一个模块的详细解释见注释。
# -*- coding: utf-8 -*-
# @Time : 2021/9/12 9:32
# @Author : Amonologue
# @software : pycharm
# @File : softmax回归.py
import torch
import torchvision
from torch.utils import data
from torchvision import transforms
from d2l import torch as d2l
import os
def load_data_fashion_mnist(batch_size, resize=None):
    """Download Fashion-MNIST (if not already on disk) and return
    (train_loader, test_loader) DataLoaders over it.

    Args:
        batch_size: number of samples per batch.
        resize: optional target size passed to transforms.Resize.
    """
    transform_steps = [transforms.ToTensor()]
    if resize:
        transform_steps.insert(0, transforms.Resize(resize))
    pipeline = transforms.Compose(transform_steps)
    # Only request a download when the dataset folder is absent;
    # torchvision stores Fashion-MNIST under ./data/FashionMNIST.
    need_download = not os.path.exists('./data/FashionMNIST')
    mnist_train = torchvision.datasets.FashionMNIST(
        root='./data', train=True, transform=pipeline, download=need_download
    )
    mnist_test = torchvision.datasets.FashionMNIST(
        root='./data', train=False, transform=pipeline, download=need_download
    )
    train_loader = data.DataLoader(mnist_train, batch_size, shuffle=True, num_workers=4)
    test_loader = data.DataLoader(mnist_test, batch_size, shuffle=False, num_workers=4)
    return (train_loader, test_loader)
def net(X):
    """Softmax-regression model: flatten each sample, apply the affine
    map with the global parameters W and b, then normalize with softmax."""
    flat = X.reshape((-1, W.shape[0]))  # flatten to (batch, num_inputs)
    logits = torch.matmul(flat, W) + b
    return softmax(logits)
def softmax(X):
    """Row-wise softmax of a 2-D tensor of logits.

    Subtracts each row's maximum before exponentiating — softmax is
    shift-invariant, and without this the original version overflowed
    to inf/nan for large logits (e.g. values around 1000).

    Returns a tensor of the same shape whose rows sum to 1.
    """
    shifted = X - X.max(dim=1, keepdim=True).values  # numerical stability
    X_exp = torch.exp(shifted)
    partition = X_exp.sum(dim=1, keepdim=True)
    return X_exp / partition  # broadcasting divides each row by its sum
def cross_entropy(y_hat, y):
    """Per-sample cross-entropy loss.

    Picks, for each row of predicted probabilities `y_hat`, the probability
    assigned to the true class index in `y`, and returns its negative log.
    """
    rows = range(len(y_hat))
    true_class_prob = y_hat[rows, y]  # fancy indexing: one entry per sample
    return -torch.log(true_class_prob)
def accuracy(y_hat, y):
    """Return the number (as float) of predictions in `y_hat` matching labels `y`.

    If `y_hat` is a matrix of per-class scores, the predicted class is the
    argmax of each row; otherwise `y_hat` is taken as class indices directly.
    """
    predictions = y_hat
    if len(predictions.shape) > 1 and predictions.shape[1] > 1:
        predictions = predictions.argmax(axis=1)
    hits = predictions.type(y.dtype) == y
    return float(hits.type(y.dtype).sum())
def evaluate_accuracy(net, data_iter):
    """Compute the accuracy of `net` over every batch of `data_iter`.

    Returns the fraction of correctly classified samples.
    """
    if isinstance(net, torch.nn.Module):
        net.eval()  # evaluation mode (e.g. disables dropout)
    metric = Accumulator(2)  # number correct, number seen
    # No gradients are needed for evaluation; without this the forward
    # passes build autograd graphs for nothing and waste memory.
    with torch.no_grad():
        for X, y in data_iter:
            metric.add(accuracy(net(X), y), y.numel())
    return metric[0] / metric[1]
class Accumulator:
    """Running sums over n variables (e.g. loss total, correct count, sample count)."""

    def __init__(self, n):
        self.data = [0.0] * n

    def add(self, *args):
        # Fold each new value onto its running total, element-wise;
        # extra args beyond the accumulator's width are ignored (zip truncates).
        for i, (total, value) in enumerate(zip(self.data, args)):
            self.data[i] = total + float(value)

    def reset(self):
        # Zero every slot while keeping the accumulator's width.
        self.data = [0.0] * len(self.data)

    def __getitem__(self, idx):
        return self.data[idx]
def train(net, train_iterator, loss, updater=None):
    """Train `net` for one epoch.

    Args:
        net: model callable mapping a batch X to predictions.
        train_iterator: iterable of (X, y) batches.
        loss: per-sample loss function, loss(y_hat, y) -> tensor.
        updater: parameter-update callable taking the batch size. The
            original signature had no such parameter, yet the main script
            calls train() with four arguments — accepting it here (with a
            default falling back to the module-level `updater`) fixes that
            TypeError while keeping 3-argument calls working.

    Returns:
        (average loss, average accuracy) over the epoch's samples.
    """
    if updater is None:
        # The parameter shadows the module-level function of the same
        # name, hence the globals() lookup for the fallback.
        updater = globals()['updater']
    # Running totals: loss sum, correct-prediction count, sample count.
    metric = Accumulator(3)
    for X, y in train_iterator:
        y_hat = net(X)
        l = loss(y_hat, y)
        l.sum().backward()
        updater(X.shape[0])  # custom optimizer step (also handles grads)
        metric.add(float(l.sum()), accuracy(y_hat, y), y.numel())
    # Average loss and average accuracy over the epoch.
    return metric[0] / metric[2], metric[1] / metric[2]
def updater(batch_size):
    """Minibatch SGD step on the module-level parameters W and b.

    Delegates to d2l.sgd with the module-level learning rate `lr`;
    presumably d2l.sgd updates the parameters in place and zeroes their
    gradients — confirm against the d2l implementation.
    """
    return d2l.sgd([W, b], lr, batch_size)
if __name__ == '__main__':
    batch_size = 256
    train_iterator, test_iterator = load_data_fashion_mnist(batch_size)
    num_inputs = 784   # each 28x28 image is flattened to a 784-vector
    num_outputs = 10   # Fashion-MNIST has 10 classes
    # Initialize parameters: small Gaussian weights, zero biases.
    W = torch.normal(0, 0.01, size=(num_inputs, num_outputs), requires_grad=True)
    b = torch.zeros(num_outputs, requires_grad=True)
    lr = 0.1          # learning rate
    num_epochs = 10   # number of training epochs
    loss = cross_entropy  # loss function: cross-entropy
    # Train. `train` falls back to the module-level `updater` (SGD on W, b),
    # so it is called with three arguments matching its signature.
    for epoch in range(num_epochs):
        train_loss, train_acc = train(net, train_iterator, loss)
        test_acc = evaluate_accuracy(net, test_iterator)
        # Single-line f-string: the original print split an f-string
        # replacement field across lines (a SyntaxError before Python 3.12)
        # and never reported the accuracies it computed.
        print(f'epoch {epoch + 1}: loss {train_loss:.4f}, '
              f'train acc {train_acc:.3f}, test acc {test_acc:.3f}')
3 Softmax 简洁实现
3.1 完整代码
因为之前在从零实现中已经下载了Fashion-MNIST,为了不再在简洁实现中再次下载数据,我们需要把d2l包中的load_data_fashion_mnist
修改为与从零实现中相同。
# -*- coding: utf-8 -*-
# @Time : 2021/9/12 17:57
# @Author : Amonologue
# @software : pycharm
# @File : softmax_simple.py
import torch
from torch import nn
from d2l import torch as d2l
def init_weight(m):
    """Weight initializer for `net.apply`: draw Linear weights from N(0, 0.01).

    Uses isinstance rather than the original `type(m) == nn.Linear`
    comparison — the idiomatic type check, which also covers Linear
    subclasses. Biases are left at their default initialization.
    """
    if isinstance(m, nn.Linear):
        nn.init.normal_(m.weight, std=0.01)
if __name__ == '__main__':
    batch_size = 256
    train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size)
    # Softmax regression as a two-layer pipeline: flatten 28x28 images
    # into 784-vectors, then one linear layer producing 10 class scores.
    model = nn.Sequential(nn.Flatten(), nn.Linear(784, 10))
    model.apply(init_weight)  # N(0, 0.01) weights for the Linear layer
    criterion = nn.CrossEntropyLoss()  # fuses softmax + cross-entropy
    learning_rate = 0.1
    optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)
    epochs = 10
    d2l.train_ch3(model, train_iter, test_iter, criterion, epochs, optimizer)