文章目录
1 Softmax 回归
1.1 回归 VS 分类
回归估计一个连续值
分类预测一个离散类别
1.2 One-hot encoding
独热编码是一个向量，它的分量和分类类别一样多。类别对应的分量设置为 1，其他所有分量设置为 0。
1.3 softmax 函数
$$\begin{aligned} \hat{\mathbf{y}} &= \operatorname{softmax}(\mathbf{o}) \\ \hat{y}_{i} &= \frac{\exp\left(o_{i}\right)}{\sum_{k} \exp\left(o_{k}\right)} \end{aligned}$$
从上面的式子可以得出 $0 \le \hat{y}_j \le 1,\ \sum_{j=1}^{n}\hat{y}_j = 1$。
根据这个性质，可以知道，在多分类的问题中，样本属于各个类别的概率在 $[0, 1]$ 之间，并且属于所有类别的概率和为 $1$。
2 Softmax 回归从零开始
2.1 完整代码
因为我在写代码的时候是使用模块化的方式,每一个模块的功能是独立的,每一个模块的详细解释见注释。
# -*- coding: utf-8 -*-
# @Time : 2021/9/12 9:32
# @Author : Amonologue
# @software : pycharm
# @File : softmax回归.py
import torch
import torchvision
from torch.utils import data
from torchvision import transforms
from d2l import torch as d2l
import os
def load_data_fashion_mnist(batch_size, resize=None):
    """Download Fashion-MNIST (if not already on disk) and return
    (train_loader, test_loader) DataLoaders over it.

    Args:
        batch_size: number of samples per batch.
        resize: optional target size passed to transforms.Resize.
    """
    transform_steps = [transforms.ToTensor()]
    if resize:
        transform_steps.insert(0, transforms.Resize(resize))
    pipeline = transforms.Compose(transform_steps)
    # Only request a download when the dataset folder is absent;
    # torchvision stores Fashion-MNIST under ./data/FashionMNIST.
    need_download = not os.path.exists('./data/FashionMNIST')
    mnist_train = torchvision.datasets.FashionMNIST(
        root='./data', train=True, transform=pipeline, download=need_download
    )
    mnist_test = torchvision.datasets.FashionMNIST(
        root='./data', train=False, transform=pipeline, download=need_download
    )
    train_loader = data.DataLoader(mnist_train, batch_size, shuffle=True, num_workers=4)
    test_loader = data.DataLoader(mnist_test, batch_size, shuffle=False, num_workers=4)
    return (train_loader, test_loader)
def net(X):
    """Softmax-regression model: flatten each sample, apply the affine
    map with the global parameters W and b, then normalize with softmax."""
    flat = X.reshape((-1, W.shape[0]))  # flatten to (batch, num_inputs)
    logits = torch.matmul(flat, W) + b
    return softmax(logits)
def softmax(X):
    """Row-wise softmax of a 2-D tensor of logits.

    Subtracts each row's maximum before exponentiating — softmax is
    shift-invariant, and without this the original version overflowed
    to inf/nan for large logits (e.g. values around 1000).

    Returns a tensor of the same shape whose rows sum to 1.
    """
    shifted = X - X.max(dim=1, keepdim=True).values  # numerical stability
    X_exp = torch.exp(shifted)
    partition = X_exp.sum(dim=1, keepdim=True)
    return X_exp / partition  # broadcasting divides each row by its sum
def cross_entropy(y_hat, y):
    """Per-sample cross-entropy loss.

    Picks, for each row of predicted probabilities `y_hat`, the probability
    assigned to the true class index in `y`, and returns its negative log.
    """
    rows = range(len(y_hat))
    true_class_prob = y_hat[rows, y]  # fancy indexing: one entry per sample
    return -torch.log(true_class_prob)
def accuracy(y_hat, y):
    """Return the number (as float) of predictions in `y_hat` matching labels `y`.

    If `y_hat` is a matrix of per-class scores, the predicted class is the
    argmax of each row; otherwise `y_hat` is taken as class indices directly.
    """
    predictions = y_hat
    if len(predictions.shape) > 1 and predictions.shape[1] > 1:
        predictions = predictions.argmax(axis=1)
    hits = predictions.type(y.dtype) == y
    return float(hits.type(y.dtype).sum())
def evaluate_accuracy(net, data_iter):
    """Compute the accuracy of `net` over every batch of `data_iter`.

    Returns the fraction of correctly classified samples.
    """
    if isinstance(net, torch.nn.Module):
        net.eval()  # evaluation mode (e.g. disables dropout)
    metric = Accumulator(2)  # number correct, number seen
    # No gradients are needed for evaluation; without this the forward
    # passes build autograd graphs for nothing and waste memory.
    with torch.no_grad():
        for X, y in data_iter:
            metric.add(accuracy(net(X), y), y.numel())
    return metric[0] / metric[1]
class Accumulator:
    """Running sums over n variables (e.g. loss total, correct count, sample count)."""

    def __init__(self, n):
        self.data = [0.0] * n

    def add(self, *args):
        # Fold each new value onto its running total, element-wise;
        # extra args beyond the accumulator's width are ignored (zip truncates).
        for i, (total, value) in enumerate(zip(self.data, args)):
            self.data[i] = total + float(value)

    def reset(self):
        # Zero every slot while keeping the accumulator's width.
        self.data = [0.0] * len(self.data)

    def __getitem__(self, idx):
        return self.data[idx]
def train(net, train_iterator, loss, updater=None):
    """Train `net` for one epoch.

    Args:
        net: model callable mapping a batch X to predictions.
        train_iterator: iterable of (X, y) batches.
        loss: per-sample loss function, loss(y_hat, y) -> tensor.
        updater: parameter-update callable taking the batch size. The
            original signature had no such parameter, yet the main script
            calls train() with four arguments — accepting it here (with a
            default falling back to the module-level `updater`) fixes that
            TypeError while keeping 3-argument calls working.

    Returns:
        (average loss, average accuracy) over the epoch's samples.
    """
    if updater is None:
        # The parameter shadows the module-level function of the same
        # name, hence the globals() lookup for the fallback.
        updater = globals()['updater']
    # Running totals: loss sum, correct-prediction count, sample count.
    metric = Accumulator(3)
    for X, y in train_iterator:
        y_hat = net(X)
        l = loss(y_hat, y)
        l.sum().backward()
        updater(X.shape[0])  # custom optimizer step (also handles grads)
        metric.add(float(l.sum()), accuracy(y_hat, y), y.numel())
    # Average loss and average accuracy over the epoch.
    return metric[0] / metric[2], metric[1] / metric[2]
def updater(batch_size):
    """Minibatch SGD step on the module-level parameters W and b.

    Delegates to d2l.sgd with the module-level learning rate `lr`;
    presumably d2l.sgd updates the parameters in place and zeroes their
    gradients — confirm against the d2l implementation.
    """
    return d2l.sgd([W, b], lr, batch_size)
if __name__ == '__main__':
    batch_size = 256
    train_iterator, test_iterator = load_data_fashion_mnist(batch_size)
    num_inputs = 784   # each 28x28 image is flattened to a 784-vector
    num_outputs = 10   # Fashion-MNIST has 10 classes
    # Initialize parameters: small Gaussian weights, zero biases.
    W = torch.normal(0, 0.01, size=(num_inputs, num_outputs), requires_grad=True)
    b = torch.zeros(num_outputs, requires_grad=True)
    lr = 0.1          # learning rate
    num_epochs = 10   # number of training epochs
    loss = cross_entropy  # loss function: cross-entropy
    # Train. `train` falls back to the module-level `updater` (SGD on W, b),
    # so it is called with three arguments matching its signature.
    for epoch in range(num_epochs):
        train_loss, train_acc = train(net, train_iterator, loss)
        test_acc = evaluate_accuracy(net, test_iterator)
        # Single-line f-string: the original print split an f-string
        # replacement field across lines (a SyntaxError before Python 3.12)
        # and never reported the accuracies it computed.
        print(f'epoch {epoch + 1}: loss {train_loss:.4f}, '
              f'train acc {train_acc:.3f}, test acc {test_acc:.3f}')
3 Softmax 简洁实现
3.1 完整代码
因为之前在从零实现中已经下载了Fashion-MNIST,为了不再在简洁实现中再次下载数据,我们需要把d2l包中的load_data_fashion_mnist
修改为与从零实现中相同。
# -*- coding: utf-8 -*-
# @Time : 2021/9/12 17:57
# @Author : Amonologue
# @software : pycharm
# @File : softmax_simple.py
import torch
from torch import nn
from d2l import torch as d2l
def init_weight(m):
    """Weight initializer for `net.apply`: draw Linear weights from N(0, 0.01).

    Uses isinstance rather than the original `type(m) == nn.Linear`
    comparison — the idiomatic type check, which also covers Linear
    subclasses. Biases are left at their default initialization.
    """
    if isinstance(m, nn.Linear):
        nn.init.normal_(m.weight, std=0.01)
if __name__ == '__main__':
    batch_size = 256
    train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size)
    # Softmax regression as a two-layer pipeline: flatten 28x28 images
    # into 784-vectors, then one linear layer producing 10 class scores.
    model = nn.Sequential(nn.Flatten(), nn.Linear(784, 10))
    model.apply(init_weight)  # N(0, 0.01) weights for the Linear layer
    criterion = nn.CrossEntropyLoss()  # fuses softmax + cross-entropy
    learning_rate = 0.1
    optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)
    epochs = 10
    d2l.train_ch3(model, train_iter, test_iter, criterion, epochs, optimizer)