手动实现softmax回归
# First, import the required packages.
%matplotlib inline
import d2lzh as d2l
from mxnet import autograd, nd
# Use the Fashion-MNIST dataset and read it in mini-batches of 256 samples.
batch_size = 256
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size)
初始化模型参数
# Number of input features per sample: height * width of one image. The
# images used here are 28 x 28, so each flattened image has 784 values
# (the image shape can be inspected through its `shape` attribute).
num_inputs = 784
# Number of image classes.
num_outputs = 10

# Weights start as small Gaussian noise (std 0.01); biases start at zero.
w = nd.random.normal(scale=0.01, shape=(num_inputs, num_outputs))
b = nd.zeros(num_outputs)

# Attach gradient buffers to the model parameters so autograd can fill them.
w.attach_grad()
b.attach_grad()
实现softmax运算
X = nd.array([[1, 2, 3], [4, 5, 6]])
# axis=0 sums the elements of each column (collapsing the rows), while
# axis=1 sums the elements of each row (collapsing the columns).
# keepdims=True keeps the reduced axis in the result with length 1.
X.sum(axis=0, keepdims=True), X.sum(axis=1, keepdims=True)
def softmax(X):
    """Apply the softmax function to every row of ``X``.

    Each element is exponentiated and then divided by the sum of the
    exponentials in its row (axis 1), so every output row is non-negative
    and sums to 1.

    Args:
        X: NDArray whose rows (axis 1) hold the per-class scores of a sample.

    Returns:
        NDArray of the same shape as ``X`` containing the row-wise
        probabilities.
    """
    # Subtracting the row maximum does not change the mathematical result
    # (the common factor cancels in the division) but keeps exp() from
    # overflowing to inf for large scores, which would yield NaN outputs.
    shifted = X - X.max(axis=1, keepdims=True)
    X_exp = nd.exp(shifted)
    # keepdims=True keeps the row sums as a column so the division broadcasts.
    partition = X_exp.sum(axis=1, keepdims=True)
    return X_exp / partition
# `shape` must be passed as a keyword argument; `shape(2, 5)` would try to
# call an undefined function named `shape`.
X = nd.random.normal(shape=(2, 5))
X_prob = softmax(X)
# Show the probabilities together with the per-row sums.
X_prob, X_prob.sum(axis=1)
定义模型
net 函数先用 reshape 把每张图片展平成长度为 num_inputs 的向量,再做线性变换 XW + b,最后通过 softmax 得到各类别的预测概率。
def net(X):
    """Softmax regression model: flatten, apply the linear layer, then softmax.

    Args:
        X: Batch of inputs; it is reshaped to ``(-1, num_inputs)`` so that
            each sample becomes one row vector.

    Returns:
        The result of ``softmax`` applied to ``X_flat . w + b``.
    """
    flattened = X.reshape((-1, num_inputs))
    scores = nd.dot(flattened, w) + b
    return softmax(scores)
定义损失函数
在这我定义了2个样本在3个类别的预测概率。
使用 nd.pick 函数可以取出每个样本的真实标签所对应的预测概率。
# Predicted probabilities of 2 samples over 3 classes (one row per sample).
y_hat = nd.array([
    [0.1, 0.3, 0.6],
    [0.3, 0.2, 0.5],
])
# Label (class index) of each of the 2 samples.
y = nd.array([0, 2], dtype='int32')
def cross_entropy(y_hat, y):
    """Return the cross-entropy loss of every sample.

    Args:
        y_hat: Predicted probabilities, one row per sample.
        y: Label index of each sample, used to pick one entry per row of
            ``y_hat``.

    Returns:
        The negative logarithm of the entries selected by ``nd.pick``.
    """
    label_prob = nd.pick(y_hat, y)
    return -label_prob.log()
计算分类准确率
y_hat.argmax(axis=1) 返回矩阵 y_hat 每一行中最大元素的索引(即预测类别),且返回结果与变量 y 形状相同。
def accuracy(y_hat, y):
    """Return the fraction of samples whose predicted class equals the label.

    Args:
        y_hat: Per-class scores or probabilities, one row per sample.
        y: Labels; cast to ``float32`` before being compared with the
            ``argmax`` result.

    Returns:
        The mean of the element-wise equality, converted to a Python scalar.
    """
    predicted = y_hat.argmax(axis=1)
    labels = y.astype('float32')
    hits = predicted == labels
    return hits.mean().asscalar()
# With the sample y_hat and y defined above, the predicted classes are [2, 2]
# and the labels are [0, 2], so only the second sample is correct (0.5).
accuracy(y_hat,y)
def evaluate_accuracy(data_iter, net):
    """Compute the classification accuracy of ``net`` over a whole data set.

    Args:
        data_iter: Iterable yielding ``(X, y)`` mini-batches.
        net: Callable mapping a batch ``X`` to per-class scores, one row per
            sample.

    Returns:
        Number of correct predictions divided by the number of samples seen.
    """
    acc_sum, n = 0.0, 0
    for X, y in data_iter:
        # Cast the labels to float32 before comparing them with argmax output.
        y = y.astype('float32')
        # `sum` has to be called; `.sum.asscalar()` would look up `asscalar`
        # on the bound method and raise AttributeError.
        acc_sum += (net(X).argmax(axis=1) == y).sum().asscalar()
        n += y.size
    return acc_sum / n
# Accuracy of the untrained model on the test set. NOTE(review): with randomly
# initialized weights and 10 classes this is presumably close to 0.1 — confirm.
evaluate_accuracy(test_iter,net)
训练模型
num_epochs, lr = 5, 0.1  # number of training epochs and learning rate
def train_ch3(net, train_iter, test_iter, loss, num_epochs, batch_size, params=None, lr=None, trainer=None):
    """Train ``net`` and print loss and accuracy after every epoch.

    Args:
        net: Callable mapping a batch ``X`` to predictions.
        train_iter: Iterable yielding ``(X, y)`` training mini-batches.
        test_iter: Iterable passed to ``evaluate_accuracy`` after each epoch.
        loss: Callable ``loss(y_hat, y)`` returning per-sample losses.
        num_epochs: Number of passes over ``train_iter``.
        batch_size: Forwarded to ``d2l.sgd`` or ``trainer.step``.
        params: Parameters handed to ``d2l.sgd``; used only when ``trainer``
            is None.
        lr: Learning rate handed to ``d2l.sgd``; used only when ``trainer``
            is None.
        trainer: Optional object with a ``step(batch_size)`` method; when
            given, it performs the update instead of ``d2l.sgd``.
    """
    for epoch_idx in range(num_epochs):
        loss_total = 0.0
        correct_total = 0.0
        sample_count = 0
        for X, y in train_iter:
            # Record the forward pass so gradients can be computed.
            with autograd.record():
                y_hat = net(X)
                l = loss(y_hat, y).sum()
            l.backward()

            # Update the parameters with the trainer when one is supplied,
            # otherwise with d2l.sgd.
            if trainer is not None:
                trainer.step(batch_size)
            else:
                d2l.sgd(params, lr, batch_size)

            labels = y.astype('float32')
            loss_total += l.asscalar()
            matches = y_hat.argmax(axis=1) == labels
            correct_total += matches.sum().asscalar()
            sample_count += labels.size

        test_acc = evaluate_accuracy(test_iter, net)
        epoch_stats = (
            epoch_idx + 1,
            loss_total / sample_count,
            correct_total / sample_count,
            test_acc,
        )
        print('epoch %d, loss %.4f, train acc %.3f, test acc %.3f' % epoch_stats)
# Train the softmax regression model. No trainer is passed, so the parameters
# [w, b] are updated through d2l.sgd with learning rate lr.
train_ch3(net, train_iter, test_iter, cross_entropy, num_epochs, batch_size, [w, b], lr)