## Binary classification example
- Label format: `[0, 0, 0, 1, 1, 1]`
- Binary classification losses (see the sketch after this list):
  - `nn.BCELoss()`: the network's final output must go through a sigmoid, because this loss expects probabilities
  - `nn.BCEWithLogitsLoss()`: the network's final output is the raw logit without sigmoid, because this loss applies the sigmoid internally
- The final FC layer has a single neuron
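The two losses compute the same binary cross-entropy; only where the sigmoid is applied differs. A minimal standalone sketch verifying this by hand (the toy logits and targets here are made up purely for illustration):

```python
import torch
import torch.nn as nn

# Hypothetical toy logits and float targets, for illustration only
logits = torch.tensor([0.5, -1.2, 2.0])
target = torch.tensor([1., 0., 1.])
probs = torch.sigmoid(logits)

# Manual binary cross-entropy: mean of -[y*log(p) + (1-y)*log(1-p)]
manual = -(target * probs.log() + (1 - target) * (1 - probs).log()).mean()

print(manual.item())                                  # hand-computed
print(nn.BCELoss()(probs, target).item())             # expects probabilities
print(nn.BCEWithLogitsLoss()(logits, target).item())  # expects raw logits
```

All three print the same value up to float rounding. `nn.BCEWithLogitsLoss` is generally the safer choice, since fusing the sigmoid into the loss is more numerically stable.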
```python
import torch
import torch.nn as nn


class DNN_binary(nn.Module):
    def __init__(self, input_dim, dnn_hidden_units, num_classes):
        super(DNN_binary, self).__init__()
        fc_layers = []
        self.input_dim = input_dim
        for i in range(len(dnn_hidden_units)):
            fc_layers.append(nn.Linear(input_dim, dnn_hidden_units[i]))
            fc_layers.append(nn.BatchNorm1d(dnn_hidden_units[i]))
            fc_layers.append(nn.ReLU(inplace=True))
            input_dim = dnn_hidden_units[i]
        self.fc_layers = nn.Sequential(*fc_layers)
        self.fc_out = nn.Linear(dnn_hidden_units[-1], num_classes)
        # Re-initialize the hidden-layer weights with a small-std normal
        for name, tensor in self.fc_layers.named_parameters():
            if 'weight' in name:
                nn.init.normal_(tensor, mean=0, std=0.0001)

    def forward(self, inputs, flag=True):
        out = self.fc_layers(inputs.view(-1, self.input_dim))
        out = self.fc_out(out)
        if flag:
            # Probabilities, for nn.BCELoss
            return torch.sigmoid(out.squeeze(1))
        else:
            # Raw logits, for nn.BCEWithLogitsLoss
            return out.squeeze(1)


batch_size, seq_len, emb_dim = 6, 3, 4
input = torch.randn(batch_size, seq_len, emb_dim)
print(input)
target = torch.tensor([0, 0, 0, 1, 1, 1]).float()  # BCE targets must be float
print(target)
net = DNN_binary(input_dim=seq_len * emb_dim, dnn_hidden_units=(64, 64), num_classes=1)
pred_probs1 = net(input, flag=True)   # probabilities
pred_probs2 = net(input, flag=False)  # logits
print((torch.sigmoid(pred_probs2) == pred_probs1).all())
loss_function1 = nn.BCELoss()
loss_function2 = nn.BCEWithLogitsLoss()
loss1 = loss_function1(pred_probs1, target)
loss2 = loss_function2(pred_probs2, target)
print(loss1.item(), loss2.item())
```
Output:

```
tensor([[[ 8.4793e-01, -9.6543e-01, -6.2708e-01, -1.7069e-02],
         [-1.4977e+00, -2.2452e+00, -7.4397e-02,  1.7243e+00],
         [ 1.0214e+00,  1.0061e+00,  1.3069e+00, -5.0149e-01]],

        [[ 5.7075e-01,  2.2900e+00, -4.1233e-02,  6.6907e-01],
         [-2.7835e-01, -7.2288e-04, -4.9872e-02,  9.4977e-01],
         [-1.9064e-01, -9.9173e-01, -1.8691e+00,  9.6890e-01]],

        [[ 2.1142e+00,  1.1281e+00, -9.9980e-01, -8.9916e-01],
         [-5.2875e-01, -1.8958e-02, -2.7962e+00,  2.0635e+00],
         [-8.1441e-01,  2.3245e-01,  1.1462e+00,  4.0582e-01]],

        [[-1.8124e+00, -1.2169e+00,  6.0255e-01, -5.1454e-01],
         [ 4.7466e-01, -3.2618e-01,  1.1159e+00, -1.7710e-01],
         [ 1.7685e+00, -1.0302e+00, -2.1689e+00, -3.8944e-01]],

        [[-6.3599e-01,  1.2273e-01, -1.3282e-01,  9.0053e-01],
         [-1.3053e+00,  1.4624e+00, -5.3523e-01, -1.2314e+00],
         [ 4.1356e-01,  3.7533e-01, -8.1962e-01, -1.0025e+00]],

        [[ 5.2201e-01,  1.8164e-01, -8.4185e-02,  8.2399e-01],
         [-1.1083e+00, -4.2032e-01, -1.2230e+00,  1.2096e-01],
         [ 2.2036e+00, -4.6791e-01,  4.0248e-01,  9.8281e-01]]])
tensor([0., 0., 0., 1., 1., 1.])
tensor(True)
0.693398654460907 0.6933985352516174
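The two loss values agree up to float rounding, and the value itself is no accident: with the hidden weights initialized at std=0.0001, the untrained network predicts p ≈ 0.5 for every sample, so the loss sits near -ln(0.5) ≈ 0.6931, which is what the printed 0.6934 reflects.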
## Multi-class classification example
- Label format (taking 3 classes as an example):
  - Class indices: `[0, 1, 2]`
  - Or one-hot rows:
    ```
    tensor([[1., 0., 0.],
            [0., 1., 0.],
            [0., 0., 1.]])
    ```
- Multi-class loss (see the sketch after this list):
  - `nn.CrossEntropyLoss()`: the network's final output is raw logits without softmax, because the loss applies log-softmax internally
- The final FC layer has as many neurons as there are classes
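`nn.CrossEntropyLoss` is equivalent to `log_softmax` followed by `nn.NLLLoss`, and since PyTorch 1.10 it also accepts float class-probability (e.g. one-hot) targets directly. A minimal sketch, with toy logits made up for illustration:

```python
import torch
import torch.nn.functional as F

# Hypothetical toy logits for a 3-class problem
logits = torch.tensor([[2.0, 0.5, -1.0],
                       [0.1, 1.5,  0.3]])
idx_target = torch.tensor([0, 1])               # class indices
prob_target = F.one_hot(idx_target, 3).float()  # one-hot rows

print(F.cross_entropy(logits, idx_target))                   # built-in
print(F.nll_loss(F.log_softmax(logits, dim=1), idx_target))  # decomposed, same value
print(F.cross_entropy(logits, prob_target))                  # probability targets (PyTorch >= 1.10)
```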
Taking 3-class classification as an example:
```python
import torch
import torch.nn as nn
import torch.nn.functional as F


class DNN(nn.Module):
    def __init__(self, input_dim, dnn_hidden_units, num_classes):
        super(DNN, self).__init__()
        fc_layers = []
        self.input_dim = input_dim
        for i in range(len(dnn_hidden_units)):
            fc_layers.append(nn.Linear(input_dim, dnn_hidden_units[i]))
            fc_layers.append(nn.BatchNorm1d(dnn_hidden_units[i]))
            fc_layers.append(nn.ReLU(inplace=True))
            input_dim = dnn_hidden_units[i]
        self.fc_layers = nn.Sequential(*fc_layers)
        self.fc_out = nn.Linear(dnn_hidden_units[-1], num_classes)
        # Re-initialize the hidden-layer weights with a small-std normal
        for name, tensor in self.fc_layers.named_parameters():
            if 'weight' in name:
                nn.init.normal_(tensor, mean=0, std=0.0001)

    def forward(self, inputs):
        out = self.fc_layers(inputs.view(-1, self.input_dim))
        out = self.fc_out(out)
        return out  # raw logits; nn.CrossEntropyLoss applies log-softmax itself


def demo_train():
    batch_size, seq_len, emb_dim = 6, 3, 4
    input = torch.randn(batch_size, seq_len, emb_dim)
    print(input)
    target = torch.tensor([0, 0, 1, 1, 2, 2])
    target1 = F.one_hot(target).float()  # one-hot (probability) targets
    target2 = target.long()              # class-index targets
    print(target1)
    print(target2)
    net = DNN(input_dim=seq_len * emb_dim, dnn_hidden_units=(64, 64), num_classes=3)
    pred_probs = net(input)  # raw logits, despite the variable name
    print(pred_probs)
    loss_function = nn.CrossEntropyLoss()
    loss1 = loss_function(pred_probs, target1)  # float probability targets (PyTorch >= 1.10)
    loss2 = loss_function(pred_probs, target2)  # long class-index targets
    print(loss1.item(), loss2.item())


demo_train()
```
Output:

```
tensor([[[-0.7408,  0.3025, -1.0376, -0.5466],
         [ 0.0322, -0.4760,  0.2513,  1.8802],
         [ 0.9833,  1.4926, -0.6791, -0.2804]],

        [[-1.1813, -1.1706, -0.1627,  0.1864],
         [-1.2612, -1.3480,  0.3172,  1.9101],
         [ 1.3913, -1.1372,  0.8389,  0.2928]],

        [[ 0.0643,  1.0133,  0.0922, -1.8800],
         [-0.8096,  0.6886, -0.2188,  0.8877],
         [-0.1556, -0.5697, -0.3586, -1.0695]],

        [[-0.5265, -0.9017,  0.0790, -0.8117],
         [-0.8104,  0.2160, -0.0704,  0.8088],
         [-0.1420,  0.3280, -0.5399, -0.1540]],

        [[ 1.9470,  0.7277, -0.3088,  0.1721],
         [-0.5745, -1.0028,  1.8998, -0.4627],
         [ 0.3502,  1.1034, -1.6437, -1.1877]],

        [[ 0.4783, -0.7039,  1.4341, -0.1845],
         [ 1.3011, -0.0039, -0.6479,  0.1350],
         [-1.5386,  0.7718,  0.3782,  0.8980]]])
tensor([[1., 0., 0.],
        [1., 0., 0.],
        [0., 1., 0.],
        [0., 1., 0.],
        [0., 0., 1.],
        [0., 0., 1.]])
tensor([0, 0, 1, 1, 2, 2])
tensor([[ 0.1023,  0.0600, -0.1141],
        [ 0.1023,  0.0600, -0.1141],
        [ 0.1023,  0.0600, -0.1141],
        [ 0.1023,  0.0600, -0.1141],
        [ 0.1023,  0.0600, -0.1141],
        [ 0.1023,  0.0600, -0.1141]], grad_fn=<AddmmBackward0>)
1.1029125452041626 1.1029125452041626
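As in the binary case, the tiny-std initialization makes all six logit rows nearly identical, so the untrained prediction is roughly uniform over the 3 classes and both losses land near -ln(1/3) ≈ 1.0986, close to the printed 1.1029. Note also that the one-hot and class-index targets produce exactly the same loss.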
## Binary classification with softmax

Binary classification can also be treated as 2-class multi-class classification: the final FC layer has 2 neurons and `nn.CrossEntropyLoss` is used.
The network is the same `DNN` class defined above; only the demo changes (2 output neurons, binary targets):

```python
def demo_train():
    batch_size, seq_len, emb_dim = 6, 3, 4
    input = torch.randn(batch_size, seq_len, emb_dim)
    print(input)
    target = torch.tensor([0, 0, 0, 1, 1, 1])
    target1 = F.one_hot(target).float()  # one-hot targets, shape (6, 2)
    target2 = target.long()              # class-index targets
    print(target1)
    print(target2)
    net = DNN(input_dim=seq_len * emb_dim, dnn_hidden_units=(64, 64), num_classes=2)
    pred_probs = net(input)  # raw 2-class logits
    print(pred_probs)
    loss_function = nn.CrossEntropyLoss()
    loss1 = loss_function(pred_probs, target1)
    loss2 = loss_function(pred_probs, target2)
    print(loss1.item(), loss2.item())


demo_train()
```
Output:

```
tensor([[[-1.0273, -1.1568,  0.4176, -0.5005],
         [-0.2111, -0.9956,  0.7913, -0.8623],
         [-0.3358, -1.0753,  0.0076,  0.0104]],

        [[ 0.1666,  2.1301, -0.5491, -1.0460],
         [-0.2292,  0.9588, -1.4148,  1.6562],
         [ 2.2176, -1.0187,  0.7178, -0.1506]],

        [[ 0.4299, -0.0988, -0.3603, -1.1780],
         [ 2.0595,  0.0898, -0.3522,  0.1985],
         [-0.3710,  2.4377,  2.0191, -0.5894]],

        [[ 1.0433,  1.1146,  0.2392, -0.6284],
         [-1.0640, -0.8061, -0.0592, -0.9320],
         [-0.5394,  0.8351, -1.9062, -1.6103]],

        [[ 1.4592,  2.1698,  0.1275,  0.1733],
         [-0.0941, -0.6828,  0.6800,  1.1637],
         [-0.2533,  0.0238,  0.8612,  1.0679]],

        [[-1.1885,  0.2141, -1.7337,  0.1454],
         [-0.3513,  0.0634,  0.3803,  0.4642],
         [ 0.2229,  0.2430,  1.2290,  0.7928]]])
tensor([[1., 0.],
        [1., 0.],
        [1., 0.],
        [0., 1.],
        [0., 1.],
        [0., 1.]])
tensor([0, 0, 0, 1, 1, 1])
tensor([[-0.1173, -0.0428],
        [-0.1173, -0.0428],
        [-0.1173, -0.0428],
        [-0.1173, -0.0428],
        [-0.1173, -0.0428],
        [-0.1173, -0.0428]], grad_fn=<AddmmBackward0>)
0.6938410401344299 0.6938409805297852
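The 2-neuron softmax formulation and the 1-neuron sigmoid formulation are mathematically the same model: a softmax over two logits (z0, z1) equals a sigmoid applied to their difference z1 - z0. A minimal sketch checking this numerically (the toy logits here are made up for illustration):

```python
import torch
import torch.nn.functional as F

# Hypothetical toy 2-class logits and binary targets
logits = torch.tensor([[ 0.3, -0.6],
                       [-1.0,  0.4],
                       [ 0.2,  0.2]])
target = torch.tensor([0, 1, 1])

ce = F.cross_entropy(logits, target)
# Same loss via the sigmoid route, on the logit difference z1 - z0
bce = F.binary_cross_entropy_with_logits(logits[:, 1] - logits[:, 0],
                                         target.float())
print(ce.item(), bce.item())  # equal up to float rounding
```

In practice the 1-neuron sigmoid head with `nn.BCEWithLogitsLoss` is the more common choice for binary problems; the 2-neuron softmax head gives identical losses with twice the output parameters.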
Reference: a summary of binary classification, multi-class classification, and multi-label problems in PyTorch: https://zhuanlan.zhihu.com/p/372628727