## Binary classification example
- Label format: `[0, 0, 0, 1, 1, 1]`
- Binary classification losses (see the sketch after this list):
  - `nn.BCELoss()`: the network's final output must go through a sigmoid, because this loss expects probabilities
  - `nn.BCEWithLogitsLoss()`: the network's final output is the raw logit without sigmoid, because this loss applies the sigmoid internally
- The final FC layer has a single neuron
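The two losses compute the same binary cross-entropy; only where the sigmoid is applied differs. A minimal standalone sketch verifying this by hand (the toy logits and targets here are made up purely for illustration):

```python
import torch
import torch.nn as nn

# Hypothetical toy logits and float targets, for illustration only
logits = torch.tensor([0.5, -1.2, 2.0])
target = torch.tensor([1., 0., 1.])
probs = torch.sigmoid(logits)

# Manual binary cross-entropy: mean of -[y*log(p) + (1-y)*log(1-p)]
manual = -(target * probs.log() + (1 - target) * (1 - probs).log()).mean()

print(manual.item())                                  # hand-computed
print(nn.BCELoss()(probs, target).item())             # expects probabilities
print(nn.BCEWithLogitsLoss()(logits, target).item())  # expects raw logits
```

All three print the same value up to float rounding. `nn.BCEWithLogitsLoss` is generally the safer choice, since fusing the sigmoid into the loss is more numerically stable.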
```python
import torch
import torch.nn as nn


class DNN_binary(nn.Module):
    def __init__(self, input_dim, dnn_hidden_units, num_classes):
        super(DNN_binary, self).__init__()
        fc_layers = []
        self.input_dim = input_dim
        for i in range(len(dnn_hidden_units)):
            fc_layers.append(nn.Linear(input_dim, dnn_hidden_units[i]))
            fc_layers.append(nn.BatchNorm1d(dnn_hidden_units[i]))
            fc_layers.append(nn.ReLU(inplace=True))
            input_dim = dnn_hidden_units[i]
        self.fc_layers = nn.Sequential(*fc_layers)
        self.fc_out = nn.Linear(dnn_hidden_units[-1], num_classes)
        # Re-initialize the hidden-layer weights with a small-std normal
        for name, tensor in self.fc_layers.named_parameters():
            if 'weight' in name:
                nn.init.normal_(tensor, mean=0, std=0.0001)

    def forward(self, inputs, flag=True):
        out = self.fc_layers(inputs.view(-1, self.input_dim))
        out = self.fc_out(out)
        if flag:
            # Probabilities, for nn.BCELoss
            return torch.sigmoid(out.squeeze(1))
        else:
            # Raw logits, for nn.BCEWithLogitsLoss
            return out.squeeze(1)


batch_size, seq_len, emb_dim = 6, 3, 4
input = torch.randn(batch_size, seq_len, emb_dim)
print(input)
target = torch.tensor([0, 0, 0, 1, 1, 1]).float()  # BCE targets must be float
print(target)
net = DNN_binary(input_dim=seq_len * emb_dim, dnn_hidden_units=(64, 64), num_classes=1)
pred_probs1 = net(input, flag=True)   # probabilities
pred_probs2 = net(input, flag=False)  # logits
print((torch.sigmoid(pred_probs2) == pred_probs1).all())
loss_function1 = nn.BCELoss()
loss_function2 = nn.BCEWithLogitsLoss()
loss1 = loss_function1(pred_probs1, target)
loss2 = loss_function2(pred_probs2, target)
print(loss1.item(), loss2.item())
```
Output:

```
tensor([[[ 8.4793e-01, -9.6543e-01, -6.2708e-01, -1.7069e-02],
         [-1.4977e+00, -2.2452e+00, -7.4397e-02,  1.7243e+00],
         [ 1.0214e+00,  1.0061e+00,  1.3069e+00, -5.0149e-01]],

        [[ 5.7075e-01,  2.2900e+00, -4.1233e-02,  6.6907e-01],
         [-2.7835e-01, -7.2288e-04, -4.9872e-02,  9.4977e-01],
         [-1.9064e-01, -9.9173e-01, -1.8691e+00,  9.6890e-01]],

        [[ 2.1142e+00,  1.1281e+00, -9.9980e-01, -8.9916e-01],
         [-5.2875e-01, -1.8958e-02, -2.7962e+00,  2.0635e+00],
         [-8.1441e-01,  2.3245e-01,  1.1462e+00,  4.0582e-01]],

        [[-1.8124e+00, -1.2169e+00,  6.0255e-01, -5.1454e-01],
         [ 4.7466e-01, -3.2618e-01,  1.1159e+00, -1.7710e-01],
         [ 1.7685e+00, -1.0302e+00, -2.1689e+00, -3.8944e-01]],

        [[-6.3599e-01,  1.2273e-01, -1.3282e-01,  9.0053e-01],
         [-1.3053e+00,  1.4624e+00, -5.3523e-01, -1.2314e+00],
         [ 4.1356e-01,  3.7533e-01, -8.1962e-01, -1.0025e+00]],

        [[ 5.2201e-01,  1.8164e-01, -8.4185e-02,  8.2399e-01],
         [-1.1083e+00, -4.2032e-01, -1.2230e+00,  1.2096e-01],
         [ 2.2036e+00, -4.6791e-01,  4.0248e-01,  9.8281e-01]]])
tensor([0., 0., 0., 1., 1., 1.])
tensor(True)
0.693398654460907 0.6933985352516174
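The two loss values agree up to float rounding, and the value itself is no accident: with the hidden weights initialized at std=0.0001, the untrained network predicts p ≈ 0.5 for every sample, so the loss sits near -ln(0.5) ≈ 0.6931, which is what the printed 0.6934 reflects.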
## Multi-class classification example
- Label format (taking 3 classes as an example):
  - Class indices: `[0, 1, 2]`
  - Or one-hot rows:
    ```
    tensor([[1., 0., 0.],
            [0., 1., 0.],
            [0., 0., 1.]])
    ```
- Multi-class loss (see the sketch after this list):
  - `nn.CrossEntropyLoss()`: the network's final output is raw logits without softmax, because the loss applies log-softmax internally
- The final FC layer has as many neurons as there are classes
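`nn.CrossEntropyLoss` is equivalent to `log_softmax` followed by `nn.NLLLoss`, and since PyTorch 1.10 it also accepts float class-probability (e.g. one-hot) targets directly. A minimal sketch, with toy logits made up for illustration:

```python
import torch
import torch.nn.functional as F

# Hypothetical toy logits for a 3-class problem
logits = torch.tensor([[2.0, 0.5, -1.0],
                       [0.1, 1.5,  0.3]])
idx_target = torch.tensor([0, 1])               # class indices
prob_target = F.one_hot(idx_target, 3).float()  # one-hot rows

print(F.cross_entropy(logits, idx_target))                   # built-in
print(F.nll_loss(F.log_softmax(logits, dim=1), idx_target))  # decomposed, same value
print(F.cross_entropy(logits, prob_target))                  # probability targets (PyTorch >= 1.10)
```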
Taking 3-class classification as an example:
```python
import torch
import torch.nn as nn
import torch.nn.functional as F


class DNN(nn.Module):
    def __init__(self, input_dim, dnn_hidden_units, num_classes):
        super(DNN, self).__init__()
        fc_layers = []
        self.input_dim = input_dim
        for i in range(len(dnn_hidden_units)):
            fc_layers.append(nn.Linear(input_dim, dnn_hidden_units[i]))
            fc_layers.append(nn.BatchNorm1d(dnn_hidden_units[i]))
            fc_layers.append(nn.ReLU(inplace=True))
            input_dim = dnn_hidden_units[i]
        self.fc_layers = nn.Sequential(*fc_layers)
        self.fc_out = nn.Linear(dnn_hidden_units[-1], num_classes)
        # Re-initialize the hidden-layer weights with a small-std normal
        for name, tensor in self.fc_layers.named_parameters():
            if 'weight' in name:
                nn.init.normal_(tensor, mean=0, std=0.0001)

    def forward(self, inputs):
        out = self.fc_layers(inputs.view(-1, self.input_dim))
        out = self.fc_out(out)
        return out  # raw logits; nn.CrossEntropyLoss applies log-softmax itself


def demo_train():
    batch_size, seq_len, emb_dim = 6, 3, 4
    input = torch.randn(batch_size, seq_len, emb_dim)
    print(input)
    target = torch.tensor([0, 0, 1, 1, 2, 2])
    target1 = F.one_hot(target).float()  # one-hot (probability) targets
    target2 = target.long()              # class-index targets
    print(target1)
    print(target2)
    net = DNN(input_dim=seq_len * emb_dim, dnn_hidden_units=(64, 64), num_classes=3)
    pred_probs = net(input)  # raw logits, despite the variable name
    print(pred_probs)
    loss_function = nn.CrossEntropyLoss()
    loss1 = loss_function(pred_probs, target1)  # float probability targets (PyTorch >= 1.10)
    loss2 = loss_function(pred_probs, target2)  # long class-index targets
    print(loss1.item(), loss2.item())


demo_train()
```
Output:

```
tensor([[[-0.7408,  0.3025, -1.0376, -0.5466],
         [ 0.0322, -0.4760,  0.2513,  1.8802],
         [ 0.9833,  1.4926, -0.6791, -0.2804]],

        [[-1.1813, -1.1706, -0.1627,  0.1864],
         [-1.2612, -1.3480,  0.3172,  1.9101],
         [ 1.3913, -1.1372,  0.8389,  0.2928]],

        [[ 0.0643,  1.0133,  0.0922, -1.8800],
         [-0.8096,  0.6886, -0.2188,  0.8877],
         [-0.1556, -0.5697, -0.3586, -1.0695]],

        [[-0.5265, -0.9017,  0.0790, -0.8117],
         [-0.8104,  0.2160, -0.0704,  0.8088],
         [-0.1420,  0.3280, -0.5399, -0.1540]],

        [[ 1.9470,  0.7277, -0.3088,  0.1721],
         [-0.5745, -1.0028,  1.8998, -0.4627],
         [ 0.3502,  1.1034, -1.6437, -1.1877]],

        [[ 0.4783, -0.7039,  1.4341, -0.1845],
         [ 1.3011, -0.0039, -0.6479,  0.1350],
         [-1.5386,  0.7718,  0.3782,  0.8980]]])
tensor([[1., 0., 0.],
        [1., 0., 0.],
        [0., 1., 0.],
        [0., 1., 0.],
        [0., 0., 1.],
        [0., 0., 1.]])
tensor([0, 0, 1, 1, 2, 2])
tensor([[ 0.1023,  0.0600, -0.1141],
        [ 0.1023,  0.0600, -0.1141],
        [ 0.1023,  0.0600, -0.1141],
        [ 0.1023,  0.0600, -0.1141],
        [ 0.1023,  0.0600, -0.1141],
        [ 0.1023,  0.0600, -0.1141]], grad_fn=<AddmmBackward0>)
1.1029125452041626 1.1029125452041626
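As in the binary case, the tiny-std initialization makes all six logit rows nearly identical, so the untrained prediction is roughly uniform over the 3 classes and both losses land near -ln(1/3) ≈ 1.0986, close to the printed 1.1029. Note also that the one-hot and class-index targets produce exactly the same loss.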
## Binary classification with softmax

Binary classification can also be treated as 2-class multi-class classification: the final FC layer has 2 neurons and `nn.CrossEntropyLoss` is used.
The network is the same `DNN` class defined above; only the demo changes (2 output neurons, binary targets):

```python
def demo_train():
    batch_size, seq_len, emb_dim = 6, 3, 4
    input = torch.randn(batch_size, seq_len, emb_dim)
    print(input)
    target = torch.tensor([0, 0, 0, 1, 1, 1])
    target1 = F.one_hot(target).float()  # one-hot targets, shape (6, 2)
    target2 = target.long()              # class-index targets
    print(target1)
    print(target2)
    net = DNN(input_dim=seq_len * emb_dim, dnn_hidden_units=(64, 64), num_classes=2)
    pred_probs = net(input)  # raw 2-class logits
    print(pred_probs)
    loss_function = nn.CrossEntropyLoss()
    loss1 = loss_function(pred_probs, target1)
    loss2 = loss_function(pred_probs, target2)
    print(loss1.item(), loss2.item())


demo_train()
```
Output:

```
tensor([[[-1.0273, -1.1568,  0.4176, -0.5005],
         [-0.2111, -0.9956,  0.7913, -0.8623],
         [-0.3358, -1.0753,  0.0076,  0.0104]],

        [[ 0.1666,  2.1301, -0.5491, -1.0460],
         [-0.2292,  0.9588, -1.4148,  1.6562],
         [ 2.2176, -1.0187,  0.7178, -0.1506]],

        [[ 0.4299, -0.0988, -0.3603, -1.1780],
         [ 2.0595,  0.0898, -0.3522,  0.1985],
         [-0.3710,  2.4377,  2.0191, -0.5894]],

        [[ 1.0433,  1.1146,  0.2392, -0.6284],
         [-1.0640, -0.8061, -0.0592, -0.9320],
         [-0.5394,  0.8351, -1.9062, -1.6103]],

        [[ 1.4592,  2.1698,  0.1275,  0.1733],
         [-0.0941, -0.6828,  0.6800,  1.1637],
         [-0.2533,  0.0238,  0.8612,  1.0679]],

        [[-1.1885,  0.2141, -1.7337,  0.1454],
         [-0.3513,  0.0634,  0.3803,  0.4642],
         [ 0.2229,  0.2430,  1.2290,  0.7928]]])
tensor([[1., 0.],
        [1., 0.],
        [1., 0.],
        [0., 1.],
        [0., 1.],
        [0., 1.]])
tensor([0, 0, 0, 1, 1, 1])
tensor([[-0.1173, -0.0428],
        [-0.1173, -0.0428],
        [-0.1173, -0.0428],
        [-0.1173, -0.0428],
        [-0.1173, -0.0428],
        [-0.1173, -0.0428]], grad_fn=<AddmmBackward0>)
0.6938410401344299 0.6938409805297852
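The 2-neuron softmax formulation and the 1-neuron sigmoid formulation are mathematically the same model: a softmax over two logits (z0, z1) equals a sigmoid applied to their difference z1 - z0. A minimal sketch checking this numerically (the toy logits here are made up for illustration):

```python
import torch
import torch.nn.functional as F

# Hypothetical toy 2-class logits and binary targets
logits = torch.tensor([[ 0.3, -0.6],
                       [-1.0,  0.4],
                       [ 0.2,  0.2]])
target = torch.tensor([0, 1, 1])

ce = F.cross_entropy(logits, target)
# Same loss via the sigmoid route, on the logit difference z1 - z0
bce = F.binary_cross_entropy_with_logits(logits[:, 1] - logits[:, 0],
                                         target.float())
print(ce.item(), bce.item())  # equal up to float rounding
```

In practice the 1-neuron sigmoid head with `nn.BCEWithLogitsLoss` is the more common choice for binary problems; the 2-neuron softmax head gives identical losses with twice the output parameters.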
Reference: a summary of binary classification, multi-class classification, and multi-label problems in PyTorch: https://zhuanlan.zhihu.com/p/372628727