detectron代码理解(三):RPN构建与相应的损失函数

1.RPN的构建

对RPN的构建在FPN.py的add_fpn_rpn_outputs函数中

def add_fpn_rpn_outputs(model, blobs_in, dim_in, spatial_scales):
    """Add RPN on FPN specific outputs.

    For each FPN level (RPN_MIN_LEVEL..RPN_MAX_LEVEL) this attaches:
      * a 3x3 conv hidden layer ('conv_rpn_fpn<lvl>', ReLU-activated),
      * a 1x1 conv producing one objectness logit per anchor
        ('rpn_cls_logits_fpn<lvl>'),
      * a 1x1 conv producing 4 box-regression deltas per anchor
        ('rpn_bbox_pred_fpn<lvl>').
    The weights/biases are created once at the finest level (k_min) and
    shared by all other levels via ConvShared.  During inference, or when
    training Faster R-CNN, proposals are also generated per level.
    """
    """
    blobs_in:
    [BlobReference("gpu_0/fpn_res5_2_sum_subsampled_2x"),
    BlobReference("gpu_0/fpn_res5_2_sum"), 
    BlobReference("gpu_0/fpn_res4_5_sum"), 
    BlobReference("gpu_0/fpn_res3_3_sum"), 
    BlobReference("gpu_0/fpn_res2_2_sum")]
    以BlobReference("gpu_0/fpn_res2_2_sum")为例
    3×3的卷积后,名称变为conv_rpn_fpn2,featuremap的个数为256
    增加分类层后,名称变为rpn_cls_logits_fpn2
    增加回归层后,名称变为rpn_bbox_pred_fpn2
    
    """
    
    num_anchors = len(cfg.FPN.RPN_ASPECT_RATIOS)  # For FPN's RPN each position of each level emits only RPN_ASPECT_RATIOS anchors; the anchor area is fixed per level and grows with the level
    dim_out = dim_in  # after FPN the feature dimension stays the same (256)

    k_max = cfg.FPN.RPN_MAX_LEVEL  # coarsest level of pyramid (6 here)
    k_min = cfg.FPN.RPN_MIN_LEVEL  # finest level of pyramid (2 here)
    assert len(blobs_in) == k_max - k_min + 1  
    for lvl in range(k_min, k_max + 1):  # starting from P2, add the RPN outputs to every FPN level in turn
        bl_in = blobs_in[k_max - lvl]  # blobs_in is in reversed order 
        sc = spatial_scales[k_max - lvl]  # in reversed order
        slvl = str(lvl)

        if lvl == k_min:
            # Create conv ops with randomly initialized weights and
            # zeroed biases for the first FPN level; these will be shared by
            # all other FPN levels
            # RPN hidden representation
            # First the 3x3 convolution (presumably a semantic-space transform)
            conv_rpn_fpn = model.Conv(
                bl_in,                 # input blob name
                'conv_rpn_fpn' + slvl,   # output blob name
                dim_in,                 # input channels: 256
                dim_out,                # output channels: 256
                kernel=3,
                pad=1,
                stride=1,
                weight_init=gauss_fill(0.01),
                bias_init=const_fill(0.0)
            )
            model.Relu(conv_rpn_fpn, conv_rpn_fpn)
            # Proposal classification scores: one objectness logit per anchor
            rpn_cls_logits_fpn = model.Conv(
                conv_rpn_fpn,
                'rpn_cls_logits_fpn' + slvl,
                dim_in,
                num_anchors,    
                kernel=1,
                pad=0,
                stride=1,
                weight_init=gauss_fill(0.01),
                bias_init=const_fill(0.0)
            )
            # Proposal bbox regression deltas: 4 values for each anchor
            rpn_bbox_pred_fpn = model.Conv(
                conv_rpn_fpn,
                'rpn_bbox_pred_fpn' + slvl,
                dim_in,
                4 * num_anchors,
                kernel=1,
                pad=0,
                stride=1,
                weight_init=gauss_fill(0.01),
                bias_init=const_fill(0.0)
            )
            print(conv_rpn_fpn,rpn_cls_logits_fpn, rpn_bbox_pred_fpn)
        else:
            # Share weights and biases: every level reuses the W and b created above for the k_min (P2) level
            sk_min = str(k_min)
            # RPN hidden representation
            conv_rpn_fpn = model.ConvShared(
                bl_in,
                'conv_rpn_fpn' + slvl,
                dim_in,
                dim_out,
                kernel=3,
                pad=1,
                stride=1,
                weight='conv_rpn_fpn' + sk_min + '_w',
                bias='conv_rpn_fpn' + sk_min + '_b'
            )
            model.Relu(conv_rpn_fpn, conv_rpn_fpn)
            # Proposal classification scores
            rpn_cls_logits_fpn = model.ConvShared(
                conv_rpn_fpn,
                'rpn_cls_logits_fpn' + slvl,
                dim_in,
                num_anchors,
                kernel=1,
                pad=0,
                stride=1,
                weight='rpn_cls_logits_fpn' + sk_min + '_w',
                bias='rpn_cls_logits_fpn' + sk_min + '_b'
            )
            # Proposal bbox regression deltas
            rpn_bbox_pred_fpn = model.ConvShared(
                conv_rpn_fpn,
                'rpn_bbox_pred_fpn' + slvl,
                dim_in,
                4 * num_anchors,
                kernel=1,
                pad=0,
                stride=1,
                weight='rpn_bbox_pred_fpn' + sk_min + '_w',
                bias='rpn_bbox_pred_fpn' + sk_min + '_b'
            )
            print(conv_rpn_fpn,rpn_cls_logits_fpn, rpn_bbox_pred_fpn)

        if not model.train or cfg.MODEL.FASTER_RCNN:
            # Proposals are needed during:
            #  1) inference (== not model.train) for RPN only and Faster R-CNN
            #  OR
            #  2) training for Faster R-CNN
            # Otherwise (== training for RPN only), proposals are not needed
            lvl_anchors = generate_anchors(
                stride=2.**lvl,
                sizes=(cfg.FPN.RPN_ANCHOR_START_SIZE * 2.**(lvl - k_min), ),
                aspect_ratios=cfg.FPN.RPN_ASPECT_RATIOS
            )
            # Sigmoid activation on top of the classification logits
            rpn_cls_probs_fpn = model.net.Sigmoid(
                rpn_cls_logits_fpn, 'rpn_cls_probs_fpn' + slvl
            )
            print(rpn_cls_probs_fpn,rpn_bbox_pred_fpn,'rpn_rois_fpn' + slvl,'rpn_roi_probs_fpn' + slvl)
            model.GenerateProposals(
                [rpn_cls_probs_fpn, rpn_bbox_pred_fpn, 'im_info'],
                ['rpn_rois_fpn' + slvl, 'rpn_roi_probs_fpn' + slvl],
                anchors=lvl_anchors,
                spatial_scale=sc
            )  # proposals are emitted as rpn_rois_fpn<lvl>; rpn_roi_probs_fpn<lvl> holds each proposal's score

对于每一层FPN

层数 | 语义转换 | 逻辑分类(sigmoid激活后) | 回归层 | 输出rois
P2 | conv_rpn_fpn2 | rpn_cls_logits_fpn2(rpn_cls_probs_fpn2) | rpn_bbox_pred_fpn2 | rpn_rois_fpn2, rpn_roi_probs_fpn2
P3 | conv_rpn_fpn3 | rpn_cls_logits_fpn3(rpn_cls_probs_fpn3) | rpn_bbox_pred_fpn3 | rpn_rois_fpn3, rpn_roi_probs_fpn3
P4 | conv_rpn_fpn4 | rpn_cls_logits_fpn4(rpn_cls_probs_fpn4) | rpn_bbox_pred_fpn4 | rpn_rois_fpn4, rpn_roi_probs_fpn4
P5 | conv_rpn_fpn5 | rpn_cls_logits_fpn5(rpn_cls_probs_fpn5) | rpn_bbox_pred_fpn5 | rpn_rois_fpn5, rpn_roi_probs_fpn5
P6 | conv_rpn_fpn6 | rpn_cls_logits_fpn6(rpn_cls_probs_fpn6) | rpn_bbox_pred_fpn6 | rpn_rois_fpn6, rpn_roi_probs_fpn6

其中rpn_cls_probs_fpnX和rpn_bbox_pred_fpnX是产生proposal(对应函数为model.GenerateProposals)的输入,rpn_rois_fpnX和rpn_roi_probs_fpnX是产生proposal的输出。

完成的内容是:

  • 从约20000个anchors中选取概率较大的 12000 个 anchor

  • 利用回归的位置参数,修正这 12000 个 anchor 的位置,得到 RoIs

  • 利用非极大值抑制(Non-maximum suppression, NMS),选出概率最大的 2000 个 RoIs

2.为RPN构建损失

def add_fpn_rpn_losses(model):
    """Add RPN on FPN specific losses.

    For every FPN level this narrows the full-sized ("wide") RPN label and
    bbox-target blobs to the level's feature-map shape, then attaches a
    sigmoid cross-entropy classification loss and a smooth-L1 bbox
    regression loss.  Returns a dict of loss gradients.
    """
    loss_gradients = {}
    min_level = cfg.FPN.RPN_MIN_LEVEL
    max_level = cfg.FPN.RPN_MAX_LEVEL
    # Classification loss is normalized by the per-image RPN batch size and
    # the number of images per batch.
    cls_loss_scale = (
        model.GetLossScale() / cfg.TRAIN.RPN_BATCH_SIZE_PER_IM /
        cfg.TRAIN.IMS_PER_BATCH
    )
    for level in range(min_level, max_level + 1):
        suffix = str(level)
        # Crop the full-sized label blob to match this level's logits shape.
        model.net.SpatialNarrowAs(
            ['rpn_labels_int32_wide_fpn' + suffix,
             'rpn_cls_logits_fpn' + suffix],
            'rpn_labels_int32_fpn' + suffix
        )
        # Likewise for each bbox regression target/weight blob.
        for key in ('targets', 'inside_weights', 'outside_weights'):
            wide_blob = 'rpn_bbox_' + key + '_wide_fpn' + suffix
            narrow_blob = 'rpn_bbox_' + key + '_fpn' + suffix
            model.net.SpatialNarrowAs(
                [wide_blob, 'rpn_bbox_pred_fpn' + suffix], narrow_blob
            )
        cls_loss = model.net.SigmoidCrossEntropyLoss(
            ['rpn_cls_logits_fpn' + suffix, 'rpn_labels_int32_fpn' + suffix],
            'loss_rpn_cls_fpn' + suffix,
            normalize=0,
            scale=cls_loss_scale
        )
        # Normalization by (1) RPN_BATCH_SIZE_PER_IM and (2) IMS_PER_BATCH is
        # handled by (1) setting bbox outside weights and (2) SmoothL1Loss
        # normalizing by IMS_PER_BATCH.
        bbox_loss = model.net.SmoothL1Loss(
            [
                'rpn_bbox_pred_fpn' + suffix,
                'rpn_bbox_targets_fpn' + suffix,
                'rpn_bbox_inside_weights_fpn' + suffix,
                'rpn_bbox_outside_weights_fpn' + suffix
            ],
            'loss_rpn_bbox_fpn' + suffix,
            beta=1. / 9.,
            scale=model.GetLossScale(),
        )
        loss_gradients.update(
            blob_utils.get_loss_gradients(model, [cls_loss, bbox_loss])
        )
        model.AddLosses(
            ['loss_rpn_cls_fpn' + suffix, 'loss_rpn_bbox_fpn' + suffix]
        )
    return loss_gradients

以P2层为例:

[u'rpn_labels_int32_wide_fpn2', u'rpn_cls_logits_fpn2']  ——> rpn_labels_int32_fpn2
[u'rpn_bbox_targets_wide_fpn2', u'rpn_bbox_pred_fpn2'] ——> rpn_bbox_targets_fpn2
[u'rpn_bbox_inside_weights_wide_fpn2', u'rpn_bbox_pred_fpn2'] ——> rpn_bbox_inside_weights_fpn2
[u'rpn_bbox_outside_weights_wide_fpn2', u'rpn_bbox_pred_fpn2'] ——> rpn_bbox_outside_weights_fpn2
[u'rpn_cls_logits_fpn2', u'rpn_labels_int32_fpn2'] ——> loss_rpn_cls_fpn2   #RPN的分类损失
[u'rpn_bbox_pred_fpn2', u'rpn_bbox_targets_fpn2', u'rpn_bbox_inside_weights_fpn2', u'rpn_bbox_outside_weights_fpn2'] ——> loss_rpn_bbox_fpn2  #RPN的边框损失

猜你喜欢

转载自blog.csdn.net/Mr_health/article/details/84624486