Mmdetection/tools/train的执行流程
Mmdetection/tools/train的执行流程
tools\train.py
train_detector(
model,
datasets,
cfg,
distributed=distributed,
validate=args.validate,
logger=logger)
mmdet/apis/train.py
_dist_train(model, dataset, cfg, validate=validate)
构造dataloader,model,optimizer,runner
注册了很多的hook函数
执行runner.run(data_loaders, cfg.workflow, cfg.total_epochs)
/mmcv/runner/runner.py
def run(self, data_loaders, workflow, max_epochs, **kwargs):
while self.epoch < max_epochs:
epoch_runner = getattr(self, mode)
epoch_runner(data_loaders[i], **kwargs)
def train(self, data_loader, **kwargs):
for i, data_batch in enumerate(data_loader):
self._inner_iter = i
self.call_hook('before_train_iter')
outputs = self.batch_processor(
self.model, data_batch, train_mode=True, **kwargs)
self.outputs = outputs
self.call_hook('after_train_iter')
self._iter += 1
self.call_hook('after_train_epoch')
runner通过call_hook依次调用已注册的这些hook函数:
for hook in self._hooks:
getattr(hook, fn_name)(self)
模型的构造:
mmdetection-rotated/mmdet/apis/train.py
def _dist_train(model, dataset, cfg, validate=False):
model = MMDistributedDataParallel(model.cuda())
如果模型是RetinaNet,则执行
mmdetection-rotated/mmdet/models/detectors/retinanet.py
如果是FasterRcnn,则执行
mmdet/models/detectors/faster_rcnn.py
以RetinaNet为例:(继承SingleStageDetector:)
初始化:
mmdetection-rotated/mmdet/models/detectors/retinanet.py
class RetinaNet(SingleStageDetector):
    """RetinaNet detector.

    Adds no logic of its own: every constructor argument is forwarded,
    positionally and unchanged, to ``SingleStageDetector.__init__``.
    """

    def __init__(self, backbone, neck, bbox_head, train_cfg=None,
                 test_cfg=None, pretrained=None):
        # Same positional order as the parent constructor expects.
        parent_args = (backbone, neck, bbox_head, train_cfg, test_cfg,
                       pretrained)
        super(RetinaNet, self).__init__(*parent_args)
mmdetection-rotated/mmdet/models/detectors/single_stage.py
class SingleStageDetector(BaseDetector):
    """Detector made of a backbone, an optional neck and a bbox head.

    Args:
        backbone: Config passed to ``builder.build_backbone``.
        neck: Config passed to ``builder.build_neck``; when ``None`` no
            ``neck`` attribute is created.
        bbox_head: Config passed to ``builder.build_head``.
        train_cfg: Stored as ``self.train_cfg``.
        test_cfg: Stored as ``self.test_cfg``.
        pretrained: Forwarded to ``self.init_weights``.
    """

    def __init__(self, backbone, neck=None, bbox_head=None, train_cfg=None,
                 test_cfg=None, pretrained=None):
        super(SingleStageDetector, self).__init__()

        # Sub-modules are built from their configs in this fixed order.
        self.backbone = builder.build_backbone(backbone)
        if neck is not None:
            self.neck = builder.build_neck(neck)
        self.bbox_head = builder.build_head(bbox_head)

        self.train_cfg, self.test_cfg = train_cfg, test_cfg

        # Weight initialisation runs last, once all sub-modules exist.
        self.init_weights(pretrained=pretrained)
按照配置文件中,依次构造backbone,neck,bbox_head.
以构造bbox_head为例:
配置文件:
bbox_head=dict(
type='RetinaHead',
num_classes=2,
in_channels=256,
stacked_convs=4,
feat_channels=256,
octave_base_scale=4,
scales_per_octave=3,
anchor_ratios=[0.3, 0.5, 0.8],
anchor_angle=[-60, -30, 0, 30, 60, 90],
anchor_strides=[8, 16, 32, 64, 128],
target_means=[.0, .0, .0, .0, .0],
target_stds=[1.0, 1.0, 1.0, 1.0, 1.0],
loss_cls=dict(
type='FocalLoss',
use_sigmoid=True,
gamma=2.0,
alpha=0.25,
loss_weight=1.0),
loss_bbox=dict(type='SmoothL1Loss', beta=0.11, loss_weight=1.0)))
#根据type='RetinaHead'来调用相应的构造函数
self.bbox_head = builder.build_head(bbox_head)
调用mmdetection-rotated/mmdet/models/anchor_heads/retina_head.py
class RetinaHead(AnchorHead):
    """Anchor head used by RetinaNet.

    Derives the anchor scales from ``octave_base_scale`` and
    ``scales_per_octave`` and hands them to ``AnchorHead.__init__`` as
    ``anchor_scales``; any extra keyword arguments are forwarded untouched.

    Args:
        num_classes: Forwarded to ``AnchorHead``.
        in_channels: Forwarded to ``AnchorHead``.
        stacked_convs: Stored as ``self.stacked_convs``.
        octave_base_scale: Multiplier applied to every per-octave scale.
        scales_per_octave: Number of scales generated per octave.
        conv_cfg: Stored as ``self.conv_cfg``.
        norm_cfg: Stored as ``self.norm_cfg``.
    """

    def __init__(self, num_classes, in_channels, stacked_convs=4,
                 octave_base_scale=4, scales_per_octave=3, conv_cfg=None,
                 norm_cfg=None, **kwargs):
        # NOTE(review): these are assigned before the parent constructor
        # runs -- presumably because the parent reads them; confirm.
        self.stacked_convs = stacked_convs
        self.octave_base_scale = octave_base_scale
        self.scales_per_octave = scales_per_octave
        self.conv_cfg = conv_cfg
        self.norm_cfg = norm_cfg

        # Scales within one octave: 2**(0/n), 2**(1/n), ..., 2**((n-1)/n).
        exponents = [i / scales_per_octave for i in range(scales_per_octave)]
        octave_scales = np.array([2**e for e in exponents])
        anchor_scales = octave_scales * octave_base_scale

        super(RetinaHead, self).__init__(
            num_classes, in_channels, anchor_scales=anchor_scales, **kwargs)
调用了AnchorHead的构造函数:
mmdetection-rotated/mmdet/models/anchor_heads/anchor_head.py
class AnchorHead(nn.Module):
"""Anchor-based head (RPN, RetinaNet, SSD, etc.).
Args:
in_channels (int): Number of channels in the input feature map.
feat_channels (int): Number of channels of the feature map.
anchor_scales (Iterable): Anchor scales.
anchor_ratios (Iterable): Anchor aspect ratios.
anchor_strides (Iterable): Anchor strides.
anchor_base_sizes (Iterable): Anchor base sizes.
target_means (Iterable): Mean values of regression targets.
target_stds (Iterable): Std values of regression targets.
loss_cls (dict): Config of classification loss.
loss_bbox (dict): Config of localization loss.
def __init__(self,
num_classes,
in_channels,
feat_channels=256,
anchor_scales=[8, 16, 32],
anchor_ratios=[0.5, 1.0, 2.0],
anchor_angle=[-30.0, 0.0, 30.0, 60.0, 90.0, 120.0],
anchor_strides=[4, 8, 16, 32, 64],
target_means=(.0, .0, .0, .0),
target_stds=(1.0, 1.0, 1.0, 1.0),
loss_cls=dict(
type='CrossEntropyLoss',
use_sigmoid=True,
loss_weight=1.0),
loss_bbox=dict(
type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0)):
#产生anchor框
for anchor_base in self.anchor_strides:
self.anchor_generators.append(AnchorGenerator(anchor_base, self.anchor_scales, self.anchor_ratios, self.anchor_angle))
模型的训练:
以retinaNet为例,retinaNet继承了SingleStageDetector,所以继承了它的训练过程
mmdet\models\detectors\single_stage.py
先提取特征,得到特征图(backbone+FPN,对应extract_feat)->然后根据特征图回归边界和分类,得到anchor对应的类别和proposals (对应bbox_head)->根据实际标注框(gt_bboxes,gt_labels,img_metas,self.train_cfg),得到各个anchor对应的label(0负样本,正整数:对应的类别,-1非正非负样本)和回归的target (一般是基于anchor和标注框的IOU来判定) -> 根据采样策略对正负样本进行采样->对采样后的样本,根据模型的输出结果和对应的label和target,计算损失losses (对应bbox_head.loss)
def forward_train(self,
                  img,
                  img_metas,
                  gt_bboxes,
                  gt_labels,
                  gt_bboxes_ignore=None):
    """Run the training forward pass and return the head's losses.

    Args:
        img: Input passed to ``self.extract_feat``.
        img_metas: Image meta information, forwarded to the head's loss.
        gt_bboxes: Ground-truth boxes, forwarded to the head's loss.
        gt_labels: Ground-truth labels, forwarded to the head's loss.
        gt_bboxes_ignore: Forwarded to the head's loss as a keyword
            argument.

    Returns:
        Whatever ``self.bbox_head.loss`` returns.
    """
    x = self.extract_feat(img)
    outs = self.bbox_head(x)
    # The head output must be a tuple: it is concatenated with the extra
    # positional arguments expected by ``bbox_head.loss``.
    loss_inputs = outs + (gt_bboxes, gt_labels, img_metas, self.train_cfg)
    losses = self.bbox_head.loss(
        *loss_inputs, gt_bboxes_ignore=gt_bboxes_ignore)
    # The excerpt computed the losses but never returned them.
    return losses
特征提取一般是backbone+fpn
retinaNet用两个分支的卷积回归bbox offset和分类:
for i in range(self.stacked_convs):
chn = self.in_channels if i == 0 else self.feat_channels
self.cls_convs.append(
ConvModule(
chn,
self.feat_channels,
3,
stride=1,
padding=1,
conv_cfg=self.conv_cfg,
norm_cfg=self.norm_cfg))
self.reg_convs.append(
ConvModule(
chn,
self.feat_channels,
3,
stride=1,
padding=1,
conv_cfg=self.conv_cfg,
norm_cfg=self.norm_cfg))
self.retina_cls = nn.Conv2d(
self.feat_channels,
self.num_anchors * self.cls_out_channels,
3,
padding=1)
self.retina_reg = nn.Conv2d(
self.feat_channels, self.num_anchors * 5, 3, padding=1)#bbox是垂直时,有4个值;是倾斜时,有5个值
def forward_single(self, x):
    """Run the classification and regression branches on one input.

    The input is pushed through every module in ``self.cls_convs`` and,
    independently, through every module in ``self.reg_convs``; the two
    results are then fed to ``self.retina_cls`` and ``self.retina_reg``.

    Args:
        x: Input shared by both branches.

    Returns:
        tuple: ``(cls_score, bbox_pred)``.
    """
    cls_feat = x
    reg_feat = x
    for cls_conv in self.cls_convs:
        cls_feat = cls_conv(cls_feat)
    for reg_conv in self.reg_convs:
        reg_feat = reg_conv(reg_feat)
    cls_score = self.retina_cls(cls_feat)
    bbox_pred = self.retina_reg(reg_feat)
    return cls_score, bbox_pred
计算损失时:
mmdetection-rotated/mmdet/models/anchor_heads/anchor_head.py
self.bbox_head.loss(x):
根据标注的目标框和anchor计算每个anchor框的要回归的值和分类的label
cls_reg_targets = anchor_target( anchor_list, valid_flag_list, gt_bboxes, img_metas, self.target_means, self.target_stds, cfg, gt_bboxes_ignore_list=gt_bboxes_ignore, gt_labels_list=gt_labels, label_channels=label_channels, sampling=self.sampling)
(all_labels, all_label_weights, all_bbox_targets, all_bbox_weights, pos_inds_list, neg_inds_list) = multi_apply( anchor_target_single,anchor_list,valid_flag_list,gt_bboxes_list,gt_bboxes_ignore_list, gt_labels_list,img_metas, target_means=target_means, target_stds=target_stds, cfg=cfg, label_channels=label_channels, sampling=sampling, unmap_outputs=unmap_outputs)
其中,函数anchor_target_single的定义为:
def anchor_target_single:
bbox_assigner = build_assigner(cfg.assigner) assign_result = bbox_assigner.assign(anchors, gt_bboxes, gt_bboxes_ignore, gt_labels) bbox_sampler = PseudoSampler() sampling_result = bbox_sampler.sample(assign_result, anchors, gt_bboxes)
bbox_assigner.assign函数:计算预测框(proposals)和标注框的IOU,来定义预测的正样本,负样本,和非正非负样本。
其中,assigner的定义为:
assigner=dict(
type='MaxIoUAssigner',
pos_iou_thr=0.5,
neg_iou_thr=0.4,
min_pos_iou=0,
ignore_iof_thr=-1),
mmdetection-rotated/mmdet/core/bbox/assigners/max_iou_assigner_horizontal.py
mmdetection-rotated/mmdet/core/bbox/assigners/max_iou_assigner_horizontal.py
class MaxIoUAssigner(BaseAssigner):
def assign(self, bboxes, gt_bboxes, gt_bboxes_ignore=None, gt_labels=None):
This method assign a gt bbox to every bbox (proposal/anchor), each bbox
will be assigned with -1, 0, or a positive number. -1 means don't care,
0 means negative sample, positive number is the index (1-based) of
assigned gt.
The assignment is done in following steps, the order matters.
1. assign every bbox to -1
2. assign proposals whose iou with all gts < neg_iou_thr to 0
3. for each bbox, if the iou with its nearest gt >= pos_iou_thr,
assign it to that bbox
4. for each gt bbox, assign its nearest proposals (may be more than
one) to itself
#通过mmdetection-rotated/mmdet/core/bbox/geometry.py的bbox_overlaps来计算proposals和标注框两两之间的IOU
bboxes = bboxes[:, :5]
overlaps = bbox_overlaps(gt_bboxes, bboxes)
#根据IOU的预测结果来设定proposals的label为0,1,-1
if (self.ignore_iof_thr > 0) and (gt_bboxes_ignore is not None) and (
gt_bboxes_ignore.numel() > 0):
if self.ignore_wrt_candidates:
ignore_overlaps = bbox_overlaps(
bboxes, gt_bboxes_ignore, mode='iof')
ignore_max_overlaps, _ = ignore_overlaps.max(dim=1)
else:
ignore_overlaps = bbox_overlaps(
gt_bboxes_ignore, bboxes, mode='iof')
ignore_max_overlaps, _ = ignore_overlaps.max(dim=0)
overlaps[:, ignore_max_overlaps > self.ignore_iof_thr] = -1
assign_result = self.assign_wrt_overlaps(overlaps, gt_labels)
return assign_result
通过mmdetection-rotated/mmdet/core/bbox/geometry.py的bbox_overlaps来计算proposals和标注框两两之间的IOU
Sample:根据给定的采样策略(如hard sample:OHEM、IOU Sample;soft sample:Focal loss、GHM、PISA)对样本(proposals)进行采样,再用采样后的样本计算分类和回归损失;或者使用全部样本计算分类和回归损失,但各样本的loss权重不同
D:\商汤项目存储\U盘内容\mmdetection_h\mmdet\core\anchor\anchor_target.py
def anchor_target_single
#当指定采样策略时,就会按照指定的采样策略采样(hard_sample);否则,就使用全部的正样本和负样本 (soft_sample)
if sampling:
assign_result, sampling_result = assign_and_sample(
anchors, gt_bboxes, gt_bboxes_ignore, None, cfg)
else:
bbox_assigner = build_assigner(cfg.assigner)
assign_result = bbox_assigner.assign(anchors, gt_bboxes,
gt_bboxes_ignore, gt_labels)
bbox_sampler = PseudoSampler()#使用全部的正样本和负样本(soft_sample)
sampling_result = bbox_sampler.sample(assign_result, anchors,
gt_bboxes)
anchor_target_single最终会得到该张图中所有的proposal的(labels, label_weights, bbox_targets, bbox_weights, pos_inds, neg_inds)。
其中label指proposal的类别(0负样本,1正样本(或者其他正整数,表示anchor所代表的类别),-1非正非负样本);
label_weights为正样本、负样本或者不同类别的目标指定不同的分类权重,可以用来缓解类别不平衡的现象
bbox_targets 根据anchor和实际的标注框,通过bbox2delta计算要回归的目标值, bbox_weights给定回归在损失函数中所占的权重
pos_inds,neg_inds表示样本中正负样本的索引
#bbox2delta,给定anchor的中心点坐标和宽高和角度,实际的标注框的中心点坐标和宽高和角度,计算bbox reg时要回归的值
def bbox2delta(proposals, gt, means=(0, 0, 0, 0, 0), stds=(1, 1, 1, 1, 1)):
    """Encode ground-truth boxes as regression deltas relative to proposals.

    Both inputs are indexed on their last dimension as
    ``(x, y, w, h, angle)``. The angle difference is multiplied by
    ``pi / 180``, i.e. the angles are treated as degrees.

    Args:
        proposals (Tensor): Reference boxes; must have the same size as
            ``gt``.
        gt (Tensor): Target boxes.
        means (Sequence[float]): Per-component value subtracted from the
            raw deltas.
        stds (Sequence[float]): Per-component value the shifted deltas are
            divided by.

    Returns:
        Tensor: Normalised deltas ``(dx, dy, dw, dh, da)`` stacked on the
        last dimension.
    """
    assert proposals.size() == gt.size()

    proposals = proposals.float()
    gt = gt.float()

    px = proposals[..., 0]
    py = proposals[..., 1]
    pw = proposals[..., 2]
    ph = proposals[..., 3]
    # Index with ``...`` like every other component so that inputs with
    # more than two dimensions work (the excerpt used ``[:, 4]`` here).
    pa = proposals[..., 4]

    gx = gt[..., 0]
    gy = gt[..., 1]
    gw = gt[..., 2]
    gh = gt[..., 3]
    ga = gt[..., 4]

    # Centre offsets are normalised by the proposal size; sizes are encoded
    # as log-ratios; the angle difference is scaled by pi / 180.
    dx = (gx - px) / pw
    dy = (gy - py) / ph
    dw = torch.log(gw / pw)
    dh = torch.log(gh / ph)
    da = (ga - pa) * np.pi / 180

    deltas = torch.stack([dx, dy, dw, dh, da], dim=-1)

    means = deltas.new_tensor(means).unsqueeze(0)
    stds = deltas.new_tensor(stds).unsqueeze(0)
    # In-place is safe: ``deltas`` is a fresh tensor produced by ``stack``.
    deltas = deltas.sub_(means).div_(stds)

    # The excerpt ended without returning the result.
    return deltas
(3)对采样后的正样本和负样本计算损失
mmdetection-rotated/mmdet/models/anchor_heads/anchor_head.py
class AnchorHead(nn.Module):
def __init__()#在init函数中设定损失函数
self.loss_cls = build_loss(loss_cls)
self.loss_bbox = build_loss(loss_bbox)
其中,我们cfg文件中对loss的定义为:
loss_cls=dict(
type='FocalLoss',
use_sigmoid=True,
gamma=2.0,
alpha=0.25,
loss_weight=1.0),
loss_bbox=dict(type='SmoothL1Loss', beta=0.11, loss_weight=1.0)))
类别损失函数:
定义:mmdetection-rotated/mmdet/models/losses/focal_loss.py
loss_cls = self.loss_weight * sigmoid_focal_loss(
pred,
target,
weight,
gamma=self.gamma,
alpha=self.alpha,
reduction=reduction,
avg_factor=avg_factor)
bbox的回归损失函数:
mmdetection-rotated/mmdet/models/losses/smooth_l1_loss.py
def smooth_l1_loss(pred, target, beta=1.0):
    """Compute the element-wise Smooth L1 loss.

    For ``d = |pred - target|`` each element is ``0.5 * d**2 / beta`` when
    ``d < beta`` and ``d - 0.5 * beta`` otherwise. No reduction is applied.

    Args:
        pred (Tensor): Predictions.
        target (Tensor): Targets; must be non-empty and have the same size
            as ``pred``.
        beta (float): Switch point between the quadratic and linear
            branches; must be positive.

    Returns:
        Tensor: Loss with the same shape as ``pred``.
    """
    assert beta > 0
    assert pred.size() == target.size() and target.numel() > 0
    diff = torch.abs(pred - target)
    loss = torch.where(diff < beta, 0.5 * diff * diff / beta,
                       diff - 0.5 * beta)
    return loss
#
def loss():
(1)先计算各个anchor框的类别和正样本要回归的target
(2)对正负样本采样
(3)计算损失
losses_cls, losses_bbox = multi_apply(
self.loss_single,
cls_scores,
bbox_preds,
labels_list,
label_weights_list,
bbox_targets_list,
bbox_weights_list,
num_total_samples=num_total_samples,
cfg=cfg)
其中,self.loss_single的定义为:
def loss_single(self, cls_score, bbox_pred, labels, label_weights,
                bbox_targets, bbox_weights, num_total_samples, cfg):
    """Compute the classification and regression losses for one input.

    Args:
        cls_score: Classification output passed to ``self.loss_cls``.
        bbox_pred: Regression output passed to ``self.loss_bbox``.
        labels: Classification targets.
        label_weights: Weights for the classification loss.
        bbox_targets: Regression targets.
        bbox_weights: Weights for the regression loss.
        num_total_samples: Passed to both losses as ``avg_factor``.
        cfg: Not used in this excerpt.

    Returns:
        tuple: ``(loss_cls, loss_bbox)``.
    """
    loss_cls = self.loss_cls(
        cls_score, labels, label_weights, avg_factor=num_total_samples)
    loss_bbox = self.loss_bbox(
        bbox_pred,
        bbox_targets,
        bbox_weights,
        avg_factor=num_total_samples)
    return loss_cls, loss_bbox
模型的推理(测试时):
先提取特征,得到特征图(backbone+FPN)->然后根据特征图回归边界和分类,得到anchor对应的类别和proposals->取出图像中的前面N个得分最高的proposals,并将预测的Δ转换为bbox,方便后期计算各个proposals的IOU;用nms抑制重复预测的proposals
def simple_test(self, img, img_meta, rescale=False):
    """Run single-input inference and return the first formatted result.

    Features from ``self.extract_feat`` go through ``self.bbox_head``; the
    head outputs, followed by ``img_meta``, ``self.test_cfg`` and
    ``rescale``, are passed to ``self.bbox_head.get_bboxes``. Each
    ``(det_bboxes, det_labels)`` pair it returns is converted with
    ``bbox2result``.

    Args:
        img: Input passed to ``self.extract_feat``.
        img_meta: Image meta information forwarded to ``get_bboxes``.
        rescale: Forwarded to ``get_bboxes``.

    Returns:
        The ``bbox2result`` output for the first entry only.
    """
    feats = self.extract_feat(img)
    head_outs = self.bbox_head(feats)

    # The head output must be a tuple so the extra arguments can be appended.
    get_bboxes_args = head_outs + (img_meta, self.test_cfg, rescale)
    detections = self.bbox_head.get_bboxes(*get_bboxes_args)

    bbox_results = []
    for det_bboxes, det_labels in detections:
        bbox_results.append(
            bbox2result(det_bboxes, det_labels, self.bbox_head.num_classes))
    return bbox_results[0]
其中, self.bbox_head.get_bboxes(*bbox_inputs)是取出图像中的前面N个得分最高的proposals,并将预测的Δ转换为bbox,方便后期计算各个proposals的IOU;用nms抑制重复预测的proposals
mmdetection-rotated/mmdet/models/anchor_heads/anchor_head.py
def get_bboxes(self, cls_scores, bbox_preds, img_metas, cfg,rescale=False):
proposals = self.get_bboxes_single(cls_score_list, bbox_pred_list, mlvl_anchors, img_shape,scale_factor, cfg, rescale)
_, topk_inds = max_scores.topk(nms_pre) #先选出前n个得分最高的proposals
bboxes = delta2bbox(anchors, bbox_pred, self.target_means,self.target_stds, img_shape) #将模型预测的Δ转为bbox
det_bboxes, det_labels = multiclass_nms(mlvl_bboxes, mlvl_scores,cfg.score_thr, cfg.nms, cfg.max_per_img)#通过NMS抑制重复预测的anchor框
return det_bboxes, det_labels