三、test.py解析
tools/test.py负责对已经训练好的模型进行评估,程序代码整体比较长,这里按照程序流程来部分梳理
3.1 函数入口
def main():
    """Evaluate a trained detector on the test split described by a config.

    Steps, in order: parse and validate the command line, load the config,
    optionally initialise the distributed environment, build the test
    dataloader and the detector, load the checkpoint, run single- or
    multi-GPU inference, then (on rank 0 only) dump the raw results, run the
    COCO evaluation and/or write COCO-style json files.

    Raises:
        AssertionError: if none of ``--out``, ``--show`` or ``--json_out``
            was given.
        ValueError: if ``--out`` does not end with ``.pkl`` / ``.pickle``.
    """
    args = parse_args()

    assert args.out or args.show or args.json_out, \
        ('Please specify at least one operation (save or show the results) '
         'with the argument "--out" or "--show" or "--json_out"')

    if args.out is not None and not args.out.endswith(('.pkl', '.pickle')):
        raise ValueError('The output file must be a pkl file.')

    # --json_out is used as a file-name prefix, so drop a trailing '.json'.
    if args.json_out is not None and args.json_out.endswith('.json'):
        args.json_out = args.json_out[:-5]

    # Load the config file into a dict-like Config object.
    cfg = mmcv.Config.fromfile(args.config)
    # set cudnn_benchmark
    # NOTE: cudnn benchmark mode can speed things up when the input size is
    # fixed (e.g. single-scale testing).
    if cfg.get('cudnn_benchmark', False):
        torch.backends.cudnn.benchmark = True
    # Weights come from the checkpoint, so no pretrained backbone is loaded.
    cfg.model.pretrained = None
    cfg.data.test.test_mode = True

    # init distributed env first, since logger depends on the dist info.
    if args.launcher == 'none':
        distributed = False
    else:
        distributed = True
        init_dist(args.launcher, **cfg.dist_params)

    # build the dataloader
    # TODO: support multiple images per gpu (only minor changes are needed)
    dataset = build_dataset(cfg.data.test)
    data_loader = build_dataloader(
        dataset,
        imgs_per_gpu=1,
        workers_per_gpu=cfg.data.workers_per_gpu,
        dist=distributed,
        shuffle=False)

    # build the model and load checkpoint
    model = build_detector(cfg.model, train_cfg=None, test_cfg=cfg.test_cfg)
    fp16_cfg = cfg.get('fp16', None)
    if fp16_cfg is not None:
        wrap_fp16_model(model)
    checkpoint = load_checkpoint(model, args.checkpoint, map_location='cpu')
    # old versions did not save class info in checkpoints, this walkaround is
    # for backward compatibility
    if 'CLASSES' in checkpoint['meta']:
        model.CLASSES = checkpoint['meta']['CLASSES']
    else:
        model.CLASSES = dataset.CLASSES

    if not distributed:
        model = MMDataParallel(model, device_ids=[0])
        outputs = single_gpu_test(model, data_loader, args.show)
    else:
        model = MMDistributedDataParallel(model.cuda())
        outputs = multi_gpu_test(model, data_loader, args.tmpdir)

    rank, _ = get_dist_info()
    # Dumping and evaluation both need a concrete --out path (it is reused as
    # the prefix of the json files), and only rank 0 performs them.
    if args.out and rank == 0:
        print('\nwriting results to {}'.format(args.out))
        mmcv.dump(outputs, args.out)
        eval_types = args.eval
        if eval_types:
            print('Starting evaluate {}'.format(' and '.join(eval_types)))
            if eval_types == ['proposal_fast']:
                # proposal_fast is evaluated directly from the dumped file.
                result_file = args.out
                coco_eval(result_file, eval_types, dataset.coco)
            else:
                if not isinstance(outputs[0], dict):
                    result_files = results2json(dataset, outputs, args.out)
                    coco_eval(result_files, eval_types, dataset.coco)
                else:
                    # Each image result is a dict of named outputs; evaluate
                    # every named output separately.
                    for name in outputs[0]:
                        print('\nEvaluating {}'.format(name))
                        outputs_ = [out[name] for out in outputs]
                        result_file = args.out + '.{}'.format(name)
                        result_files = results2json(dataset, outputs_,
                                                    result_file)
                        coco_eval(result_files, eval_types, dataset.coco)

    # Save predictions in the COCO json format
    if args.json_out and rank == 0:
        if not isinstance(outputs[0], dict):
            results2json(dataset, outputs, args.json_out)
        else:
            for name in outputs[0]:
                outputs_ = [out[name] for out in outputs]
                result_file = args.json_out + '.{}'.format(name)
                results2json(dataset, outputs_, result_file)


if __name__ == '__main__':
    main()
3.2 参数读取
首先是参数输入
def parse_args():
    """Parse the command-line arguments of the test script.

    Positional arguments are the config file path and the checkpoint file.
    As a side effect, ``os.environ['LOCAL_RANK']`` is set from
    ``--local_rank`` when the variable is not already present.

    Returns:
        argparse.Namespace: the parsed arguments.
    """
    parser = argparse.ArgumentParser(description='MMDet test detector')
    parser.add_argument('config', help='test config file path')
    parser.add_argument('checkpoint', help='checkpoint file')
    parser.add_argument('--out', help='output result file')
    parser.add_argument(
        '--json_out',
        help='output result file name without extension',
        type=str)
    parser.add_argument(
        '--eval',
        type=str,
        nargs='+',
        choices=['proposal', 'proposal_fast', 'bbox', 'segm', 'keypoints'],
        help='eval types')
    parser.add_argument('--show', action='store_true', help='show results')
    parser.add_argument('--tmpdir', help='tmp dir for writing some results')
    parser.add_argument(
        '--launcher',
        choices=['none', 'pytorch', 'slurm', 'mpi'],
        default='none',
        help='job launcher')
    parser.add_argument('--local_rank', type=int, default=0)
    args = parser.parse_args()
    # Only fill LOCAL_RANK in when the launcher has not exported it already.
    if 'LOCAL_RANK' not in os.environ:
        os.environ['LOCAL_RANK'] = str(args.local_rank)
    return args
参照上面的参数定义,就可以知道怎么使用了。程序启动后会先检查参数:--out、--show、--json_out 至少要指定一个,并且 --out 必须以 .pkl 或 .pickle 结尾。
后面就来到了读取配置文件,设置pretrained=None,test_mode=True
# Load the config file given on the command line.
cfg = mmcv.Config.fromfile(args.config)
# set cudnn_benchmark
if cfg.get('cudnn_benchmark', False):
    torch.backends.cudnn.benchmark = True
# Weights come from the checkpoint, so no pretrained backbone is loaded.
cfg.model.pretrained = None
# Mark the test split as being in test mode.
cfg.data.test.test_mode = True
读取分布式信息
# init distributed env first, since logger depends on the dist info.
if args.launcher == 'none':
    # No launcher requested: plain single-process testing.
    distributed = False
else:
    distributed = True
    init_dist(args.launcher, **cfg.dist_params)
这里如果没有设置 --launcher(默认值为 'none'),就是非分布式测试;如果设置了 launcher,就会根据配置文件中的 cfg.dist_params 初始化分布式环境。
接下来就开始读取数据集test配置的信息了
# build the dataloader
# TODO: support multiple images per gpu (only minor changes are needed)
dataset = build_dataset(cfg.data.test)
data_loader = build_dataloader(
    dataset,
    imgs_per_gpu=1,  # one image per GPU for each test step
    workers_per_gpu=cfg.data.workers_per_gpu,
    dist=distributed,
    shuffle=False)  # keep the dataset order: test data is NOT shuffled
主要是通过读取cfg.data.test内部的参数来构建测试数据集,再由build_dataloader生成对应的data_loader。
3.3 导入model和checkpoints
# build the model and load checkpoint
# train_cfg is None here: only the test-time settings are passed in.
model = build_detector(cfg.model, train_cfg=None, test_cfg=cfg.test_cfg)
fp16_cfg = cfg.get('fp16', None)
if fp16_cfg is not None:
    wrap_fp16_model(model)
# The checkpoint is mapped to CPU when it is loaded.
checkpoint = load_checkpoint(model, args.checkpoint, map_location='cpu')
这里我们可以看到build_detector函数,
def build_detector(cfg, train_cfg=None, test_cfg=None):
    """Build a detector from ``cfg`` through the ``DETECTORS`` registry.

    Args:
        cfg: detector config (or a list of configs, see ``build``).
        train_cfg: training settings, forwarded as a default argument.
        test_cfg: testing settings, forwarded as a default argument.

    Returns:
        Whatever ``build`` returns for the given config.
    """
    return build(cfg, DETECTORS, dict(train_cfg=train_cfg, test_cfg=test_cfg))
def build(cfg, registry, default_args=None):
    """Build one module, or a sequence of modules, from config.

    Args:
        cfg: a single config, or a list of configs.
        registry: registry handed to ``build_from_cfg``.
        default_args: extra arguments handed to ``build_from_cfg``.

    Returns:
        ``nn.Sequential`` of the built modules when ``cfg`` is a list,
        otherwise the single object returned by ``build_from_cfg``.
    """
    if isinstance(cfg, list):
        modules = [
            build_from_cfg(cfg_, registry, default_args) for cfg_ in cfg
        ]
        return nn.Sequential(*modules)
    return build_from_cfg(cfg, registry, default_args)
模型会用到配置文件(configs文件夹下)中的train_cfg和test_cfg;注意测试时传入的是train_cfg=None,实际只使用test_cfg:
# training and testing settings
train_cfg = {
    'assigner': {
        # Splits samples into positives/negatives by IoU.
        # NOTE(review): the original note says this is for the RPN; confirm
        # which head this config actually belongs to.
        'type': 'MaxIoUAssigner',
        'pos_iou_thr': 0.5,  # IoU threshold for positive samples
        'neg_iou_thr': 0.4,  # IoU threshold for negative samples
        'min_pos_iou': 0,  # minimum IoU for a positive sample
        'ignore_iof_thr': -1,  # threshold for ignoring bboxes
    },
    # presumably -1 means no extra border allowance around the image;
    # TODO confirm against the anchor-target code
    'allowed_border': -1,
    # per the original note, -1 leaves the original sample weights unchanged
    'pos_weight': -1,
    'debug': False,
}
test_cfg = {
    'nms_pre': 1000,
    'min_bbox_size': 0,
    'score_thr': 0.05,
    'nms': {'type': 'nms', 'iou_thr': 0.5},
    # alternative: 'nms': {'type': 'soft_nms', 'iou_thr': 0.5},
    'max_per_img': 100,
}
有了数据集信息,模型参数和checkpoints,就可以测试结果了
扫描二维码关注公众号,回复:
9143965 查看本文章
# old versions did not save class info in checkpoints, this walkaround is
# for backward compatibility
if 'CLASSES' in checkpoint['meta']:
    model.CLASSES = checkpoint['meta']['CLASSES']
else:
    model.CLASSES = dataset.CLASSES

if not distributed:
    # Single-GPU path: wrap the model on device 0 and test in this process.
    model = MMDataParallel(model, device_ids=[0])
    outputs = single_gpu_test(model, data_loader, args.show)
else:
    # Distributed path: results are gathered across ranks, via args.tmpdir.
    model = MMDistributedDataParallel(model.cuda())
    outputs = multi_gpu_test(model, data_loader, args.tmpdir)
根据是否为分布式(distributed),分别进入单GPU测试或多GPU测试:
def single_gpu_test(model, data_loader, show=False):
    """Run inference over ``data_loader`` in a single process.

    Args:
        model: the detector; it must expose ``.module`` (e.g. a data-parallel
            wrapper) because visualisation calls ``model.module.show_result``.
        data_loader: loader over the test dataset; ``data_loader.dataset``
            must support ``len()``.
        show (bool): when True, display every result. Results are then NOT
            rescaled (``rescale=not show``).

    Returns:
        list: one model output per loader iteration, in iteration order.
    """
    model.eval()
    results = []
    dataset = data_loader.dataset
    prog_bar = mmcv.ProgressBar(len(dataset))
    for data in data_loader:
        # Inference only: no gradients are needed.
        with torch.no_grad():
            result = model(return_loss=False, rescale=not show, **data)
        results.append(result)

        if show:
            model.module.show_result(data, result)

        # Advance the progress bar once per image in the batch.
        batch_size = data['img'][0].size(0)
        for _ in range(batch_size):
            prog_bar.update()
    return results
def multi_gpu_test(model, data_loader, tmpdir=None):
    """Run distributed inference and gather the results from all ranks.

    Args:
        model: the detector, already wrapped for distributed execution.
        data_loader: loader over this rank's share of the test dataset;
            ``data_loader.dataset`` must support ``len()``.
        tmpdir: forwarded to ``collect_results``.

    Returns:
        Whatever ``collect_results`` returns for the gathered results.
    """
    model.eval()
    results = []
    dataset = data_loader.dataset
    rank, world_size = get_dist_info()
    # Only rank 0 owns (and updates) the progress bar.
    if rank == 0:
        prog_bar = mmcv.ProgressBar(len(dataset))
    for data in data_loader:
        with torch.no_grad():
            result = model(return_loss=False, rescale=True, **data)
        results.append(result)

        if rank == 0:
            # Rank 0 advances the bar on behalf of every rank, hence the
            # multiplication by world_size.
            batch_size = data['img'][0].size(0)
            for _ in range(batch_size * world_size):
                prog_bar.update()

    # collect results from all ranks
    results = collect_results(results, len(dataset), tmpdir)
    return results
无论是单GPU测试还是多GPU测试,都会先调用model.eval()(即torch.nn.Module.eval()),把模型切换到评估模式。
然后遍历整个data_loader,在torch.no_grad()下(不计算梯度)调用model做前向推理,每次得到的结果都append到results列表里,同时更新进度条。
# Inference loop as used in single_gpu_test (the multi-GPU variant has no
# `show` branch and always passes rescale=True).
for i, data in enumerate(data_loader):
    with torch.no_grad():
        result = model(return_loss=False, rescale=not show, **data)
    results.append(result)
    if show:
        model.module.show_result(data, result)
    batch_size = data['img'][0].size(0)
    for _ in range(batch_size):
        prog_bar.update()  # advance the progress bar
如果没有设置eval_types(即没有传入--eval),测试的最终结果只会保存到--out指定的文件中(例如--out=eval/results.pkl)。具体测评方式在下面章节会继续讲到。
其他参考
这里是mmdetection入门介绍 前言 部分
这里是mmdetection入门介绍 test.py解析 部分
这里是mmdetection入门介绍 train.py解析 部分
这里是mmdetection入门介绍 模型解析 部分