前言
这里是loss的续,将刚才没写完的部分写完,主要是关于anchor_target_layer,也就是rpn部分。
anchor_target_layer()
def anchor_target_layer(cls_pred, bbox, im_info, scope_name):
    """Build the RPN anchor-target tensors as TensorFlow graph nodes.

    The NumPy routine ``anchor_target_layer_py`` is wrapped with
    ``tf.py_func`` inside the variable scope ``scope_name``; its four
    float32 outputs are then given stable tensor names.

    Args:
        cls_pred: first input forwarded to ``anchor_target_layer_py``
            (the original note labels it 'rpn_cls_score').
        bbox: second input forwarded to ``anchor_target_layer_py``
            (the original note labels it 'gt_boxes').
        im_info: third input forwarded to ``anchor_target_layer_py``.
        scope_name: name handed to ``tf.variable_scope``.

    Returns:
        A list ``[rpn_labels, rpn_bbox_targets, rpn_bbox_inside_weights,
        rpn_bbox_outside_weights]``.  ``rpn_labels`` is cast to int32; the
        other three keep the float32 type declared for the py_func outputs.
    """
    with tf.variable_scope(scope_name):
        # The two trailing [16] lists are passed positionally after im_info;
        # presumably they are the feature stride and the anchor scales --
        # verify against the signature of anchor_target_layer_py.
        py_inputs = [cls_pred, bbox, im_info, [16, ], [16]]
        py_output_types = [tf.float32, tf.float32, tf.float32, tf.float32]
        raw_labels, raw_targets, raw_inside, raw_outside = tf.py_func(
            anchor_target_layer_py, py_inputs, py_output_types)

        # Labels leave the py_func as float32 and are converted to int32 here.
        rpn_labels = tf.convert_to_tensor(
            tf.cast(raw_labels, tf.int32), name='rpn_labels')
        rpn_bbox_targets = tf.convert_to_tensor(
            raw_targets, name='rpn_bbox_targets')
        rpn_bbox_inside_weights = tf.convert_to_tensor(
            raw_inside, name='rpn_bbox_inside_weights')
        rpn_bbox_outside_weights = tf.convert_to_tensor(
            raw_outside, name='rpn_bbox_outside_weights')

        return [rpn_labels, rpn_bbox_targets,
                rpn_bbox_inside_weights, rpn_bbox_outside_weights]
tf.convert_to_tensor是个很有用的函数:我们经常需要将python的数据类型转换成TensorFlow可用的tensor数据类型,所以仔细研究一下这个函数还是很有必要的。其实上面我们看到的只是一层封装,真正的函数在这:
"""
Assign anchors to ground-truth targets. Produces anchor classification
labels and bounding-box regression targets.
将锚点分配给真实目标。 生成锚点分类标签和边界框回归目标。
Parameters
----------
rpn_cls_score: (1, H, W, Ax2) bg/fg scores of previous conv layer 是前景还是背景的分类概率
gt_boxes: (G, 5) vstack of [x1, y1, x2, y2, class] 真实标签
im_info: a list of [image_height, image_width, scale_ratios] 图像信息
_feat_stride: the downsampling ratio of feature map to the original input image 原始图像到特征图的下采样率
anchor_scales: the scales to the basic_anchor (basic anchor is [16, 16]) 基本锚点的大小
----------
Returns 返回值
----------
rpn_labels : (HxWxA, 1), for each anchor, 0 denotes bg, 1 fg, -1 dontcare 0表示背景,1是前景,-1不关心
rpn_bbox_targets: (HxWxA, 4), distances of the anchors to the gt_boxes(may contains some transform)
that are the regression objectives 锚点到作为回归目标的gt_boxes(可能包含一些变换)的距离
rpn_bbox_inside_weights: (HxWxA, 4) weights of each boxes, mainly accepts hyper param in cfg 每个box的权重,主要在cfg中接受超级参数
rpn_bbox_outside_weights: (HxWxA, 4) used to balance the fg/bg, 用于平衡前景背景
because the numbers of bgs and fgs may differ significantly 因为bgs和fgs的数量可能有很大的不同
"""
def anchor_target_layer(rpn_cls_score, gt_boxes, im_info, _feat_stride=[16, ], anchor_scales=[16, ]):
    """Assign anchors to ground-truth boxes and build RPN training targets.

    The base anchors are replicated at every feature-map cell, anchors that
    leave the image are discarded, the survivors are labelled from their
    overlap with ``gt_boxes``, the labels are subsampled to at most
    ``cfg.RPN_BATCHSIZE`` examples, and regression targets plus loss weights
    are computed.  Finally everything is mapped back onto the full anchor set
    and reshaped to the feature-map layout.

    Parameters
    ----------
    rpn_cls_score : ndarray
        Only its shape is read: ``shape[0]`` must be 1 and ``shape[1:3]`` is
        taken as the feature-map (height, width).
    gt_boxes : ndarray
        Ground-truth boxes, one per row; rows are selected with
        ``gt_boxes[argmax_overlaps, :]``.  Presumably (G, 5) rows of
        [x1, y1, x2, y2, class] -- confirm against the caller.
    im_info : sequence
        ``im_info[0]`` is read as ``[image_height, image_width, scale]``.
    _feat_stride : sequence of int
        Multiplied with the cell indices to obtain pixel shifts.  The default
        list is never mutated here.
    anchor_scales : sequence of int
        Forwarded to ``generate_anchors`` as ``scales``.  The default list is
        never mutated here.

    Returns
    -------
    rpn_labels : ndarray, shape (1, H, W, A)
        1 for foreground, 0 for background, -1 for "don't care".
    rpn_bbox_targets : ndarray, shape (1, H, W, A * 4)
        Regression targets produced by ``_compute_targets``.
    rpn_bbox_inside_weights : ndarray, shape (1, H, W, A * 4)
        ``cfg.RPN_BBOX_INSIDE_WEIGHTS`` on foreground anchors, 0 elsewhere.
    rpn_bbox_outside_weights : ndarray, shape (1, H, W, A * 4)
        Per-anchor weights for foreground / background anchors, 0 elsewhere.

    Raises
    ------
    AssertionError
        If ``rpn_cls_score.shape[0] != 1`` or, when
        ``cfg.RPN_POSITIVE_WEIGHT >= 0``, it is not strictly inside (0, 1).
    """
    _anchors = generate_anchors(scales=np.array(anchor_scales))  # base anchors, one per row
    _num_anchors = _anchors.shape[0]

    if DEBUG:
        print('anchors:')
        print(_anchors)
        print('anchor shapes:')
        print(np.hstack((
            _anchors[:, 2::4] - _anchors[:, 0::4],
            _anchors[:, 3::4] - _anchors[:, 1::4],
        )))
        # Accumulators that are only read by the DEBUG reports further down.
        _counts = cfg.EPS
        _sums = np.zeros((1, 4))
        _squared_sums = np.zeros((1, 4))
        _fg_sum = 0
        _bg_sum = 0
        _count = 0

    # How far (in pixels) an anchor may stick out of the image; 0 = not at all.
    _allowed_border = 0

    im_info = im_info[0]  # [image_height, image_width, scale]
    if DEBUG:
        print("im_info: ", im_info)

    # Algorithm:
    #   for each (H, W) location i
    #     generate the base anchor boxes centered on cell i
    #     filter out-of-image anchors
    #     measure GT overlap
    assert rpn_cls_score.shape[0] == 1, 'Only single item batches are supported'

    height, width = rpn_cls_score.shape[1:3]  # feature-map size

    if DEBUG:
        print('AnchorTargetLayer: height', height, 'width', width)
        print('')
        print('im_size: ({}, {})'.format(im_info[0], im_info[1]))
        print('scale: {}'.format(im_info[2]))
        print('height, width: ({}, {})'.format(height, width))
        print('rpn: gt_boxes.shape', gt_boxes.shape)
        print('rpn: gt_boxes', gt_boxes)

    # 1. Shift the base anchors to every feature-map cell.
    shift_x = np.arange(0, width) * _feat_stride
    shift_y = np.arange(0, height) * _feat_stride
    shift_x, shift_y = np.meshgrid(shift_x, shift_y)  # in W H order
    # One (dx, dy, dx, dy) row per cell, K = H * W rows in total.
    shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                        shift_x.ravel(), shift_y.ravel())).transpose()

    # add A anchors (1, A, 4) to
    # cell K shifts (K, 1, 4) to get
    # shift anchors (K, A, 4)
    # reshape to (K*A, 4) shifted anchors
    A = _num_anchors
    K = shifts.shape[0]
    all_anchors = (_anchors.reshape((1, A, 4)) +
                   shifts.reshape((1, K, 4)).transpose((1, 0, 2)))
    all_anchors = all_anchors.reshape((K * A, 4))
    total_anchors = int(K * A)

    # Keep only the anchors that lie inside the image.
    inds_inside = np.where(
        (all_anchors[:, 0] >= -_allowed_border) &
        (all_anchors[:, 1] >= -_allowed_border) &
        (all_anchors[:, 2] < im_info[1] + _allowed_border) &  # width
        (all_anchors[:, 3] < im_info[0] + _allowed_border)  # height
    )[0]

    if DEBUG:
        print('total_anchors', total_anchors)
        print('inds_inside', len(inds_inside))

    anchors = all_anchors[inds_inside, :]
    if DEBUG:
        print('anchors.shape', anchors.shape)

    # 2. Label the inside anchors: 1 = positive, 0 = negative, -1 = don't care.
    labels = np.empty((len(inds_inside),), dtype=np.float32)
    labels.fill(-1)

    # overlaps[i, j] is the overlap between inside anchor i and gt box j.
    # np.float64 replaces the np.float alias, which was removed in NumPy 1.24.
    overlaps = bbox_overlaps(
        np.ascontiguousarray(anchors, dtype=np.float64),
        np.ascontiguousarray(gt_boxes, dtype=np.float64))
    # For every anchor: index of the gt box it overlaps most, and that overlap.
    argmax_overlaps = overlaps.argmax(axis=1)
    max_overlaps = overlaps[np.arange(len(inds_inside)), argmax_overlaps]
    # For every gt box: the best overlap any anchor reaches, then ALL anchors
    # that tie for that best overlap (not just the first one argmax returns).
    gt_argmax_overlaps = overlaps.argmax(axis=0)
    gt_max_overlaps = overlaps[gt_argmax_overlaps,
                               np.arange(overlaps.shape[1])]
    gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]

    if not cfg.RPN_CLOBBER_POSITIVES:
        # assign bg labels first so that positive labels can clobber them
        labels[max_overlaps < cfg.RPN_NEGATIVE_OVERLAP] = 0

    # fg label: for each gt, anchor(s) with highest overlap
    labels[gt_argmax_overlaps] = 1

    # fg label: above threshold IOU
    labels[max_overlaps >= cfg.RPN_POSITIVE_OVERLAP] = 1

    if cfg.RPN_CLOBBER_POSITIVES:
        # assign bg labels last so that negative labels can clobber positives
        labels[max_overlaps < cfg.RPN_NEGATIVE_OVERLAP] = 0

    # Subsample positives if there are too many: surplus ones become -1.
    num_fg = int(cfg.RPN_FG_FRACTION * cfg.RPN_BATCHSIZE)
    fg_inds = np.where(labels == 1)[0]
    if len(fg_inds) > num_fg:
        disable_inds = npr.choice(
            fg_inds, size=(len(fg_inds) - num_fg), replace=False)
        labels[disable_inds] = -1

    # Subsample negatives so positives + negatives do not exceed the batch
    # size; any shortfall of positives is made up with negatives.
    num_bg = cfg.RPN_BATCHSIZE - np.sum(labels == 1)
    bg_inds = np.where(labels == 0)[0]
    if len(bg_inds) > num_bg:
        disable_inds = npr.choice(
            bg_inds, size=(len(bg_inds) - num_bg), replace=False)
        labels[disable_inds] = -1

    # 3. Regression targets: each anchor against its best-overlapping gt box.
    bbox_targets = _compute_targets(anchors, gt_boxes[argmax_overlaps, :])

    bbox_inside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32)
    bbox_inside_weights[labels == 1, :] = np.array(cfg.RPN_BBOX_INSIDE_WEIGHTS)

    bbox_outside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32)
    if cfg.RPN_POSITIVE_WEIGHT < 0:
        # Uniform weighting: positives get 1, negatives get 0.
        positive_weights = np.ones((1, 4))
        negative_weights = np.zeros((1, 4))
    else:
        assert ((cfg.RPN_POSITIVE_WEIGHT > 0) &
                (cfg.RPN_POSITIVE_WEIGHT < 1))
        # NOTE(review): as written these evaluate to ``weight / count + 1``
        # (the "+ 1" is outside the division), so a zero count still divides
        # by zero.  Possibly ``weight / (count + 1)`` was intended -- confirm
        # before changing, since it alters the loss scale.
        positive_weights = (cfg.RPN_POSITIVE_WEIGHT /
                            (np.sum(labels == 1)) + 1)
        negative_weights = ((1.0 - cfg.RPN_POSITIVE_WEIGHT) /
                            (np.sum(labels == 0)) + 1)
    bbox_outside_weights[labels == 1, :] = positive_weights
    bbox_outside_weights[labels == 0, :] = negative_weights

    if DEBUG:
        _sums += bbox_targets[labels == 1, :].sum(axis=0)
        _squared_sums += (bbox_targets[labels == 1, :] ** 2).sum(axis=0)
        _counts += np.sum(labels == 1)
        means = _sums / _counts
        stds = np.sqrt(_squared_sums / _counts - means ** 2)
        print('means:')
        print(means)
        print('stdevs:')
        print(stds)

    # 4. Map the results back onto the full anchor set; the anchors that were
    # dropped for leaving the image receive the given fill value.
    labels = _unmap(labels, total_anchors, inds_inside, fill=-1)
    bbox_targets = _unmap(bbox_targets, total_anchors, inds_inside, fill=0)
    bbox_inside_weights = _unmap(bbox_inside_weights, total_anchors, inds_inside, fill=0)
    bbox_outside_weights = _unmap(bbox_outside_weights, total_anchors, inds_inside, fill=0)

    if DEBUG:
        print('rpn: max max_overlap', np.max(max_overlaps))
        print('rpn: num_positive', np.sum(labels == 1))
        print('rpn: num_negative', np.sum(labels == 0))
        _fg_sum += np.sum(labels == 1)
        _bg_sum += np.sum(labels == 0)
        _count += 1
        print('rpn: num_positive avg', _fg_sum / _count)
        print('rpn: num_negative avg', _bg_sum / _count)

    # 5. Reshape everything to the feature-map layout.
    rpn_labels = labels.reshape((1, height, width, A))
    rpn_bbox_targets = bbox_targets.reshape((1, height, width, A * 4))
    rpn_bbox_inside_weights = bbox_inside_weights.reshape((1, height, width, A * 4))
    rpn_bbox_outside_weights = bbox_outside_weights.reshape((1, height, width, A * 4))

    if DEBUG:
        print("anchor target set")

    return rpn_labels, rpn_bbox_targets, rpn_bbox_inside_weights, rpn_bbox_outside_weights
这里由于代码过于复杂,引用一位前辈的注解,这里前辈的注解很完美,我们只需了解一下返回值rpn_labels, rpn_bbox_targets, rpn_bbox_inside_weights, rpn_bbox_outside_weights,分别是标签、box的gt信息和内外部权值。
最后的话
这里就写这些,大部分取材于一位前辈的博客,在这里向前辈致以诚挚敬意。