Realtime_Multi-Person_Pose_Estimation demo.ipynb Code Annotations

These annotations walk through the demo notebook and are a helpful companion to the implementation side of the paper.

Source code: https://github.com/ZheC/Realtime_Multi-Person_Pose_Estimation
Paper: https://arxiv.org/abs/1611.08050

# -*- coding:utf-8 -*-
import sys
from configobj import ConfigObj
caffe_root='/home/lijing/Tools/caffe-GPU/'
#os.chdir(caffe_root)
#sys.path.insert(0,caffe_root+'python')
sys.path.append(caffe_root+'python')
#from . import config_reader
#sys.path.append("..")
import cv2 as cv
import numpy as np
import scipy
import PIL.Image
import math
import caffe
import time
from config_reader import config_reader
#from test import config_reader
import util
import copy
import matplotlib
#%matplotlib inline
import pylab as plt

test_image = '/home/lijing/workplace/Pose/Realtime_Multi-Person_Pose_Estimation/testing/sample_image/ski.jpg'
#test_image = '../sample_image/upper.jpg'
#test_image = '../sample_image/upper2.jpg'
oriImg = cv.imread(test_image) # B,G,R order
f = plt.imshow(oriImg[:,:,[2,1,0]]) # reorder before displaying: cv.imread() returns BGR channels, matplotlib expects RGB
param, model = config_reader() # load the param/model configuration
# multiplier = 368 * [0.5, 1, 1.5, 2] / oriImg_height
multiplier = [x * model['boxsize'] / oriImg.shape[0] for x in param['scale_search']]
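# A worked example of the scale arithmetic above (values assumed, not taken
# from the notebook): with boxsize = 368, scale_search = [0.5, 1, 1.5, 2] and
# an input image 736 pixels tall, multiplier = [0.25, 0.5, 0.75, 1.0], so the
# network sees the image at heights 184, 368, 552 and 736.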
if param['use_gpu']:
    caffe.set_mode_gpu()
    caffe.set_device(param['GPUdeviceNumber']) # set to your device!
else:
    caffe.set_mode_cpu()
net = caffe.Net(model['deployFile'], model['caffemodel'], caffe.TEST)
heatmap_avg = np.zeros((oriImg.shape[0], oriImg.shape[1], 19))
paf_avg = np.zeros((oriImg.shape[0], oriImg.shape[1], 38))
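# A note on the channel counts above (the paper's output layout): the 19
# heatmap channels are 18 body parts plus 1 background channel, and the 38 PAF
# channels are 19 limbs times 2 (an x and a y component per limb).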
# first figure shows padded images
f, axarr = plt.subplots(1, len(multiplier))
f.set_size_inches((20, 5))
# second figure shows heatmaps
f2, axarr2 = plt.subplots(1, len(multiplier))
f2.set_size_inches((20, 5))
# third figure shows PAFs
f3, axarr3 = plt.subplots(2, len(multiplier))
f3.set_size_inches((20, 10))
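# For reference, a minimal sketch of what util.padRightDownCorner is assumed
# to do (based on the repo's util.py): pad only the bottom and right edges
# with padValue so both image dimensions become multiples of stride, and
# return the padded image plus the pad amounts [up, left, down, right].
def pad_right_down_corner_sketch(img, stride, padValue):
    h, w = img.shape[0], img.shape[1]
    pad = [0,                                                # up: never padded
           0,                                                # left: never padded
           0 if h % stride == 0 else stride - (h % stride),  # down
           0 if w % stride == 0 else stride - (w % stride)]  # right
    img_padded = cv.copyMakeBorder(img, pad[0], pad[2], pad[1], pad[3],
                                   cv.BORDER_CONSTANT, value=(padValue,) * 3)
    return img_padded, pad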

for m in range(len(multiplier)):
    scale = multiplier[m]
    imageToTest = cv.resize(oriImg, (0, 0), fx=scale, fy=scale, interpolation=cv.INTER_CUBIC)
    #stride = 8, padValue = 128
    imageToTest_padded, pad = util.padRightDownCorner(imageToTest, model['stride'], model['padValue'])
    #print imageToTest_padded.shape

    axarr[m].imshow(imageToTest_padded[:, :, [2, 1, 0]])
    axarr[m].set_title('Input image: scale %d' % m)
    #cv2 reads images as [height, width, channel]
    #the network input blob is [batch_size, channel, height, width]
    net.blobs['data'].reshape(*(1, 3, imageToTest_padded.shape[0], imageToTest_padded.shape[1]))
    # net.forward() # dry run
    #normalize first: img/256 - 0.5
    #the transpose moves imageToTest_padded from [height, width, channel] into the [batch_size, channel, height, width] layout
    net.blobs['data'].data[...] = np.transpose(np.float32(imageToTest_padded[:, :, :, np.newaxis]),
                                               (3, 2, 0, 1)) / 256 - 0.5
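    # Worked example of the normalization above: a pixel value of 0 maps to
    # -0.5 and 255 maps to 255/256 - 0.5, about 0.496, so network inputs lie
    # in roughly [-0.5, 0.5).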
    start_time = time.time()
    output_blobs = net.forward()
    print('At scale %d, The CNN took %.2f ms.' % (m, 1000 * (time.time() - start_time)))

    # extract outputs, resize, and remove padding
    #np.squeeze removes singleton dimensions, e.g. shape (1, 3, 1) -> (3,)
    #output_blobs.keys() -> ['Mconv7_stage6_L1', 'Mconv7_stage6_L2'], the network's two output keys
    #print 'out_key:', output_blobs.keys()
    heatmap = np.transpose(np.squeeze(net.blobs[list(output_blobs.keys())[1]].data), (1, 2, 0))  # output 1 is heatmaps
    #print '000',heatmap.shape
    #shape at the 4 scales, in order: [23,35,19], [46,69,19], [69,104,19], [92,138,19]
    #print 'out_shape[1]:',net.blobs[output_blobs.keys()[1]].data.shape
    #map the heatmap features back onto the original image: upsample by the stride (the accumulated
    #pooling factor), crop off the pad (which was only added at the right and bottom), then resize to the original size
    heatmap = cv.resize(heatmap, (0, 0), fx=model['stride'], fy=model['stride'], interpolation=cv.INTER_CUBIC)
    #print '111',heatmap.shape
    #print 'model[stride]',model['stride']
    #print 'imageToTest_padded.shape',imageToTest_padded.shape
    #print 'pad',pad
    heatmap = heatmap[:imageToTest_padded.shape[0] - pad[2], :imageToTest_padded.shape[1] - pad[3], :]
    #print '222',heatmap.shape
    heatmap = cv.resize(heatmap, (oriImg.shape[1], oriImg.shape[0]), interpolation=cv.INTER_CUBIC)
    #print '333',heatmap.shape

    paf = np.transpose(np.squeeze(net.blobs[list(output_blobs.keys())[0]].data), (1, 2, 0))  # output 0 is PAFs
    # shape at the 4 scales, in order: [23,35,38], [46,69,38], [69,104,38], [92,138,38]
    #print "444",paf.shape
    paf = cv.resize(paf, (0, 0), fx=model['stride'], fy=model['stride'], interpolation=cv.INTER_CUBIC)
    paf = paf[:imageToTest_padded.shape[0] - pad[2], :imageToTest_padded.shape[1] - pad[3], :]
    paf = cv.resize(paf, (oriImg.shape[1], oriImg.shape[0]), interpolation=cv.INTER_CUBIC)

    # visualization
    #draw the original image first, then overlay the heatmap at alpha 0.5
    axarr2[m].imshow(oriImg[:, :, [2, 1, 0]])
    #channel 3 is the right wrist
    ax2 = axarr2[m].imshow(heatmap[:, :, 3], alpha=.5)  # right wrist
    axarr2[m].set_title('Heatmaps (Rwri): scale %d' % m)
    #channels 16/17 are the x/y components of the right elbow -> right wrist limb
    #PAFs encode the link between two parts, so there are twice as many PAF channels as heatmap channels
    axarr3.flat[m].imshow(oriImg[:, :, [2, 1, 0]])
    #print '555',axarr3.shape
    #overlay the association map between right elbow and right wrist at alpha 0.5, one column per scale
    ax3x = axarr3.flat[m].imshow(paf[:, :, 16], alpha=.5)  # right elbow
    axarr3.flat[m].set_title('PAFs (x comp. of Rwri to Relb): scale %d' % m)
    axarr3.flat[len(multiplier) + m].imshow(oriImg[:, :, [2, 1, 0]])
    ax3y = axarr3.flat[len(multiplier) + m].imshow(paf[:, :, 17], alpha=.5)  # right wrist
    axarr3.flat[len(multiplier) + m].set_title('PAFs (y comp. of Relb to Rwri): scale %d' % m)
    #heatmap_avg and paf_avg accumulate the means of heatmap and paf over the 4 scales
    heatmap_avg = heatmap_avg + heatmap / len(multiplier)
    paf_avg = paf_avg + paf / len(multiplier)
start_time_stage2 = time.time()
#heatmap figure layout
f2.subplots_adjust(right=0.93)
cbar_ax = f2.add_axes([0.95, 0.15, 0.01, 0.7])
_ = f2.colorbar(ax2, cax=cbar_ax)
#PAF figure layout
f3.subplots_adjust(right=0.93)
cbar_axx = f3.add_axes([0.95, 0.57, 0.01, 0.3])
_ = f3.colorbar(ax3x, cax=cbar_axx)
cbar_axy = f3.add_axes([0.95, 0.15, 0.01, 0.3])
_ = f3.colorbar(ax3y, cax=cbar_axy)

plt.imshow(oriImg[:,:,[2,1,0]])
plt.imshow(heatmap_avg[:,:,2], alpha=.5)
fig = matplotlib.pyplot.gcf()
cax = matplotlib.pyplot.gca()
fig.set_size_inches(20, 20)
fig.subplots_adjust(right=0.93)
cbar_ax = fig.add_axes([0.95, 0.15, 0.01, 0.7])
_ = fig.colorbar(ax2, cax=cbar_ax)


from numpy import ma
U = paf_avg[:,:,16] * -1
V = paf_avg[:,:,17]
#np.meshgrid example:
# x = np.arange(-2, 2)   # 1-D arrays (vectors)
# y = np.arange(0, 3)
# x -> array([-2, -1,  0,  1])
# y -> array([0, 1, 2])
# z, s = np.meshgrid(x, y)   # expand the two 1-D arrays into 2-D grids
# z -> array([[-2, -1,  0,  1],
#             [-2, -1,  0,  1],
#             [-2, -1,  0,  1]])
# s -> array([[0, 0, 0, 0],
#             [1, 1, 1, 1],
#             [2, 2, 2, 2]])
X, Y = np.meshgrid(np.arange(U.shape[1]), np.arange(U.shape[0]))
M = np.zeros(U.shape, dtype='bool')
#build a mask: hide weak field vectors, i.e. pixels where U**2 + V**2 < 0.5**2
M[U**2 + V**2 < 0.5 * 0.5] = True
U = ma.masked_array(U, mask=M)
V = ma.masked_array(V, mask=M)

plt.figure()
plt.imshow(oriImg[:,:,[2,1,0]], alpha = .5)
s = 5
#draw the vector field: X, Y are the arrow origins, sampled every s pixels
#U, V define the arrow directions
Q = plt.quiver(X[::s,::s], Y[::s,::s], U[::s,::s], V[::s,::s],
               scale=50, headaxislength=4, alpha=.5, width=0.001, color='r')

fig = matplotlib.pyplot.gcf()
fig.set_size_inches(20, 20)
import scipy

#print heatmap_avg.shape

# plt.imshow(heatmap_avg[:,:,2])
from scipy.ndimage.filters import gaussian_filter

all_peaks = []
peak_counter = 0
#print "heatmap_avr_shape:",heatmap_avg.shape
for part in range(19 - 1):
    x_list = []
    y_list = []
    map_ori = heatmap_avg[:, :, part]
    #Gaussian blurring is the classic image-convolution example: the (grayscale)
    #image I is convolved with a 2-D Gaussian kernel of standard deviation sigma.
    #It is commonly a building block of other operations such as interpolation
    #and interest-point detection.
    map = gaussian_filter(map_ori, sigma=3)
    #build four copies of the blurred map, each shifted one pixel left/right/up/down
    map_left = np.zeros(map.shape)
    map_left[1:, :] = map[:-1, :]
    map_right = np.zeros(map.shape)
    map_right[:-1, :] = map[1:, :]
    map_up = np.zeros(map.shape)
    map_up[:, 1:] = map[:, :-1]
    map_down = np.zeros(map.shape)
    map_down[:, :-1] = map[:, 1:]
    #vectorized local-maximum test: a pixel is a peak if it is >= its four shifted
    #neighbours and above the heatmap threshold; peaks_binary is a boolean array the size of map
    peaks_binary = np.logical_and.reduce(
        (map >= map_left, map >= map_right, map >= map_up, map >= map_down, map > param['thre1']))
    #np.nonzero(peaks_binary)[1] holds the columns (x), [0] the rows (y); zipping gives (x, y) coordinates
    peaks = list(zip(np.nonzero(peaks_binary)[1], np.nonzero(peaks_binary)[0]))  # note reverse: columns first
    #peaks with score attached: [(x_1, y_1, score_1), ..., (x_n, y_n, score_n)]
    peaks_with_score = [x + (map_ori[x[1], x[0]],) for x in peaks]
    #assign each peak in this part's heatmap a running index
    id = range(peak_counter, peak_counter + len(peaks))
    #peaks with score and id: [(x_1, y_1, score_1, id_1), ..., (x_n, y_n, score_n, id_n)]
    peaks_with_score_and_id = [peaks_with_score[i] + (id[i],) for i in range(len(id))]
    #all_peaks stores one such list per part: [[(x, y, score, id), ...], ..., [(x, y, score, id), ...]]
    all_peaks.append(peaks_with_score_and_id)
    #peak_counter persists across parts, so ids run consecutively over all heatmaps
    peak_counter += len(peaks)
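# A quick sanity example of the peak rule above (illustrative toy data, not
# part of the demo): the centre of this bump is the only pixel that beats all
# four shifted copies and exceeds a threshold of 0.5.
_toy = np.array([[0.0, 0.1, 0.0],
                 [0.1, 0.9, 0.1],
                 [0.0, 0.1, 0.0]])
_l = np.zeros_like(_toy); _l[1:, :] = _toy[:-1, :]
_r = np.zeros_like(_toy); _r[:-1, :] = _toy[1:, :]
_u = np.zeros_like(_toy); _u[:, 1:] = _toy[:, :-1]
_d = np.zeros_like(_toy); _d[:, :-1] = _toy[:, 1:]
print(np.nonzero(np.logical_and.reduce(
    (_toy >= _l, _toy >= _r, _toy >= _u, _toy >= _d, _toy > 0.5))))
# -> (array([1]), array([1])): the single peak sits at row 1, col 1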
#len(all_peaks) == 18
#print("all_peak_len", len(all_peaks))
# find connection in the specified sequence, center 29 is in the position 15
#limbSeq lists 19 pairs of adjacent body parts (1-indexed); mapIdx gives the two PAF channels for each pair
limbSeq = [[2,3], [2,6], [3,4], [4,5], [6,7], [7,8], [2,9], [9,10], \
           [10,11], [2,12], [12,13], [13,14], [2,1], [1,15], [15,17], \
           [1,16], [16,18], [3,17], [6,18]]
# the middle joints heatmap correpondence
mapIdx = [[31,32], [39,40], [33,34], [35,36], [41,42], [43,44], [19,20], [21,22], \
          [23,24], [25,26], [27,28], [29,30], [47,48], [49,50], [53,54], [51,52], \
          [55,56], [37,38], [45,46]]
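# Hedged worked example of the two tables above (the part names follow the
# repo's 18-part COCO ordering, which is an assumption here): limbSeq[0] =
# [2, 3] is the neck -> right shoulder limb; its PAF lives in output channels
# mapIdx[0] = [31, 32], which index into paf_avg as channels 31 - 19 = 12
# (x component) and 32 - 19 = 13 (y component).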
connection_all = []
special_k = []
mid_num = 10
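# The scoring inside the loop below approximates the paper's line integral
# E = \int_0^1 PAF(p(u)) . d_unit du by sampling mid_num = 10 points p(u) on
# the segment between a pair of candidate parts and averaging the dot product
# of the PAF vector at each sample with the unit vector d_unit along the limb.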
#len(mapIdx) == 19
for k in range(len(mapIdx)):
    #pull out the two PAF channels for this limb
    score_mid = paf_avg[:, :, [x - 19 for x in mapIdx[k]]]
    #print "777",score_mid.shape
    #all_peaks holds the peak info from every heatmap; candA and candB are the candidate peaks for this limb's two parts
    candA = all_peaks[limbSeq[k][0] - 1]
    candB = all_peaks[limbSeq[k][1] - 1]
    #print "888",len(candA)
    nA = len(candA)  #number of candidates detected for part indexA
    nB = len(candB)  #number of candidates detected for part indexB
    indexA, indexB = limbSeq[k]
    if (nA != 0 and nB != 0):
        connection_candidate = []
        for i in range(nA):
            for j in range(nB):
                #form a vector from each candidate of part A to each candidate of part B
                vec = np.subtract(candB[j][:2], candA[i][:2])
                #compute the vector's length
                norm = math.sqrt(vec[0] * vec[0] + vec[1] * vec[1])
                #normalize to a unit vector
                vec = np.divide(vec, norm)
                #sample mid_num = 10 evenly spaced points on the segment from candA[i] to candB[j];
                #startend holds their (x, y) coordinates
                startend = list(zip(np.linspace(candA[i][0], candB[j][0], num=mid_num), \
                                    np.linspace(candA[i][1], candB[j][1], num=mid_num)))
                #score_mid holds this limb's two PAF channels; these two lines read the PAF x and y values at the 10 sample points into vec_x and vec_y
                vec_x = np.array([score_mid[int(round(startend[I][1])), int(round(startend[I][0])), 0] \
                                  for I in range(len(startend))])
                vec_y = np.array([score_mid[int(round(startend[I][1])), int(round(startend[I][0])), 1] \
                                  for I in range(len(startend))])
                #dot product of the sampled PAF vector with the unit limb vector at each point: score_midpts = [s1, s2, ..., s10]
                score_midpts = np.multiply(vec_x, vec[0]) + np.multiply(vec_y, vec[1])
                #print '888',score_midpts
                #sum(score_midpts) / len(score_midpts) is the mean association score
                #min(0.5 * oriImg.shape[0] / norm - 1, 0) is 0 for limbs shorter than half the image height and a growing penalty for longer ones
                score_with_dist_prior = sum(score_midpts) / len(score_midpts) + min(0.5 * oriImg.shape[0] / norm - 1, 0)
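                # Worked example of the distance prior (assumed numbers): with an
                # image 736 pixels tall, a 300-pixel limb gets min(368/300 - 1, 0) = 0
                # (no penalty), while a 500-pixel limb gets 368/500 - 1 = -0.264.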
                #criterion 1: at least 80% of the 10 sampled scores exceed the threshold param['thre2'] (0.05)
                criterion1 = len(np.nonzero(score_midpts > param['thre2'])[0]) > 0.8 * len(score_midpts)
                #print "hahahah",param['thre2']
                #criterion 2: the combined association score is positive
                criterion2 = score_with_dist_prior > 0
                #if both hold, record [candidate i of part A, candidate j of part B, association score, association score + the two peaks' heatmap scores]
                if criterion1 and criterion2:
                    connection_candidate.append(
                        [i, j, score_with_dist_prior, score_with_dist_prior + candA[i][2] + candB[j][2]])
        #sort the candidates by association score, descending
        connection_candidate = sorted(connection_candidate, key=lambda x: x[2], reverse=True)
        #connection starts as an empty array with 5 columns
        connection = np.zeros((0, 5))
        #greedily accept candidate connections for this limb
        for c in range(len(connection_candidate)):
            #i, j, s: candidate index in part A, candidate index in part B, association score
            i, j, s = connection_candidate[c][0:3]
            #candidates are sorted by score from high to low, so skip a pair if either endpoint is already taken
            if (i not in connection[:, 3] and j not in connection[:, 4]):
                #candA[i][3] and candB[j][3] are the peaks' global ids, unique across all parts (see the id assignment in the peak-finding loop)
                #connection rows have the form [global id of A, global id of B, association score, i, j]
                connection = np.vstack([connection, [candA[i][3], candB[j][3], s, i, j]])
                #stop once min(nA, nB) connections have been made
                if (len(connection) >= min(nA, nB)):
                    break
        #store this limb's connections
        connection_all.append(connection)
    else:
        #if either part has no candidates, record this limb index k as special
        special_k.append(k)
        connection_all.append([])
# last number in each row is the total parts number of that person
# the second last number in each row is the score of the overall configuration
subset = -1 * np.ones((0, 20))
#candidate: array of (x, y, score, global id) rows, flattening the peaks of all 18 parts
candidate = np.array([item for sublist in all_peaks for item in sublist])
#print(candidate)

#len(mapIdx) == 19
for k in range(len(mapIdx)):
    #skip limbs for which one endpoint had no candidates
    if k not in special_k:
        #global ids of the A-side peaks of this limb's connections
        partAs = connection_all[k][:, 0]
        # global ids of the B-side peaks of this limb's connections
        partBs = connection_all[k][:, 1]
        #0-based part indices of this limb's two endpoints
        indexA, indexB = np.array(limbSeq[k]) - 1
        #walk through each accepted connection of this limb
        for i in range(len(connection_all[k])):# = 1:size(temp,1)
            #print "k:",k
            found = 0
            subset_idx = [-1, -1]
            #scan the existing people (subset rows) for either endpoint
            for j in range(len(subset)):  # 1:size(subset,1):
                #print "000000000000"
                #a row matches if its part-A slot already holds this connection's A id, or its part-B slot holds the B id
                if subset[j][indexA] == partAs[i] or subset[j][indexB] == partBs[i]:
                    #remember which row matched
                    subset_idx[found] = j
                    found += 1
            #exactly one row matched
            if found == 1:
                #print '22222222'
                j = subset_idx[0]
                #the A end is already part of this person; attach the new B peak to its slot
                if (subset[j][indexB] != partBs[i]):
                    subset[j][indexB] = partBs[i]
                    #part count: 2 -> 3, 3 -> 4, ...
                    #print subset[j]
                    subset[j][-1] += 1
                    #add the new B peak's heatmap score plus the connection score to the person's total
                    subset[j][-2] += candidate[partBs[i].astype(int), 2] + connection_all[k][i][2]
            #two different rows matched: one already holds this connection's A id, the other its B id
            elif found == 2:  # if found 2 and disjoint, merge them
                #print '3333333333'
                #the two matched rows
                j1, j2 = subset_idx
                #print "found = 2"
                #print subset[j1]
                #check whether any part slot is filled in both rows
                membership = ((subset[j1] >= 0).astype(int) + (subset[j2] >= 0).astype(int))[:-2]
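                # Illustrative example (assumed part sets): if row j1 holds
                # {neck, Rshoulder} and row j2 holds {Relbow, Rwrist}, no slot is
                # filled in both, membership never reaches 2, and the two rows
                # merge into a single person below.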
                #if not, the two rows are disjoint: merge them into one person
                if len(np.nonzero(membership == 2)[0]) == 0:  # merge
                    subset[j1][:-2] += (subset[j2][:-2] + 1)
                    subset[j1][-2:] += subset[j2][-2:]
                    subset[j1][-2] += connection_all[k][i][2]
                    subset = np.delete(subset, j2, 0)
                #otherwise repeat the found == 1 handling, applied to j1 by default
                else:  # as like found == 1
                    subset[j1][indexB] = partBs[i]
                    subset[j1][-1] += 1
                    subset[j1][-2] += candidate[partBs[i].astype(int), 2] + connection_all[k][i][2]

            # if neither endpoint is in any existing person, create a new one
            # (k < 17: the two ear-shoulder limbs never start a new person)
            elif not found and k < 17:
                #print "111111111"
                row = -1 * np.ones(20)
                row[indexA] = partAs[i]
                row[indexB] = partBs[i]
                row[-1] = 2
                #print candidate[connection_all[k][i, :2].astype(int), 2]
                #candidate[connection_all[k][i, :2].astype(int), 2] pulls the two peaks' heatmap scores by global id
                #row[-2] holds the two peaks' heatmap scores plus the connection score
                row[-2] = sum(candidate[connection_all[k][i, :2].astype(int), 2]) + connection_all[k][i][2]
                #row starts as twenty -1s, with only these two part slots, the score and the count filled in
                subset = np.vstack([subset, row])
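# For orientation, the layout of one subset row as assembled above: 18 slots
# holding the global peak id of each body part (-1 if unassigned), then
# row[-2] = accumulated score and row[-1] = number of parts found so far.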
# delete subset rows with too few parts
#drop people with fewer than 4 parts or a mean per-part score below 0.4
deleteIdx = []
for i in range(len(subset)):
    if subset[i][-1] < 4 or subset[i][-2]/subset[i][-1] < 0.4:
        deleteIdx.append(i)
subset = np.delete(subset, deleteIdx, axis=0)
print('point match time : %.2f ms' % (1000 * (time.time() - start_time_stage2)))
# visualize
colors = [[255, 0, 0], [255, 85, 0], [255, 170, 0], [255, 255, 0], [170, 255, 0], [85, 255, 0], [0, 255, 0], \
          [0, 255, 85], [0, 255, 170], [0, 255, 255], [0, 170, 255], [0, 85, 255], [0, 0, 255], [85, 0, 255], \
          [170, 0, 255], [255, 0, 255], [255, 0, 170], [255, 0, 85]]
cmap = matplotlib.cm.get_cmap('hsv')

canvas = cv.imread(test_image) # B,G,R order

for i in range(18):
    rgba = np.array(cmap(1 - i/18. - 1./36))
    rgba[0:3] *= 255
    for j in range(len(all_peaks[i])):
        cv.circle(canvas, all_peaks[i][j][0:2], 4, colors[i], thickness=-1)

to_plot = cv.addWeighted(oriImg, 0.3, canvas, 0.7, 0)
plt.imshow(to_plot[:,:,[2,1,0]])
fig = matplotlib.pyplot.gcf()
fig.set_size_inches(12, 12)
# visualize 2
stickwidth = 4

for i in range(17):
    for n in range(len(subset)):
        index = subset[n][np.array(limbSeq[i]) - 1]
        if -1 in index:
            continue
        cur_canvas = canvas.copy()
        Y = candidate[index.astype(int), 0]
        X = candidate[index.astype(int), 1]
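        # Note the deliberately swapped names: candidate[:, 0] is the x (column)
        # coordinate, so "Y" holds x values and "X" holds y values; ellipse2Poly
        # below then receives the centre as (x, y) = (int(mY), int(mX)).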
        mX = np.mean(X)
        mY = np.mean(Y)
        length = ((X[0] - X[1]) ** 2 + (Y[0] - Y[1]) ** 2) ** 0.5
        angle = math.degrees(math.atan2(X[0] - X[1], Y[0] - Y[1]))
        polygon = cv.ellipse2Poly((int(mY), int(mX)), (int(length / 2), stickwidth), int(angle), 0, 360, 1)
        cv.fillConvexPoly(cur_canvas, polygon, colors[i])
        canvas = cv.addWeighted(canvas, 0.4, cur_canvas, 0.6, 0)

plt.imshow(canvas[:, :, [2, 1, 0]])
fig = matplotlib.pyplot.gcf()
fig.set_size_inches(12, 12)
#plt.show()

Reposted from blog.csdn.net/l297969586/article/details/80346254