import os
import sys
import argparse
import cv2
import math
import time
import numpy as np
import util
from config_reader import config_reader
from scipy.ndimage.filters import gaussian_filter
from model.cmu_model import get_testing_model
# Limbs, in the order they are connected. Each entry is the pair of 1-based
# body-part numbers joined by that limb; process() subtracts 1 to index the
# per-part peak lists. (Original author's note: "center 29 is in the
# position 15".)
limbSeq = [
    [2, 3], [2, 6], [3, 4], [4, 5], [6, 7], [7, 8], [2, 9], [9, 10],
    [10, 11], [2, 12], [12, 13], [13, 14], [2, 1], [1, 15], [15, 17],
    [1, 16], [16, 18], [3, 17], [6, 18],
]

# For each limb in limbSeq (same order), the pair of channel numbers of its
# part affinity field (PAF). process() subtracts 19 from these numbers to
# index the 38-channel averaged PAF array.
mapIdx = [
    [31, 32], [39, 40], [33, 34], [35, 36], [41, 42], [43, 44], [19, 20],
    [21, 22], [23, 24], [25, 26], [27, 28], [29, 30], [47, 48], [49, 50],
    [53, 54], [51, 52], [55, 56], [37, 38], [45, 46],
]

# Visualisation palette: colors[i] is handed to the OpenCV drawing calls for
# body part i (key-point dots) and for limb i (limb ellipses).
colors = [
    [255, 0, 0], [255, 85, 0], [255, 170, 0], [255, 255, 0], [170, 255, 0],
    [85, 255, 0], [0, 255, 0], [0, 255, 85], [0, 255, 170], [0, 255, 255],
    [0, 170, 255], [0, 85, 255], [0, 0, 255], [85, 0, 255], [170, 0, 255],
    [255, 0, 255], [255, 0, 170], [255, 0, 85],
]
def _upsample_to_image(blob, stride, valid_h, valid_w, out_w, out_h):
    """Scale a network output up by ``stride``, crop the padding, resize to the image."""
    blob = cv2.resize(blob, (0, 0), fx=stride, fy=stride, interpolation=cv2.INTER_CUBIC)
    blob = blob[:valid_h, :valid_w, :]  # remove padding
    return cv2.resize(blob, (out_w, out_h), interpolation=cv2.INTER_CUBIC)


def _predict_average_maps(oriImg, params, model_params):
    """Run the module-level ``model`` at every search scale and average the outputs.

    Returns ``(heatmap_avg, paf_avg)`` with shapes ``(H, W, 19)`` and
    ``(H, W, 38)`` where ``H, W`` is the size of ``oriImg``.
    """
    height, width = oriImg.shape[0], oriImg.shape[1]
    stride = model_params['stride']
    # One multiplier per entry of params['scale_search'], relative to the
    # network box size and the image height.
    multiplier = [x * model_params['boxsize'] / height for x in params['scale_search']]

    heatmap_avg = np.zeros((height, width, 19))
    paf_avg = np.zeros((height, width, 38))

    for scale in multiplier:
        imageToTest = cv2.resize(oriImg, (0, 0), fx=scale, fy=scale, interpolation=cv2.INTER_CUBIC)
        # Per the original author's note, this pads the lower-right corner so
        # that width and height become multiples of the stride.
        imageToTest_padded, pad = util.padRightDownCorner(imageToTest, stride,
                                                          model_params['padValue'])

        # Add a leading batch axis: (rows, cols, channels) -> (1, rows, cols, channels).
        input_img = np.transpose(np.float32(imageToTest_padded[:, :, :, np.newaxis]), (3, 0, 1, 2))
        output_blobs = model.predict(input_img)

        # pad[2] / pad[3] are the amounts cropped from the bottom / right
        # (presumably the padding added by padRightDownCorner).
        valid_h = imageToTest_padded.shape[0] - pad[2]
        valid_w = imageToTest_padded.shape[1] - pad[3]

        # Output 1 is the heatmaps, output 0 is the PAFs.
        heatmap = _upsample_to_image(np.squeeze(output_blobs[1]), stride,
                                     valid_h, valid_w, width, height)
        paf = _upsample_to_image(np.squeeze(output_blobs[0]), stride,
                                 valid_h, valid_w, width, height)

        # Average the results over all scales.
        heatmap_avg = heatmap_avg + heatmap / len(multiplier)
        paf_avg = paf_avg + paf / len(multiplier)

    return heatmap_avg, paf_avg


def _find_peaks(heatmap_avg, thre1):
    """Find local maxima above ``thre1`` in each of the 18 part heatmaps.

    Returns a list with one entry per part; each entry is a list of
    ``(x, y, score, peak_id)`` tuples, where ``x`` is the column, ``y`` the
    row, ``score`` the un-smoothed heatmap value and ``peak_id`` a running
    index that is unique across all parts.
    """
    all_peaks = []
    peak_counter = 0

    for part in range(18):
        map_ori = heatmap_avg[:, :, part]
        smoothed = gaussian_filter(map_ori, sigma=3)  # suppress noise

        # Copies of the map shifted by one pixel in each direction (zero at
        # the border) so every pixel can be compared with its 4 neighbours.
        prev_row = np.zeros(smoothed.shape)
        prev_row[1:, :] = smoothed[:-1, :]
        next_row = np.zeros(smoothed.shape)
        next_row[:-1, :] = smoothed[1:, :]
        prev_col = np.zeros(smoothed.shape)
        prev_col[:, 1:] = smoothed[:, :-1]
        next_col = np.zeros(smoothed.shape)
        next_col[:, :-1] = smoothed[:, 1:]

        peaks_binary = np.logical_and.reduce((
            smoothed >= prev_row, smoothed >= next_row,
            smoothed >= prev_col, smoothed >= next_col,
            smoothed > thre1))

        # np.nonzero yields (rows, cols); peaks are stored as (x, y) = (col, row).
        # Coordinates are converted to plain ints so they can be handed to
        # OpenCV drawing functions without relying on NumPy-scalar support.
        rows, cols = np.nonzero(peaks_binary)
        part_peaks = [
            (int(x), int(y), map_ori[y, x], peak_counter + n)
            for n, (x, y) in enumerate(zip(cols, rows))
        ]
        all_peaks.append(part_peaks)
        peak_counter += len(part_peaks)

    return all_peaks


def _connect_limbs(paf_avg, all_peaks, image_height, thre2, mid_num=10):
    """Score every candidate part pair of every limb and keep the best matches.

    Returns a list with one ``(n, 5)`` array per limb in ``limbSeq``; each
    row is ``[peak_id_A, peak_id_B, score, index_A, index_B]`` where the ids
    are global peak ids and the indices are positions inside the per-part
    peak lists. Limbs with no usable candidates get an empty ``(0, 5)`` array.
    """
    connection_all = []

    for k in range(len(mapIdx)):
        # The two PAF channels (x and y component) belonging to this limb.
        score_mid = paf_avg[:, :, [x - 19 for x in mapIdx[k]]]
        candA = all_peaks[limbSeq[k][0] - 1]
        candB = all_peaks[limbSeq[k][1] - 1]
        nA = len(candA)
        nB = len(candB)

        if nA == 0 or nB == 0:
            # One end of the limb was not detected at all.
            connection_all.append(np.zeros((0, 5)))
            continue

        connection_candidate = []
        for i in range(nA):
            for j in range(nB):
                vec = np.subtract(candB[j][:2], candA[i][:2])
                norm = math.sqrt(vec[0] * vec[0] + vec[1] * vec[1])
                # failure case when 2 body parts overlap
                if norm == 0:
                    continue
                vec = np.divide(vec, norm)  # unit vector pointing from A to B

                # Sample mid_num evenly spaced pixels on the segment A-B.
                xs = np.rint(np.linspace(candA[i][0], candB[j][0], num=mid_num)).astype(int)
                ys = np.rint(np.linspace(candA[i][1], candB[j][1], num=mid_num)).astype(int)
                # Arrays are indexed [row, col], i.e. [y, x].
                vec_x = score_mid[ys, xs, 0]
                vec_y = score_mid[ys, xs, 1]

                # Projection of the PAF onto the limb direction at each sample.
                score_midpts = np.multiply(vec_x, vec[0]) + np.multiply(vec_y, vec[1])
                # Mean projection, penalised when the limb is longer than
                # half the image height.
                score_with_dist_prior = sum(score_midpts) / len(score_midpts) + min(
                    0.5 * image_height / norm - 1, 0)

                # More than 80% of the samples must exceed thre2 ...
                criterion1 = np.count_nonzero(score_midpts > thre2) > 0.8 * len(score_midpts)
                # ... and the overall score must be positive.
                criterion2 = score_with_dist_prior > 0
                if criterion1 and criterion2:
                    connection_candidate.append([i, j, score_with_dist_prior,
                                                 score_with_dist_prior + candA[i][2] + candB[j][2]])

        # Greedy matching: best score first, each peak used at most once.
        connection_candidate.sort(key=lambda c: c[2], reverse=True)
        connection = np.zeros((0, 5))
        for i, j, s in (c[0:3] for c in connection_candidate):
            if i not in connection[:, 3] and j not in connection[:, 4]:
                connection = np.vstack([connection, [candA[i][3], candB[j][3], s, i, j]])
                if len(connection) >= min(nA, nB):
                    break
        connection_all.append(connection)

    return connection_all


def _assemble_people(connection_all, candidate):
    """Group limb connections into people.

    Returns an ``(n_people, 20)`` array. Columns 0-17 hold the global peak id
    of each body part (-1 when missing), the second last column is the score
    of the overall configuration and the last column is the number of parts.
    People with fewer than 4 parts or a mean score below 0.4 are dropped.
    """
    subset = -1 * np.ones((0, 20))

    for k in range(len(mapIdx)):
        connections = connection_all[k]
        if len(connections) == 0:
            continue

        partAs = connections[:, 0]  # global ids of the limb's first part
        partBs = connections[:, 1]  # global ids of the limb's second part
        indexA = limbSeq[k][0] - 1  # column of the first part in subset
        indexB = limbSeq[k][1] - 1  # column of the second part in subset

        for i in range(len(connections)):
            found = 0
            subset_idx = [-1, -1]
            for j in range(len(subset)):
                if subset[j][indexA] == partAs[i] or subset[j][indexB] == partBs[i]:
                    subset_idx[found] = j
                    found += 1
                    if found == 2:
                        # Only two slots exist; stop here instead of
                        # overflowing subset_idx on a third match.
                        break

            if found == 1:
                j = subset_idx[0]
                if subset[j][indexB] != partBs[i]:
                    # Person already has part A: attach part B.
                    subset[j][indexB] = partBs[i]
                    subset[j][-1] += 1
                    subset[j][-2] += candidate[int(partBs[i]), 2] + connections[i][2]
            elif found == 2:  # if found 2 and disjoint, merge them
                j1, j2 = subset_idx
                membership = ((subset[j1] >= 0).astype(int) + (subset[j2] >= 0).astype(int))[:-2]
                if len(np.nonzero(membership == 2)[0]) == 0:
                    # The two partial people share no part: merge j2 into j1.
                    # "+ 1" cancels the -1 placeholder of missing parts.
                    subset[j1][:-2] += (subset[j2][:-2] + 1)
                    subset[j1][-2:] += subset[j2][-2:]
                    # Part scores are already included; add the limb score.
                    subset[j1][-2] += connections[i][2]
                    subset = np.delete(subset, j2, 0)
                else:  # overlapping people: handle like found == 1
                    subset[j1][indexB] = partBs[i]
                    subset[j1][-1] += 1
                    subset[j1][-2] += candidate[int(partBs[i]), 2] + connections[i][2]
            # if find no partA in the subset, create a new subset
            # (never for the last two limbs in limbSeq)
            elif not found and k < 17:
                row = -1 * np.ones(20)
                row[indexA] = partAs[i]
                row[indexB] = partBs[i]
                row[-1] = 2
                row[-2] = sum(candidate[connections[i, :2].astype(int), 2]) + connections[i][2]
                subset = np.vstack([subset, row])

    # delete some rows of subset which has few parts occur or a low mean score
    rejected = (subset[:, -1] < 4) | (subset[:, -2] / subset[:, -1] < 0.4)
    return subset[~rejected]


def _draw_skeletons(canvas, all_peaks, candidate, subset):
    """Draw key-point dots and limb ellipses on ``canvas`` and return the result."""
    for i in range(18):
        for peak in all_peaks[i]:
            cv2.circle(canvas, (int(peak[0]), int(peak[1])), 4, colors[i], thickness=-1)

    stickwidth = 4

    # Only the first 17 limbs of limbSeq are drawn.
    for i in range(17):
        limb_parts = np.array(limbSeq[i]) - 1
        for n in range(len(subset)):
            index = subset[n][limb_parts]
            if -1 in index:
                continue  # this person lacks one end of the limb
            cur_canvas = canvas.copy()
            xs = candidate[index.astype(int), 0]
            ys = candidate[index.astype(int), 1]
            length = ((ys[0] - ys[1]) ** 2 + (xs[0] - xs[1]) ** 2) ** 0.5
            angle = math.degrees(math.atan2(ys[0] - ys[1], xs[0] - xs[1]))
            polygon = cv2.ellipse2Poly((int(np.mean(xs)), int(np.mean(ys))),
                                       (int(length / 2), stickwidth), int(angle), 0, 360, 1)
            cv2.fillConvexPoly(cur_canvas, polygon, colors[i])
            # Blend so that overlapping limbs stay visible.
            canvas = cv2.addWeighted(canvas, 0.4, cur_canvas, 0.6, 0)

    return canvas


def process(input_image, params, model_params):
    """Detect body poses in an image file and return the image with skeletons drawn.

    The module-level ``model`` must have been created and loaded before this
    function is called (the ``__main__`` section does this).

    Args:
        input_image: path of the image to read with ``cv2.imread``.
        params: dict providing ``'scale_search'`` (list of scale factors),
            ``'thre1'`` (heatmap peak threshold) and ``'thre2'`` (PAF sample
            threshold).
        model_params: dict providing ``'boxsize'``, ``'stride'`` and
            ``'padValue'``.

    Returns:
        The image as loaded by OpenCV (B,G,R order) with key points and limbs
        drawn on it.

    Raises:
        IOError: if the image cannot be read.
    """
    oriImg = cv2.imread(input_image)  # B,G,R order
    if oriImg is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise IOError('could not read image: %s' % input_image)

    heatmap_avg, paf_avg = _predict_average_maps(oriImg, params, model_params)
    all_peaks = _find_peaks(heatmap_avg, params['thre1'])
    connection_all = _connect_limbs(paf_avg, all_peaks, oriImg.shape[0], params['thre2'])

    # Flat (n_peaks, 4) table of every peak, addressable by global peak id.
    candidate = np.array([peak for part_peaks in all_peaks for peak in part_peaks])
    subset = _assemble_people(connection_all, candidate)

    # oriImg was never modified, so a copy replaces re-reading the file.
    return _draw_skeletons(oriImg.copy(), all_peaks, candidate, subset)
def test():
    """Debug helper: print the module-level ``model`` object.

    ``model`` is only assigned in the ``__main__`` section of this file, so
    calling this before that assignment raises ``NameError``.
    """
    print(model)
# Command-line entry point: read an image, run pose estimation on it and
# write the annotated result to disk.
if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--image', type=str, required=True, help='input image')
    parser.add_argument('--output', type=str, default='result.png', help='output image')
    parser.add_argument('--model', type=str, default='model/keras/model.h5', help='path to the weights file')
    args = parser.parse_args()

    input_image = args.image
    output = args.output
    keras_weights_file = args.model

    # Fail fast on a bad path instead of after the model has been loaded.
    if not os.path.isfile(input_image):
        parser.error('input image not found: %s' % input_image)

    tic = time.time()
    print('start processing...')

    # load model
    # authors of original model don't use
    # vgg normalization (subtracting mean) on input images
    # NOTE: `model` must stay a module-level name; process() reads it as a global.
    model = get_testing_model()
    model.load_weights(keras_weights_file)

    # load config
    params, model_params = config_reader()

    # generate image with body parts
    canvas = process(input_image, params, model_params)

    toc = time.time()
    print ('processing time is %.5f' % (toc - tic))

    # cv2.imwrite reports failure through its return value; do not ignore it.
    if not cv2.imwrite(output, canvas):
        sys.exit('failed to write output image: %s' % output)

    cv2.destroyAllWindows()