  • poseval-master代码解读

    这部分是PoseTrack数据集的Evaluation Tools中的code:https://github.com/leonid-pishchulin/poseval.git

    Step1 :首先是README.md

      要求预测的结果要按所属视频分开保存(video_pred_1.mat    video_pred_2.mat ...),并且与GT内部的数据形式要相同,下面是最终的json格式:

       "annolist": [
           "image": [
              "name": "images/bonn_5sec/000342_mpii/00000001.jpg"
               "annorect": [
                   "x1": [625],
               "y1": [94],
               "x2": [681],
               "y2": [178],
               "score": [0.9],
               "track_id": [0],
               "annopoints": [
                   "point": [
                           "id": [0],
                       "x": [394],
                       "y": [173],
                       { ... }
            { ... }
           { ... }





    然后通过poseval-master/matlab/mat2json.m 转换成json格式。然后调用poseval-master/py/evaluate.py --args..得到测试结果。

    Step2 : 解读用到的三个文件:evaluate.py、eval_helpers.py、evaluateAP.py。

      首先利用输入的参数调用 eval_helpers.load_data_dir(argv):

    gtFramesAll,prFramesAll = eval_helpers.load_data_dir(argv)

    这里输出的prFramesAll中有三个keys()  score 、 annopoints  、seq_id 依次存贮了所有需要待测图片的结果,同时需要保证len(gt)== len(pr)


    apAll,preAll,recAll = evaluateAP(gtFramesAll,prFramesAll)

      打开evaluateAP.py 代码如下:

    def evaluateAP(gtFramesAll, prFramesAll):
        """Evaluate per-joint average precision for a set of frames.

        Args:
            gtFramesAll: ground-truth frames, one entry per image.
            prFramesAll: predicted frames, aligned one-to-one with gtFramesAll.

        Returns:
            The tuple (apAll, preAll, recAll) produced by computeMetrics.
        """
        # maximum joint distance accepted as a match when assigning poses
        distThresh = 0.5
        # assign predicted poses to GT poses;
        # scoresAll[joint][image] and labelsAll[joint][image] each hold one array
        # (created as np.zeros([0, 0]) and grown with np.append)
        scoresAll, labelsAll, nGTall, _ = eval_helpers.assignGTmulti(gtFramesAll, prFramesAll, distThresh)
        # compute average precision (AP), precision and recall per part
        apAll, preAll, recAll = computeMetrics(scoresAll, labelsAll, nGTall)
        return apAll, preAll, recAll

    scoresAll[ i ][ imgidx ] = np.append( scoresAll[ i ][ imgidx ], s[ i ] )    预测(Pred)的第 i 个关键点的 score

    labelsAll[ i ][ imgidx ] = np.append( labelsAll[ i ][ imgidx ], m[ i ] )     只对有预测的第 i 个关键点(hasPr = 1)追加标签:若该预测点与所匹配的GT关键点的归一化距离 <= distThresh,则 m[ i ] = 1,否则 = 0

    nGTall.shape = nJoints x len(gtFrames)            存的是每张图片中各个关节点被标注(GT)的数量

    接下来看computeMetrics 这个函数:

    def computeMetrics(scoresAll, labelsAll, nGTall):
        """Compute AP, precision and recall (in percent) for every joint.

        Args:
            scoresAll: scoresAll[joint][image] -> array of prediction scores.
            labelsAll: labelsAll[joint][image] -> array of labels for those scores.
            nGTall: 2-D array indexed [joint, image] with GT joint counts.

        Returns:
            (apAll, preAll, recAll), each of shape (nJoints + 1, 1).  Rows
            0..nJoints-1 are per-joint values; the last row is their mean.
        """
        nJoints = nGTall.shape[0]
        apAll, preAll, recAll = (np.zeros((nJoints + 1, 1)) for _ in range(3))

        for jointIdx in range(nJoints):
            # pool the scores/labels of this joint over all images
            jointScores = np.zeros([0, 0], dtype=np.float32)
            jointLabels = np.zeros([0, 0], dtype=np.int8)
            for frameIdx in range(nGTall.shape[1]):
                jointScores = np.append(jointScores, scoresAll[jointIdx][frameIdx])
                jointLabels = np.append(jointLabels, labelsAll[jointIdx][frameIdx])

            # recall/precision curve for this joint
            totalGT = sum(nGTall[jointIdx, :])
            precision, recall, _ = eval_helpers.computeRPC(jointScores, jointLabels, totalGT)
            if len(precision) == 0:
                # nothing to evaluate for this joint: its row stays at zero
                continue
            apAll[jointIdx] = eval_helpers.VOCap(recall, precision) * 100
            # final point of each curve
            preAll[jointIdx] = precision[-1] * 100
            recAll[jointIdx] = recall[-1] * 100

        # last row: mean over all joints
        for metric in (apAll, recAll, preAll):
            metric[nJoints] = metric[:nJoints, 0].mean()
        return apAll, preAll, recAll

    这里涉及到 precision 和 recall . 不了解的可以参考检测和姿态估计的评价标准 http://blog.csdn.net/xiaojiajia007/article/details/78746149

    Precision = TP / ( TP + FP )

    Recall = TP / ( TP + FN )


    最后附上 eval_helpers.assignGTmulti(gtFrames,prFrames,distThresh) 代码的标注

    def assignGTmulti(gtFrames, prFrames, distThresh):
        """Assign predicted poses to ground-truth poses, frame by frame.

        Within a frame every predicted pose keeps only the GT pose for which the
        largest fraction of annotated joints lies within ``distThresh`` (joint
        distance divided by the GT head size), and every GT pose keeps only its
        best remaining prediction.  Each predicted joint is then recorded with
        its score and a label telling whether it matched its GT joint.

        Args:
            gtFrames: list of GT frames.  Each frame is a dict whose "annorect"
                list holds poses with "x1"/"y1"/"x2"/"y2" (passed to
                getHeadSize) and joints under "annopoints"[0]["point"].
            prFrames: list of predicted frames, same length as ``gtFrames``.
                Side effect: each frame's "annorect" list is replaced by the
                subset of poses that carry "annopoints"[0]["point"].
            distThresh: largest normalised joint distance counted as a match.

        Returns:
            scoresAll: scoresAll[joint][frame] -> array of predicted joint scores.
            labelsAll: labelsAll[joint][frame] -> array of labels (1 = matched).
            nGTall: array of shape (nJoints, nFrames) counting annotated GT joints.
            motAll: motAll[frame][joint] -> dict with "ridxsGT", "ridxsPr" (pose
                indices having that joint) and "dist" (GT x prediction matrix,
                NaN where the pair does not match).

        Raises:
            AssertionError: if the two frame lists differ in length, or a
                predicted joint has no "score".
        """
        assert (len(gtFrames) == len(prFrames))
        nJoints = Joint().count
        nFrames = len(gtFrames)

        # part detection scores and positive / negative labels
        scoresAll = {}
        labelsAll = {}
        for pidx in range(nJoints):
            scoresAll[pidx] = {}
            labelsAll[pidx] = {}
            for imgidx in range(nFrames):
                scoresAll[pidx][imgidx] = np.zeros([0, 0], dtype=np.float32)
                labelsAll[pidx][imgidx] = np.zeros([0, 0], dtype=np.int8)
        # number of annotated GT joints per image
        nGTall = np.zeros([nJoints, nFrames])
        # container to save info for computing MOT metrics
        motAll = {}

        def recordPose(imgidx, poseScores, poseLabels, posePresent):
            # append score and label of every joint this predicted pose contains
            for i in range(nJoints):
                if posePresent[i]:
                    scoresAll[i][imgidx] = np.append(scoresAll[i][imgidx], poseScores[i])
                    labelsAll[i][imgidx] = np.append(labelsAll[i][imgidx], poseLabels[i])

        for imgidx in range(nFrames):
            gtRects = gtFrames[imgidx]["annorect"]
            # drop predictions without joints before sizing the per-frame arrays
            prRects = [rect for rect in prFrames[imgidx]["annorect"]
                       if ("annopoints" in rect.keys()) and
                       ("point" in rect["annopoints"][0].keys())]
            prFrames[imgidx]["annorect"] = prRects
            nGT = len(gtRects)
            nPr = len(prRects)

            # distance between predicted and GT joints
            dist = np.full((nPr, nGT, nJoints), np.inf)
            # score of the predicted joint: one row per predicted pose
            score = np.full((nPr, nJoints), np.nan)
            # body joint prediction exist
            hasPr = np.zeros((nPr, nJoints), dtype=bool)
            # body joint is annotated
            hasGT = np.zeros((nGT, nJoints), dtype=bool)

            # look up every GT joint once per pose
            gtJoints = []
            for ridxGT, rectGT in enumerate(gtRects):
                pointsGT = []
                if len(rectGT["annopoints"]) > 0:
                    pointsGT = rectGT["annopoints"][0]["point"]
                joints = [getPointGTbyID(pointsGT, i) for i in range(nJoints)]
                gtJoints.append(joints)
                for i, ppGT in enumerate(joints):
                    if len(ppGT) > 0:
                        hasGT[ridxGT, i] = True

            # look up every predicted joint once per pose
            prJoints = []
            for ridxPr, rectPr in enumerate(prRects):
                pointsPr = rectPr["annopoints"][0]["point"]
                joints = [getPointGTbyID(pointsPr, i) for i in range(nJoints)]
                prJoints.append(joints)
                for i, ppPr in enumerate(joints):
                    if len(ppPr) > 0:
                        assert "score" in ppPr.keys(), "keypoint score is missing"
                        score[ridxPr, i] = ppPr["score"][0]
                        hasPr[ridxPr, i] = True

            if nPr and nGT:
                # fill dist (nPr x nGT x nJoints), normalised by the GT head size
                for ridxGT, rectGT in enumerate(gtRects):
                    headSize = getHeadSize(rectGT["x1"][0], rectGT["y1"][0],
                                           rectGT["x2"][0], rectGT["y2"][0])
                    for ridxPr in range(nPr):
                        for i in range(nJoints):
                            if hasPr[ridxPr, i] and hasGT[ridxGT, i]:
                                ppGT = gtJoints[ridxGT][i]
                                ppPr = prJoints[ridxPr][i]
                                pointGT = [ppGT["x"][0], ppGT["y"][0]]
                                pointPr = [ppPr["x"][0], ppPr["y"][0]]
                                dist[ridxPr, ridxGT, i] = np.linalg.norm(np.subtract(pointGT, pointPr)) / headSize

            # True where a predicted joint is close enough to the GT joint;
            # missing joints keep dist == inf and therefore never match
            match = dist <= distThresh

            if nPr and nGT:
                # number of annotated joints per GT pose
                nGTp = np.sum(hasGT, axis=1)
                # fraction of each GT pose's annotated joints that a prediction hits
                pck = 1.0 * np.sum(match, axis=2)
                annotated = nGTp > 0
                pck[:, annotated] /= nGTp[annotated]

                # preserve best GT match only: zero all but the top GT per prediction
                for ridxPr in range(nPr):
                    bestGT = np.argmax(pck[ridxPr])
                    bestVal = pck[ridxPr, bestGT]
                    pck[ridxPr, :] = 0
                    pck[ridxPr, bestGT] = bestVal
                # best remaining prediction per GT pose, -1 when nothing overlaps
                prToGT = np.argmax(pck, axis=0)
                prToGT[np.max(pck, axis=0) == 0] = -1

                # assign predicted poses to GT poses
                for ridxPr in range(nPr):
                    matchedGT = np.argwhere(prToGT == ridxPr)
                    if matchedGT.size:
                        # pose matches to GT: label joints by the distance test
                        assert(matchedGT.size == 1)
                        labels = match[ridxPr, matchedGT[0, 0], :]
                    else:
                        # no matching to GT: every joint is a false positive
                        labels = np.zeros(nJoints, dtype=bool)
                    recordPose(imgidx, score[ridxPr, :], labels, hasPr[ridxPr, :])
            elif not nGT:
                # No GT available. All predictions are false positives
                for ridxPr in range(nPr):
                    recordPose(imgidx, score[ridxPr, :],
                               np.zeros(nJoints, dtype=bool), hasPr[ridxPr, :])

            # info to compute MOT metrics, built for every frame so that
            # motAll never receives a missing or stale entry
            # NOTE(review): frames lacking predictions or GT yield empty index
            # arrays / empty dist matrices here - confirm the MOT consumer accepts that
            mot = {}
            for i in range(nJoints):
                # indices of the GT / predicted poses that contain joint i
                ridxsGT = np.argwhere(hasGT[:, i]).flatten().tolist()
                ridxsPr = np.argwhere(hasPr[:, i]).flatten().tolist()
                jointDist = np.full((len(ridxsGT), len(ridxsPr)), np.nan)
                for iPr, ridxPr in enumerate(ridxsPr):
                    for iGT, ridxGT in enumerate(ridxsGT):
                        if match[ridxPr, ridxGT, i]:
                            jointDist[iGT, iPr] = dist[ridxPr, ridxGT, i]
                mot[i] = {"ridxsGT": np.array(ridxsGT),
                          "ridxsPr": np.array(ridxsPr),
                          "dist": jointDist}
            motAll[imgidx] = mot

            # save number of GT joints
            nGTall[:, imgidx] += np.sum(hasGT, axis=0)
        return scoresAll, labelsAll, nGTall, motAll
