zoukankan      html  css  js  c++  java
  • 多分类与多标签分类评价指标

    单标签评价指标

    import matplotlib.pyplot as plt
    import numpy as np
    
    def F1(P,R):
        """Return the F1 score, the harmonic mean of precision and recall.

        Args:
            P: Precision.
            R: Recall.

        Returns:
            ``2*P*R/(P+R)``. When the division raises ``ZeroDivisionError``
            (plain Python numbers with ``P + R == 0``), 0.0 is returned
            instead, since a classifier with zero precision and zero recall
            has no useful F1.
        """
        try:
            return 2*P*R/(P+R)
        except ZeroDivisionError:
            # Only plain Python numbers raise here; any other operand types
            # keep whatever behaviour their own division defines.
            return 0.0
    
    def ROC(pos,neg):
        """Plot a ROC curve from the scores of positive and negative samples.

        For every threshold in ``np.arange(0.05, 1, 0.05)`` a sample is
        predicted positive when its score is ``>=`` the threshold. The
        resulting (FPR, TPR) points are drawn with matplotlib and shown.

        Args:
            pos: Scores of the samples whose true class is positive.
            neg: Scores of the samples whose true class is negative.

        Returns:
            None. The curve is displayed via ``plt.show()``.

        Raises:
            ZeroDivisionError: If ``pos`` or ``neg`` is empty.
        """
        TPR = []
        FPR = []
        for threshold in np.arange(0.05,1,0.05):
            # Positives: at/above the threshold is a hit, otherwise a miss.
            TP = sum(1 for score in pos if score >= threshold)
            FN = sum(1 for score in pos if not score >= threshold)
            # Negatives: below the threshold is correct, otherwise a false alarm.
            TN = sum(1 for score in neg if score < threshold)
            FP = sum(1 for score in neg if not score < threshold)
            TPR.append(TP/(TP+FN))
            FPR.append(FP/(FP+TN))
        plt.xlabel("FPR", fontsize=12)
        plt.ylabel("TPR", fontsize=12)
        plt.plot(FPR,TPR)
        plt.show()
    
    def AUC(pos,neg):
        """Compute AUC by counting correctly ordered (positive, negative) pairs.

        A pair scores 1 when the positive sample's score is strictly higher
        than the negative sample's, and 0.5 when the two scores are equal.

        Args:
            pos: Scores of the positive samples.
            neg: Scores of the negative samples.

        Returns:
            The accumulated pair score divided by ``len(pos) * len(neg)``.
        """
        ranked_pos = sorted(pos)
        ranked_neg = sorted(neg)
        pair_score = 0
        for p_score in ranked_pos:
            for n_score in ranked_neg:
                if n_score < p_score:
                    pair_score += 1
                elif n_score == p_score:
                    pair_score += 0.5
                else:
                    # Negatives are ascending, so every remaining one is
                    # larger than this positive as well.
                    break
        total_pairs = len(ranked_pos) * len(ranked_neg)
        return pair_score / total_pairs
    View Code

    多标签评价指标

    #多标签评价指标
    import numpy as np
    
    #汉明损失
    # 错误率:sum(sum(y != yhat)) / (N*D)
    # 1.每个样本的错误率 2.对所有样本错误率平均
    #from sklearn.metrics import hamming_loss
    def HammingLoss(label, predict):
        """Return the Hamming loss: the fraction of label slots predicted wrongly.

        Args:
            label: Ground-truth indicator matrix of shape (N, D); any
                array-like (nested lists, int or bool arrays) is accepted.
            predict: Predicted indicator matrix with the same shape as
                ``label``.

        Returns:
            ``sum(label != predict) / N / D``.

        Raises:
            ValueError: If the inputs have fewer than two dimensions, have
                different shapes, or are empty.
        """
        label = np.asarray(label)
        predict = np.asarray(predict)
        if label.ndim < 2:
            raise ValueError("label must have shape (N, D)")
        if label.shape != predict.shape:
            raise ValueError("label and predict must have the same shape")
        if label.size == 0:
            raise ValueError("label must not be empty")
        N = label.shape[0]
        D = label.shape[1]
        # Compare with != rather than XOR: identical for 0/1 and bool data,
        # but also correct for non-binary values, and it matches the
        # documented formula sum(y != yhat) / (N*D).
        mismatches = np.count_nonzero(label != predict)
        hamming_loss = mismatches / N / D
        return hamming_loss
    
    #覆盖率
    #覆盖所有true标签的最大序号(降序)的均值
    #1.降序序号 2.True标签对应序号 3.求序号最大值
    def Coverage(label, logit):
        """Return the average depth needed to cover every true label.

        For each sample the labels are ordered by descending score; the
        sample's value is the 1-based position of its worst-placed true
        label (0 when the sample has no true label). The result is the mean
        of these values over all samples.

        Args:
            label: Indexable collection of per-sample arrays where a true
                label is marked with 1.
            logit: Indexable collection of per-sample score arrays.

        Returns:
            The mean coverage depth as a float.
        """
        sample_count = len(label)
        true_positions = [np.where(label[row] == 1)[0] for row in range(sample_count)]
        depth_sum = 0
        for row in range(sample_count):
            # Label ids ordered from highest to lowest score.
            ranking = np.argsort(-logit[row]).tolist()
            place_of = {label_id: place for place, label_id in enumerate(ranking, start=1)}
            depth_sum += max(
                (place_of[label_id] for label_id in true_positions[row].tolist()),
                default=0,
            )
        return depth_sum * 1.0 / sample_count
    
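    # One-error (the metric's name, not "1 minus the error rate")
    # Fraction of samples whose highest-scored label is not one of the true labels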
    def One_error(label, logit):
        """Return the fraction of samples whose top-scored label is not true.

        A message is printed for every sample that has no true label at all;
        such a sample is still counted as an error.

        Args:
            label: Indexable collection of per-sample arrays where a true
                label is marked with 1.
            logit: Indexable collection of per-sample score arrays.

        Returns:
            Number of misses divided by the number of samples, as a float.
        """
        sample_count = len(label)
        for row in range(sample_count):
            if max(label[row]) == 0:
                print("该条数据哪一类都不是")
        true_sets = [np.where(label[row] == 1)[0] for row in range(sample_count)]
        misses = sum(
            1
            for row, truths in enumerate(true_sets)
            if np.argmax(logit[row]) not in truths
        )
        return misses * 1.0 / sample_count
    
    
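    # Average precision
    # 1. Per sample: for each true label take (its rank among the true labels) / (its rank among all labels), ranks in descending score order, and average over the true labels  2. Average over all samples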
    def Average_Precision(label, logit):
        """Return the label-ranking average precision over all samples.

        For each true label of a sample, the ratio
        (number of true labels scored >= it) / (number of all labels scored >= it)
        is computed; the ratios are averaged per sample and then over samples.

        A message is printed for every sample that has no true label or only
        true labels. A sample without any true label scores 1.0 (the
        convention scikit-learn uses for this metric) instead of raising
        ``ZeroDivisionError``.

        Args:
            label: Per-sample indicator rows where a true label is marked
                with 1; arrays or nested lists.
            logit: Per-sample score rows aligned with ``label``.

        Returns:
            The mean per-sample average precision as a float.

        Raises:
            ZeroDivisionError: If ``label`` contains no samples.
        """
        N = len(label)
        precision = 0
        for i in range(N):
            if max(label[i]) == 0 or min(label[i]) == 1:
                print("该条数据哪一类都不是或者全都是")
            all_scores = np.asarray(logit[i])
            true_scores = all_scores[np.asarray(label[i]) == 1]
            if true_scores.size == 0:
                # Nothing to rank: count the sample as perfectly ranked.
                precision += 1.0
                continue
            precision_tmp = 0
            # Ascending order keeps the float summation order stable.
            for item in np.sort(true_scores):
                rank_in_true = np.count_nonzero(true_scores >= item)
                rank_in_all = np.count_nonzero(all_scores >= item)
                precision_tmp += rank_in_true / rank_in_all
            precision += precision_tmp / true_scores.size
        return precision / N
    
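    # Ranking loss
    # 1. Per sample: fraction of (true label, false label) pairs in which the false label scores at least as high as the true label  2. Average over all samples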
    def RankingLoss(label, logit):
        """Return the average fraction of wrongly ordered label pairs.

        For each sample, every (true label, false label) pair is checked; a
        pair is wrong when the true label's score is ``<=`` the false
        label's score. The per-sample fraction of wrong pairs is averaged
        over all samples.

        A message is printed for every sample that has no true label or only
        true labels. A sample with no such pairs contributes 0 to the loss
        instead of raising ``ZeroDivisionError``.

        Args:
            label: Per-sample indicator rows (1 = true, 0 = false); arrays
                or nested lists.
            logit: Per-sample score rows aligned with ``label``.

        Returns:
            The mean ranking loss as a float.

        Raises:
            ZeroDivisionError: If ``label`` contains no samples.
        """
        N = len(label)
        rankloss = 0
        for i in range(N):
            if max(label[i]) == 0 or min(label[i]) == 1:
                print("该条数据哪一类都不是或者全都是")
            row_label = np.asarray(label[i])
            row_score = np.asarray(logit[i])
            true_scores = row_score[row_label == 1]
            false_scores = row_score[row_label == 0]
            pair_count = true_scores.size * false_scores.size
            if pair_count == 0:
                # No (true, false) pair exists, so nothing can be misordered.
                continue
            # Broadcast to a (true, false) grid and count the wrong pairs.
            wrong = np.count_nonzero(true_scores[:, None] <= false_scores[None, :])
            rankloss += wrong / pair_count
        rankloss = rankloss / N
        return rankloss
    
    # Jaccard coefficient (Jaccard similarity)
    # Intersection over union
    # from sklearn.metrics import jaccard_score  (jaccard_similarity_score has been removed from scikit-learn)
    
    
    # One-sample example: ground truth, thresholded prediction, raw scores.
    label = np.array([[1, 0, 1, 0, 0]])
    pred = np.array([[0, 1, 1, 0, 0]])
    logit = np.array([[0.3, 0.4, 0.5, 0.1, 0.15]])

    # label and pred differ in two of the five positions.
    print(HammingLoss(label, pred))

      

  • 相关阅读:
    ChineseAlphabetUtil获取汉字首字母工具类
    RandomCodeUtil随机数工具类,随机生成数字、字母、数字字母组合、中文姓名
    ValidateUtil常用验证工具类,如手机、密码、邮箱等
    聊天项目
    日期
    字符串
    java中属性,set get 以及如何学习类的一些用法
    继承 多态 封装
    方法 属性 构造方法和包
    面向对象知识
  • 原文地址:https://www.cnblogs.com/iupoint/p/14579552.html
Copyright © 2011-2022 走看看