zoukankan      html  css  js  c++  java
  • 文本摘要 Rouge 指标计算 python

    # ROUGE n-gram counting helpers: ROUGE-1, ROUGE-2 and ROUGE-4 (ROUGE-SU4 is not implemented in this snippet)
    def Rouge_1(pred, true):
        """Count the unigram statistics needed for ROUGE-1.

        Both strings are segmented with ``jieba.cut`` (precise mode, the
        default), and the resulting tokens are treated as unigrams.

        Args:
            pred: Predicted (candidate) summary text.
            true: Reference summary text.

        Returns:
            A ``(total_num, pred_num, correct_num)`` tuple: the number of
            unigrams in the reference, the number in the prediction, and the
            clipped number of unigrams the two have in common.
        """
        from collections import Counter

        # jieba precise-mode segmentation; Counter keeps token multiplicity.
        ref_counts = Counter(jieba.cut(true))
        pred_counts = Counter(jieba.cut(pred))

        # Reference length and prediction length, duplicates included.
        total_num = sum(ref_counts.values())
        pred_num = sum(pred_counts.values())

        # Clipped overlap: each token is credited min(pred count, ref count)
        # times. A plain set intersection would count a repeated token only
        # once while the two lengths above count it every time, so identical
        # texts containing repeated tokens would score below 1.
        correct_num = sum((pred_counts & ref_counts).values())
        return total_num, pred_num, correct_num
    
    
    def Rouge_2(pred, true):
        """Count the bigram statistics needed for ROUGE-2.

        Both strings are segmented with ``jieba.cut``; every pair of adjacent
        tokens forms one bigram.

        Args:
            pred: Predicted (candidate) summary text.
            true: Reference summary text.

        Returns:
            A ``(total_num, pred_num, correct_num)`` tuple: the number of
            bigrams in the reference, the number in the prediction, and the
            clipped number of bigrams the two have in common. All three are 0
            for a text that segments into fewer than two tokens.
        """
        from collections import Counter

        terms_true = list(jieba.cut(true))
        terms_pred = list(jieba.cut(pred))

        # Bigrams are kept as token tuples rather than concatenated strings:
        # concatenation makes ("ab", "c") and ("a", "bc") indistinguishable.
        ref_counts = Counter(zip(terms_true, terms_true[1:]))
        pred_counts = Counter(zip(terms_pred, terms_pred[1:]))

        total_num = sum(ref_counts.values())
        pred_num = sum(pred_counts.values())

        # Clipped overlap so that repeated bigrams are counted consistently
        # with the two length totals above.
        correct_num = sum((pred_counts & ref_counts).values())
        return total_num, pred_num, correct_num
    
    
    def Rouge_4(pred, true):
        """Count the 4-gram statistics needed for ROUGE-4.

        Both strings are segmented with ``jieba.cut``; every run of four
        consecutive tokens forms one 4-gram.

        Args:
            pred: Predicted (candidate) summary text.
            true: Reference summary text.

        Returns:
            A ``(total_num, pred_num, correct_num)`` tuple: the number of
            4-grams in the reference, the number in the prediction, and the
            clipped number of 4-grams the two have in common. All three are 0
            for a text that segments into fewer than four tokens.
        """
        from collections import Counter

        terms_true = list(jieba.cut(true))
        terms_pred = list(jieba.cut(pred))

        # 4-grams are kept as token tuples rather than concatenated strings so
        # that different token sequences spelling the same text do not collide.
        ref_counts = Counter(
            zip(terms_true, terms_true[1:], terms_true[2:], terms_true[3:])
        )
        pred_counts = Counter(
            zip(terms_pred, terms_pred[1:], terms_pred[2:], terms_pred[3:])
        )

        total_num = sum(ref_counts.values())
        pred_num = sum(pred_counts.values())

        # Clipped overlap so that repeated 4-grams are counted consistently
        # with the two length totals above.
        correct_num = sum((pred_counts & ref_counts).values())
        return total_num, pred_num, correct_num
    
    
    def update_rouge_score(rouge, data=None, mode='1'):
        """Accumulate n-gram counts, or finalize precision/recall/F1.

        Operates in place on the ``rouge['Rouge_' + mode]`` entry.

        Args:
            rouge: Mapping whose ``'Rouge_<mode>'`` entry holds the running
                ``total_gram``, ``pred_gram`` and ``correct_gram`` counters.
            data: When truthy, an indexable ``(total, pred, correct)`` triple
                whose items are converted with ``int`` and added to the
                counters. When falsy (the default), the scores are computed
                from the counters accumulated so far.
            mode: Suffix selecting the entry to update, e.g. ``'1'``.

        Returns:
            The same ``rouge`` object that was passed in.
        """
        stats = rouge['Rouge_' + mode]

        if data:
            # Accumulation pass: fold one sample's counts into the totals.
            counter_keys = ('total_gram', 'pred_gram', 'correct_gram')
            for position, key in enumerate(counter_keys):
                stats[key] += int(data[position])
            return rouge

        # Finalization pass. `e` is not defined in this block; presumably a
        # small module-level epsilon guarding against division by zero.
        matched = stats['correct_gram']
        reference_size = stats['total_gram']
        predicted_size = stats['pred_gram']
        stats['precision'] = matched / (e + predicted_size)
        stats['recall'] = matched / (e + reference_size)
        p, r = stats['precision'], stats['recall']
        stats['f1'] = (2 * p * r) / (e + p + r)
        return rouge
  • 相关阅读:
    002-Linux下防火墙相关命令操作
    001-网卡配置
    vs2012中自带IIS如何让其他电脑访问
    001-Mono for android在vs2012中发布设置
    小知识:utf-8和utf8mb4字符集
    Maven 模块化开发
    JUnit 单元测试
    解决8080端口占用问题
    (三)Tomcat服务器 -------JavaWeb的学习之路
    (一)走进JavaWeb的世界 -------JavaWeb的学习之路
  • 原文地址:https://www.cnblogs.com/cupleo/p/15607186.html
Copyright © 2011-2022 走看看