zoukankan      html  css  js  c++  java
  • 一些好用的代码

    ##### 正则表达式文本清洗 (regex-based text cleaning) ####

    def re_fun(seq):
        """Strip unwanted characters from a piece of text.

        Two passes are applied:

        1. Every run of characters outside the whitelist (ASCII letters, the
           punctuation listed in the pattern, whitespace and the CJK ideograph
           range U+4E00-U+9FA5) is deleted.
        2. The characters ``< > : ; ! . 。 ,`` and the space character are then
           deleted as well.

        Requires ``re`` to be imported by the surrounding module.

        Args:
            seq: The text to clean.

        Returns:
            The cleaned text.
        """
        # The whitespace and CJK-range escapes need their backslashes: without
        # them the class matches the literal letters "s", "u", ... and a stray
        # "0-u" range, which deletes Chinese text and keeps digits instead.
        rule = re.compile(
            r'[^a-zA-Z.,;《》?!“”‘’@#¥%…&×()——+【】{};;●,。&~、|\s::\u4e00-\u9fa5]+'
        )
        seq = rule.sub('', seq)
        # Deleting these characters in one pass gives the same result as
        # deleting them one class at a time.
        # NOTE(review): every space is removed here, so a caller that later
        # splits the result on ' ' gets a single token per line - confirm
        # that this is intended.
        return re.sub('[<>:;!.。, ]+', '', seq)

    #############################################################################################################################################

    ##### 生成词表 #####
    def vocab_fun(filename):
        """Count token frequencies in a UTF-8 encoded text file.

        Each line is cleaned with ``re_fun``, stripped of surrounding
        whitespace and split on single spaces; the resulting non-empty tokens
        are tallied.

        Relies on names supplied by the surrounding module: ``re_fun``,
        ``codecs``, ``ct`` (presumably ``collections`` - confirm) and ``tf``
        (presumably TensorFlow 1.x, for ``tf.gfile.GFile`` - confirm).

        Args:
            filename: Path handed to ``tf.gfile.GFile``.

        Returns:
            A ``ct.Counter`` mapping each token to its number of occurrences.
        """
        vocab = ct.Counter()
        with codecs.getreader('utf-8')(tf.gfile.GFile(filename, 'rb')) as file:
            # Iterate the reader lazily instead of materialising every line
            # with readlines(); corpus files can be very large.
            for line in file:
                words = re_fun(line).strip().split(' ')
                # split(' ') yields '' for blank lines; do not count those
                # as vocabulary entries.
                vocab.update(word for word in words if word)
        return vocab

    ##########################################################################################################################################

    ##### 写入文件路径 #####
    # Dataset directory and the file names of the parallel corpus
    # (requires ``os`` to be imported by the surrounding module).
    # NOTE(review): the directory literal contains no path separators; it
    # looks like the backslashes of a Windows path were lost - confirm the
    # real location before use.
    dir_path = "D:mathine_learningpre_estidataset"
    tgt = 'europarl-v7.de-en.de'
    src = 'europarl-v7.de-en.en'
    # basename() keeps only the file name, so the joined paths always point
    # directly inside dir_path.
    train_src = os.path.join(dir_path, os.path.basename(src))
    train_tgt = os.path.join(dir_path, os.path.basename(tgt))

    #########################################################################################################################################

    ##### 测bleu值 #####
    from nltk.translate.bleu_score import corpus_bleu

    # Corpus-level BLEU between two line-aligned, whitespace-tokenised files.
    # ``src_file`` and ``tgt_file`` must be defined by the surrounding code.
    # The lines of ``src_file`` are passed as the references (first argument
    # of corpus_bleu) and the lines of ``tgt_file`` as the hypotheses.
    temp1 = []  # per sentence: a list holding the single reference token list
    temp2 = []  # per sentence: the hypothesis token list
    # ``with`` closes both files even if reading or scoring raises.
    with open(src_file, 'r', encoding='utf-8') as src_seq, \
            open(tgt_file, 'r', encoding='utf-8') as tgt_seq:
        for line1, line2 in zip(src_seq, tgt_seq):
            # corpus_bleu expects a *list of references* for every hypothesis;
            # passing the bare token list would make it treat each token
            # string as a separate reference.
            # split() without arguments also drops the trailing newline and
            # avoids empty tokens.
            temp1.append([line1.split()])
            temp2.append(line2.split())
    a = corpus_bleu(temp1, temp2)
    print(a)
  • 相关阅读:
    青魔法圣堂法术 Django的技术栈(持续更新)
    青魔法圣堂法术 Django REST framework (DRF) 框架(持续更新)
    Python无法卸载的解决办法
    Django开发social-auth-app-django 第三方登陆
    【转载】青魔法圣堂法术Django项目知识点汇总
    基于session 的springMvc 国际化
    java导出生成csv文件
    mybatis + log4j 打印mybatis的sql
    spring Mvc + Mybatis 中使用junit
    spring官网项目
  • 原文地址:https://www.cnblogs.com/hanouba/p/11544867.html
Copyright © 2011-2022 走看看