zoukankan      html  css  js  c++  java
  • 一些好用的代码

    ##### 正则表达式清洗文本 #####

    def re_fun(seq):
        """Strip unwanted characters from a piece of text.

        The cleaning runs in two passes. The first pass deletes every
        character that is not an ASCII letter, one of the punctuation marks
        listed in the pattern, a whitespace character, or a CJK ideograph in
        the range U+4E00..U+9FA5. The second pass deletes the characters
        ``< > : ; ! . 。 ,`` and the space character.

        Args:
            seq: The text to clean.

        Returns:
            The cleaned string. Whitespace other than the space character
            (tabs, newlines) survives both passes.
        """
        # The escapes are essential: without the backslashes, ``u4e00-u9fa5``
        # is read as the literal range ``0-u`` (keeping digits and deleting
        # all Chinese text) and ``s`` is just the letter s.
        keep_rule = re.compile(
            r'[^a-zA-Z.,;《》?!“”‘’@#¥%…&×()——+【】{};;●,。&~、|\s::'
            r'\u4e00-\u9fa5]+'
        )
        seq = keep_rule.sub('', seq)
        # A single character class is equivalent to deleting each of these
        # characters one after another, since deletion never creates new ones.
        return re.sub('[<>:;!.。, ]+', '', seq)

    #############################################################################################################################################

    ##### 生成词表 #####
    def vocab_fun(filename):
        """Count how often each token occurs in a UTF-8 text file.

        Every line is passed through ``re_fun``, stripped of surrounding
        whitespace and split on single spaces; each resulting piece is
        counted, including empty strings produced by the split.

        Args:
            filename: Path handed to ``tf.gfile.GFile`` and opened in binary
                mode; the bytes are decoded as UTF-8.

        Returns:
            A ``ct.Counter`` mapping token -> occurrence count
            (``ct`` is presumably ``collections`` -- confirm against the
            file's imports).
        """
        # NOTE(review): re_fun as written in this file deletes space
        # characters, so split(' ') would yield one token per line --
        # confirm that this is intended.
        counts = ct.Counter()
        utf8_reader = codecs.getreader('utf-8')
        with utf8_reader(tf.gfile.GFile(filename, 'rb')) as stream:
            for raw_line in stream.readlines():
                tokens = re_fun(raw_line).strip().split(' ')
                # One bulk update per line counts the same tokens as
                # updating once per word.
                counts.update(tokens)
        return counts

    ##########################################################################################################################################

    ##### 写入文件路径 #####
    # Locations of the parallel corpus files: src is the English side,
    # tgt the German side of the europarl-v7 de-en pair.
    # NOTE(review): dir_path looks like a Windows path whose backslash
    # separators were lost -- confirm the intended directory.
    dir_path = "D:mathine_learningpre_estidataset"
    src = 'europarl-v7.de-en.en'
    tgt = 'europarl-v7.de-en.de'

    # Only the base name of each corpus file is joined onto dir_path.
    train_src, train_tgt = (
        os.path.join(dir_path, os.path.basename(corpus_name))
        for corpus_name in (src, tgt)
    )

    #########################################################################################################################################

    ##### 计算 BLEU 值 #####
    from nltk.translate.bleu_score import corpus_bleu

    # Compute a corpus-level BLEU score, pairing the lines of src_file and
    # tgt_file by position. Lines from src_file are passed as the references
    # and lines from tgt_file as the hypotheses, matching the argument order
    # of corpus_bleu(list_of_references, hypotheses).
    # src_file and tgt_file are not defined in this snippet; they are
    # presumably path strings set elsewhere -- confirm.
    temp1 = []  # one list of references per sentence
    temp2 = []  # one tokenised hypothesis per sentence

    # The with-statement closes both files even if reading or scoring fails.
    with open(src_file, 'r', encoding='utf-8') as src_seq, \
            open(tgt_file, 'r', encoding='utf-8') as tgt_seq:
        for line1, line2 in zip(src_seq, tgt_seq):
            # strip() also removes the trailing newline, which would
            # otherwise stay attached to the last token of every line.
            reference = line1.strip().split(' ')
            hypothesis = line2.strip().split(' ')
            # corpus_bleu expects a *list* of references for each
            # hypothesis, so the single reference is wrapped in a list.
            temp1.append([reference])
            temp2.append(hypothesis)

    a = corpus_bleu(temp1, temp2)
    print(a)
  • 相关阅读:
    2017的结束2018的开始
    .NET Core使用swagger进行API接口文档管理
    数据库可扩展设计方案
    容量估算--随时更新
    分库分表设计基础
    利用bucardo搭建PostgreSQL数据库双活
    通过pgAgent实现PostgreSQL上的自动作业
    MySQL主主配置
    Oracle统计信息不准(谓词越界)造成的性能问题
    获取Oracle中SQL语句的执行计划
  • 原文地址:https://www.cnblogs.com/hanouba/p/11544867.html
Copyright © 2011-2022 走看看