zoukankan      html  css  js  c++  java
  • 一些好用的代码

    ##### 正则化 ####

    def re_fun(seq):
        """Strip unwanted characters from ``seq`` and return the cleaned text.

        Cleaning happens in two passes:

        1. Every run of characters that is not an ASCII letter, one of the
           explicitly listed punctuation marks, whitespace, or a CJK ideograph
           (U+4E00 to U+9FA5) is deleted.
        2. The characters ``< > : ; ! . 。 ,`` and the plain space are deleted.

        Args:
            seq: The text to clean.

        Returns:
            The cleaned string; an empty string stays empty.
        """
        # The escapes \s and \u4e00-\u9fa5 must be real regex escapes. Without
        # the backslashes the class degenerates to the literal range "0-u",
        # which deletes all CJK text and lets digits through.
        keep_rule = re.compile(
            r'[^a-zA-Z.,;《》?!“”‘’@#¥%…&×()——+【】{};;●,。&~、|\s::\u4e00-\u9fa5]+'
        )
        # Deleting these characters in one pass gives the same result as the
        # original chain of single-character substitutions.
        # NOTE(review): plain spaces are removed here, so callers that later
        # split the result on ' ' will get a single token per line - confirm
        # that this is intended.
        drop_rule = re.compile('[<>:;!.。, ]+')

        seq = keep_rule.sub('', seq)
        return drop_rule.sub('', seq)

    #############################################################################################################################################

    ##### 生成词表 #####
    def vocab_fun(filename):
        """Build a token-frequency table from a UTF-8 text file.

        Each line is cleaned with ``re_fun``, stripped of surrounding
        whitespace and split on single spaces; every non-empty token is
        counted.

        Args:
            filename: Path handed to ``tf.gfile.GFile``.

        Returns:
            A ``Counter`` mapping each token to its number of occurrences.
        """
        vocab = ct.Counter()
        # NOTE(review): tf.gfile is the TensorFlow 1.x name; on TensorFlow 2.x
        # the equivalent is tf.io.gfile.GFile - confirm the installed version.
        with codecs.getreader('utf-8')(tf.gfile.GFile(filename, 'rb')) as file:
            # Iterate lazily instead of readlines() so large corpora are not
            # loaded into memory at once.
            for line in file:
                tokens = re_fun(line).strip().split(' ')
                # Skip empty tokens so blank lines do not add a '' entry.
                vocab.update(token for token in tokens if token)
        return vocab

    ##########################################################################################################################################

    ##### 写入文件路径 #####
    # Directory that holds the parallel corpus.
    # NOTE(review): this path contains no separators; backslashes were
    # presumably lost when the snippet was copied. Confirm the real directory
    # (and prefer a raw string such as r"D:\...") before using it.
    dir_path = "D:mathine_learningpre_estidataset"

    # Corpus file names: German is the target side, English the source side.
    tgt = 'europarl-v7.de-en.de'
    src = 'europarl-v7.de-en.en'

    # Full paths of the training files inside dir_path.
    train_src = os.path.join(dir_path, os.path.basename(src))
    train_tgt = os.path.join(dir_path, os.path.basename(tgt))

    #########################################################################################################################################

    ##### 测bleu值 #####
    from nltk.translate.bleu_score import corpus_bleu

    # Compute a corpus-level BLEU score between two line-aligned text files.
    # src_file and tgt_file are expected to be defined before this point.
    temp1 = []  # tokenised lines of src_file (passed as the references)
    temp2 = []  # tokenised lines of tgt_file (passed as the hypotheses)

    # Context managers close both files even if reading or scoring fails.
    with open(src_file, 'r', encoding='utf-8') as src_seq, \
            open(tgt_file, 'r', encoding='utf-8') as tgt_seq:
        # zip() stops at the shorter file, so unmatched trailing lines are
        # ignored.
        for line1, line2 in zip(src_seq, tgt_seq):
            # split() with no argument also drops the trailing newline and
            # empty tokens, which strip(' ').split(' ') left in place.
            temp1.append(line1.split())
            temp2.append(line2.split())

    # corpus_bleu takes, for every hypothesis, a *list* of reference
    # translations, so each single reference is wrapped in its own list.
    a = corpus_bleu([[reference] for reference in temp1], temp2)
    print(a)
  • 相关阅读:
    【40讲系列1】数组、链表
    更改凭证类型
    将公司代码设置给生产性的(不能删除业务数据的配置)
    使用参考过账
    查看凭证行项目
    查看凭证过账行项目
    预制凭证
    做凭证时凭证日期等于过账日期
    英语-20210302
    自动计算税额
  • 原文地址:https://www.cnblogs.com/hanouba/p/11544867.html
Copyright © 2011-2022 走看看