zoukankan      html  css  js  c++  java
  • 一些好用的代码

    ##### 正则表达式清洗 ####

    def re_fun(seq):
        """Strip unwanted characters from a piece of text with regular expressions.

        Two passes are applied:

        1. Every run of characters outside the allow-list (ASCII letters, the
           listed ASCII and full-width punctuation, whitespace, and the CJK
           range U+4E00-U+9FA5) is removed.
        2. The characters ``<``, ``>``, ``:``, ``;``, ``!``, ``.``, the
           ideographic full stop, ``,`` and the ASCII space are removed.

        Args:
            seq: The text to clean.

        Returns:
            The cleaned text.
        """
        # The escapes must reach the regex engine intact: the whitespace class
        # keeps whitespace and the \u range keeps CJK ideographs. Without the
        # backslashes the set degrades to the literal characters "s", "u",
        # "4", ... plus an accidental "0-u" range that keeps digits and drops
        # every CJK character.
        rule = re.compile(
            r'[^a-zA-Z.,;《》?!“”‘’@#¥%…&×()——+【】{};;●,。&~、|\s::'
            + '\u4e00-\u9fa5]+'
        )
        seq = rule.sub('', seq)
        # Removing single characters is order-independent, so the chain of
        # one-character substitutions is expressed as a single character class.
        # NOTE(review): this also deletes every ASCII space, so a caller that
        # later splits on ' ' gets one token per line - confirm that is intended.
        return re.sub('[<>:;!.。, ]+', '', seq)

    #############################################################################################################################################

    ##### 生成词表 #####
    def vocab_fun(filename):
        """Count token frequencies in a UTF-8 encoded text file.

        Each line is cleaned with ``re_fun``, stripped of surrounding
        whitespace and split on single spaces; every resulting token is
        tallied.

        Args:
            filename: Path passed to ``tf.gfile.GFile``.

        Returns:
            A ``ct.Counter`` mapping each token to its number of occurrences.
        """
        vocab = ct.Counter()
        # The file is opened in binary mode; the codecs reader does the UTF-8
        # decoding. Iterating the reader streams the file line by line instead
        # of loading it whole with readlines().
        with codecs.getreader('utf-8')(tf.gfile.GFile(filename, 'rb')) as file:
            for line in file:
                words = re_fun(line).strip().split(' ')
                vocab.update(words)
        return vocab

    ##########################################################################################################################################

    ##### 写入文件路径 #####
    # Dataset directory and the file names of the parallel corpus.
    # NOTE(review): this literal looks like a Windows path whose backslash
    # separators were lost (e.g. r"D:\mathine_learning\pre_esti\dataset") -
    # confirm the real location before relying on it.
    dir_path = "D:mathine_learningpre_estidataset"
    tgt = 'europarl-v7.de-en.de'
    src = 'europarl-v7.de-en.en'
    # Only the base name of each corpus file is kept and re-rooted under dir_path.
    train_src = os.path.join(dir_path, os.path.basename(src))
    train_tgt = os.path.join(dir_path, os.path.basename(tgt))

    #########################################################################################################################################

    ##### 测bleu值 #####
    from nltk.translate.bleu_score import corpus_bleu

    # Compute a corpus-level BLEU score for two line-aligned text files.
    # Lines from src_file are used as the references and lines from tgt_file
    # as the hypotheses, matching the argument order of the corpus_bleu call.
    # NOTE(review): src_file and tgt_file are not defined in this snippet;
    # they must be set before it runs.
    temp1 = []  # one entry per sentence: a list holding its reference token list
    temp2 = []  # one entry per sentence: the hypothesis token list
    with open(src_file, 'r', encoding='utf-8') as src_seq, \
            open(tgt_file, 'r', encoding='utf-8') as tgt_seq:
        for line1, line2 in zip(src_seq, tgt_seq):
            # split() with no argument drops the trailing newline and avoids
            # empty tokens from repeated spaces.
            # corpus_bleu expects a list of references per sentence, so the
            # single reference is wrapped in its own list; passing the bare
            # token list would treat every token as a separate reference.
            temp1.append([line1.split()])
            temp2.append(line2.split())
    a = corpus_bleu(temp1, temp2)
    print(a)
  • 相关阅读:
    访问系统内容提供器,获取联系人列表
    ubuntu下查看IP Gateway DNS信息
    使用fragment,Pad手机共用一套代码
    动态注册广播接收器,监听网络变化
    启动Activity,传递参数最佳实践
    管理Activity,随时随地控制Activity的销毁工作
    unzip解压中文文件名乱码
    mysql null值转换
    (转)使用scp命令在linux操作系统之间传递文件
    比较两个日期的大小
  • 原文地址:https://www.cnblogs.com/hanouba/p/11544867.html
Copyright © 2011-2022 走看看