zoukankan      html  css  js  c++  java
  • python批量替代

    import re
    import jieba.analyse
    import codecs
    import pandas as pd
    def word_replace(xianbingshi, hospital1, output_path=None):
        """Tag every hospital name found in a text file with ``[hospital]``.

        The vocabulary file is read one term per line, stripped, de-duplicated
        and sorted longest-first so that a long name is tagged before any
        shorter name contained in it. Each line of the input text then has
        every term replaced, is stripped, printed, and written to the output
        file followed by a newline.

        Args:
            xianbingshi: Path of the UTF-8 text file to process (left untouched).
            hospital1: Path of the UTF-8 vocabulary file, one term per line.
            output_path: Where to write the tagged text. Defaults to the
                hard-coded location the original script used.
        """
        if output_path is None:
            # NOTE(review): this looks like a Windows path whose backslashes
            # were lost; it is kept byte-for-byte so it still matches the
            # path the calling script passes to the follow-up steps.
            output_path = r'C:UsersAdministrator.SC-201812211013PycharmProjects词表工作代码yiwoqucodexianbingshi_write_sub.txt'

        # dict.fromkeys drops duplicates in one pass while keeping first-seen
        # order, so the stable sort below yields the same order as before.
        with codecs.open(hospital1, 'r', 'utf8') as f:
            hospital = list(dict.fromkeys(line.strip() for line in f))
        # A blank vocabulary line yields the empty term, and
        # str.replace('', tag) would insert the tag between every character.
        hospital = [ho for ho in hospital if ho]
        # Longest terms first.
        hospital.sort(key=len, reverse=True)

        data = []
        with codecs.open(xianbingshi, 'r', 'utf8') as f:
            # Tagging priority across the whole pipeline:
            # hospital, operation, test, symptom, disease, body part, time.
            for line in f:
                for ho in hospital:
                    if ho in line:
                        line = line.replace(ho, '[hospital]')
                line = line.strip()
                data.append(line)
                print(line)

        # The with-block closes the file; no explicit close() is needed.
        with codecs.open(output_path, 'w', 'utf8') as f:
            f.write(''.join(line + '\n' for line in data))
    
    def word_replace3(xianbingshi2, operation1):
        """Tag every operation term in a text file with ``[operation]``, in place.

        The vocabulary file is read one term per line, stripped, de-duplicated
        and sorted longest-first. Every line of the text file has each term
        replaced, is stripped and printed, and the file is then overwritten
        with the processed lines (one per line).

        Args:
            xianbingshi2: Path of the UTF-8 text file; it is rewritten in place.
            operation1: Path of the UTF-8 vocabulary file, one term per line.
        """
        # dict.fromkeys drops duplicates in one pass while keeping first-seen
        # order, so the stable sort below yields the same order as before.
        with codecs.open(operation1, 'r', 'utf8') as f:
            operation = list(dict.fromkeys(line.strip() for line in f))
        # A blank vocabulary line yields the empty term, and
        # str.replace('', tag) would insert the tag between every character.
        operation = [op for op in operation if op]
        # Longest terms first so a long term wins over a shorter one inside it.
        operation.sort(key=len, reverse=True)

        data = []
        with codecs.open(xianbingshi2, 'r', 'utf8') as f:
            # Tagging priority across the whole pipeline:
            # hospital, operation, test, symptom, disease, body part, time.
            for line in f:
                for op in operation:
                    line = line.replace(op, '[operation]')
                line = line.strip()
                data.append(line)
                print(line)

        # All lines were read above, so overwriting the same path is safe.
        with codecs.open(xianbingshi2, 'w', 'utf8') as f:
            f.write(''.join(line + '\n' for line in data))
    def word_replace1(xianbingshi2, disease1):
        """Tag multi-character disease terms in a text file with ``[disease]``.

        The vocabulary file is read one term per line, stripped, de-duplicated
        and sorted longest-first. Only terms longer than one character are
        applied here. Every line of the text file has each term replaced, is
        stripped and printed, and the file is then overwritten in place.

        Args:
            xianbingshi2: Path of the UTF-8 text file; it is rewritten in place.
            disease1: Path of the UTF-8 vocabulary file, one term per line.
        """
        # dict.fromkeys drops duplicates in one pass while keeping first-seen
        # order, so the stable sort below yields the same order as before.
        with codecs.open(disease1, 'r', 'utf8') as f:
            disease = list(dict.fromkeys(line.strip() for line in f))
        # Single-character (and empty) terms are skipped by this function.
        disease = [di for di in disease if len(di) > 1]
        # Longest terms first so a long term wins over a shorter one inside it.
        disease.sort(key=len, reverse=True)

        data = []
        with codecs.open(xianbingshi2, 'r', 'utf8') as f:
            # Tagging priority across the whole pipeline:
            # hospital, operation, test, symptom, disease, body part, time.
            for line in f:
                for di in disease:
                    line = line.replace(di, '[disease]')
                line = line.strip()
                data.append(line)
                print(line)

        # All lines were read above, so overwriting the same path is safe.
        with codecs.open(xianbingshi2, 'w', 'utf8') as f:
            f.write(''.join(line + '\n' for line in data))
    def word_replace2(xianbingshi2, symptom1):
        """Tag multi-character symptom terms in a text file with ``[symptom]``.

        The vocabulary file is read one term per line, stripped, de-duplicated
        and sorted longest-first. Only terms longer than one character are
        applied here. Every line of the text file has each term replaced, is
        stripped and printed, and the file is then overwritten in place.

        Args:
            xianbingshi2: Path of the UTF-8 text file; it is rewritten in place.
            symptom1: Path of the UTF-8 vocabulary file, one term per line.
        """
        # dict.fromkeys drops duplicates in one pass while keeping first-seen
        # order, so the stable sort below yields the same order as before.
        with codecs.open(symptom1, 'r', 'utf8') as f:
            symptom = list(dict.fromkeys(line.strip() for line in f))
        # Single-character (and empty) terms are skipped by this function.
        symptom = [sy for sy in symptom if len(sy) > 1]
        # Longest terms first so a long term wins over a shorter one inside it.
        symptom.sort(key=len, reverse=True)

        data = []
        with codecs.open(xianbingshi2, 'r', 'utf8') as f:
            # Tagging priority across the whole pipeline:
            # hospital, operation, test, symptom, disease, body part, time.
            for line in f:
                for sy in symptom:
                    line = line.replace(sy, '[symptom]')
                line = line.strip()
                data.append(line)
                print(line)

        # All lines were read above, so overwriting the same path is safe.
        with codecs.open(xianbingshi2, 'w', 'utf8') as f:
            f.write(''.join(line + '\n' for line in data))
    
    def word_replace4(xianbingshi2, test1):
        """Tag every examination/test term in a text file with ``[test]``, in place.

        The vocabulary file is read one term per line, stripped, de-duplicated
        and sorted longest-first. Every line of the text file has each term
        replaced, is stripped and printed, and the file is then overwritten
        with the processed lines (one per line).

        Args:
            xianbingshi2: Path of the UTF-8 text file; it is rewritten in place.
            test1: Path of the UTF-8 vocabulary file, one term per line.
        """
        # dict.fromkeys drops duplicates in one pass while keeping first-seen
        # order, so the stable sort below yields the same order as before.
        with codecs.open(test1, 'r', 'utf8') as f:
            test = list(dict.fromkeys(line.strip() for line in f))
        # A blank vocabulary line yields the empty term, and
        # str.replace('', tag) would insert the tag between every character.
        test = [te for te in test if te]
        # Longest terms first so a long term wins over a shorter one inside it.
        test.sort(key=len, reverse=True)

        data = []
        with codecs.open(xianbingshi2, 'r', 'utf8') as f:
            # Tagging priority across the whole pipeline:
            # hospital, operation, test, symptom, disease, body part, time.
            for line in f:
                for te in test:
                    line = line.replace(te, '[test]')
                line = line.strip()
                data.append(line)
                print(line)

        # All lines were read above, so overwriting the same path is safe.
        with codecs.open(xianbingshi2, 'w', 'utf8') as f:
            f.write(''.join(line + '\n' for line in data))
    def word_replace5(xianbingshi2, time1):
        """Tag every time expression in a text file with ``[time]``, in place.

        The vocabulary file is read one term per line, stripped, de-duplicated
        and sorted longest-first. Every line of the text file has each term
        replaced, is stripped and printed, and the file is then overwritten
        with the processed lines (one per line).

        Args:
            xianbingshi2: Path of the UTF-8 text file; it is rewritten in place.
            time1: Path of the UTF-8 vocabulary file, one term per line.
        """
        # dict.fromkeys drops duplicates in one pass while keeping first-seen
        # order, so the stable sort below yields the same order as before.
        with codecs.open(time1, 'r', 'utf8') as f:
            time_terms = list(dict.fromkeys(line.strip() for line in f))
        # A blank vocabulary line yields the empty term, and
        # str.replace('', tag) would insert the tag between every character.
        time_terms = [t for t in time_terms if t]
        # Longest terms first so a long term wins over a shorter one inside it.
        time_terms.sort(key=len, reverse=True)

        data = []
        with codecs.open(xianbingshi2, 'r', 'utf8') as f:
            # Tagging priority across the whole pipeline:
            # hospital, operation, test, symptom, disease, body part, time.
            for line in f:
                for t in time_terms:
                    line = line.replace(t, '[time]')
                line = line.strip()
                data.append(line)
                print(line)

        # All lines were read above, so overwriting the same path is safe.
        with codecs.open(xianbingshi2, 'w', 'utf8') as f:
            f.write(''.join(line + '\n' for line in data))
    def word_replace6(xianbingshi2, organ1):
        """Tag multi-character body-part terms in a text file with ``[organ]``.

        The vocabulary file is read one term per line, stripped, de-duplicated
        and sorted longest-first. Only terms longer than one character are
        applied here. Every line of the text file has each term replaced, is
        stripped and printed, and the file is then overwritten in place.

        Args:
            xianbingshi2: Path of the UTF-8 text file; it is rewritten in place.
            organ1: Path of the UTF-8 vocabulary file, one term per line.
        """
        # dict.fromkeys drops duplicates in one pass while keeping first-seen
        # order, so the stable sort below yields the same order as before.
        with codecs.open(organ1, 'r', 'utf8') as f:
            organ = list(dict.fromkeys(line.strip() for line in f))
        # Single-character (and empty) terms are skipped by this function.
        organ = [o for o in organ if len(o) > 1]
        # Longest terms first so a long term wins over a shorter one inside it.
        organ.sort(key=len, reverse=True)

        data = []
        with codecs.open(xianbingshi2, 'r', 'utf8') as f:
            # Tagging priority across the whole pipeline:
            # hospital, operation, test, symptom, disease, body part, time.
            for line in f:
                for o in organ:
                    line = line.replace(o, '[organ]')
                line = line.strip()
                data.append(line)
                print(line)

        # All lines were read above, so overwriting the same path is safe.
        with codecs.open(xianbingshi2, 'w', 'utf8') as f:
            f.write(''.join(line + '\n' for line in data))
    def word_replace7(xianbingshi2, symptom1):
        """Tag single-character symptom terms in a text file with ``[symptom]``.

        Only vocabulary entries that are exactly one character long after
        stripping are used; each accepted term is printed as it is loaded.
        Every line of the text file has each term replaced, is stripped and
        printed, and the file is then overwritten in place.

        Args:
            xianbingshi2: Path of the UTF-8 text file; it is rewritten in place.
            symptom1: Path of the UTF-8 vocabulary file, one term per line.
        """
        # A dict keyed by term de-duplicates in one pass and keeps first-seen
        # order, so replacements are applied in the same order as before.
        seen = {}
        with codecs.open(symptom1, 'r', 'utf8') as f:
            for line in f:
                line = line.strip()
                if len(line) == 1 and line not in seen:
                    seen[line] = None
                    print(line)
        symptom = list(seen)
        # All terms have length 1, so this stable sort keeps the file order.
        symptom.sort(key=len, reverse=True)

        data = []
        with codecs.open(xianbingshi2, 'r', 'utf8') as f:
            # Tagging priority across the whole pipeline:
            # hospital, operation, test, symptom, disease, body part, time.
            for line in f:
                for sy in symptom:
                    line = line.replace(sy, '[symptom]')
                line = line.strip()
                data.append(line)
                print(line)

        # All lines were read above, so overwriting the same path is safe.
        with codecs.open(xianbingshi2, 'w', 'utf8') as f:
            f.write(''.join(line + '\n' for line in data))
    def word_replace8(xianbingshi2, disease1):
        """Tag single-character disease terms in a text file with ``[disease]``.

        Only vocabulary entries that are exactly one character long after
        stripping are used. Every line of the text file has each term
        replaced, is stripped and printed, and the file is then overwritten
        in place.

        Args:
            xianbingshi2: Path of the UTF-8 text file; it is rewritten in place.
            disease1: Path of the UTF-8 vocabulary file, one term per line.
        """
        # The original filter was `line == 1`, which compares a str with an
        # int and is never true, so no term was ever loaded. The length check
        # matches the single-character symptom step (word_replace7).
        with codecs.open(disease1, 'r', 'utf8') as f:
            stripped = (line.strip() for line in f)
            # dict.fromkeys de-duplicates while keeping first-seen order.
            disease = list(dict.fromkeys(t for t in stripped if len(t) == 1))
        # All terms have length 1, so this stable sort keeps the file order.
        disease.sort(key=len, reverse=True)

        data = []
        with codecs.open(xianbingshi2, 'r', 'utf8') as f:
            # Tagging priority across the whole pipeline:
            # hospital, operation, test, symptom, disease, body part, time.
            for line in f:
                for di in disease:
                    line = line.replace(di, '[disease]')
                line = line.strip()
                data.append(line)
                print(line)

        # All lines were read above, so overwriting the same path is safe.
        with codecs.open(xianbingshi2, 'w', 'utf8') as f:
            f.write(''.join(line + '\n' for line in data))
    def word_replace9(xianbingshi2, organ1):
        """Tag single-character body-part terms in a text file with ``[organ]``.

        Only vocabulary entries that are exactly one character long after
        stripping are used. Every line of the text file has each term
        replaced, is stripped and printed, and the file is then overwritten
        in place.

        Args:
            xianbingshi2: Path of the UTF-8 text file; it is rewritten in place.
            organ1: Path of the UTF-8 vocabulary file, one term per line.
        """
        # The original filter was `line == 1`, which compares a str with an
        # int and is never true, so no term was ever loaded. The length check
        # matches the single-character symptom step (word_replace7).
        with codecs.open(organ1, 'r', 'utf8') as f:
            stripped = (line.strip() for line in f)
            # dict.fromkeys de-duplicates while keeping first-seen order.
            organ = list(dict.fromkeys(t for t in stripped if len(t) == 1))
        # All terms have length 1, so this stable sort keeps the file order.
        organ.sort(key=len, reverse=True)

        data = []
        with codecs.open(xianbingshi2, 'r', 'utf8') as f:
            # Tagging priority across the whole pipeline:
            # hospital, operation, test, symptom, disease, body part, time.
            for line in f:
                for o in organ:
                    line = line.replace(o, '[organ]')
                line = line.strip()
                data.append(line)
                print(line)

        # All lines were read above, so overwriting the same path is safe.
        with codecs.open(xianbingshi2, 'w', 'utf8') as f:
            f.write(''.join(line + '\n' for line in data))
    if __name__ == '__main__':
        # NOTE(review): these look like Windows paths whose backslashes were
        # lost (two of them contain a literal tab where "\t" presumably
        # stood). They are kept byte-for-byte; confirm the real locations
        # before running.

        # Source text and the working copy that every later step rewrites.
        # xianbingshi2 is the same string word_replace writes to by default.
        xianbingshi = r'C:UsersAdministrator.SC-201812211013PycharmProjects词表工作代码yiwoqucodexianbingshi_write.txt'
        xianbingshi2 =r'C:UsersAdministrator.SC-201812211013PycharmProjects词表工作代码yiwoqucodexianbingshi_write_sub.txt'

        # Vocabulary files, one term per line.
        hospital1 = r'C:UsersAdministrator.SC-201812211013PycharmProjects词表工作代码yiwoquTXThospital_0903.txt'
        operation1 = r"C:UsersAdministrator.SC-201812211013PycharmProjects词表工作代码yiwoquTXToperation_0903.txt"
        disease1 =r'C:UsersAdministrator.SC-201812211013PycharmProjects词表工作代码yiwoquTXTdisease_0903.txt'
        symptom1 = r"C:UsersAdministrator.SC-201812211013PycharmProjects词表工作代码yiwoqucode症状.txt"
        test1 = r"C:UsersAdministrator.SC-201812211013PycharmProjects词表工作代码yiwoquTXT	est_0903.txt"
        time1 = r"C:UsersAdministrator.SC-201812211013PycharmProjects词表工作代码yiwoqu	ime1.txt"
        organ1 = r"C:UsersAdministrator.SC-201812211013PycharmProjects词表工作代码yiwoquTXTorgan_0903.txt"

        # Steps in execution order: multi-character terms per category first,
        # then the single-character passes (word_replace7/8/9).
        steps = (
            (word_replace, xianbingshi, hospital1),
            (word_replace3, xianbingshi2, operation1),
            (word_replace1, xianbingshi2, disease1),
            (word_replace2, xianbingshi2, symptom1),
            (word_replace4, xianbingshi2, test1),
            # word_replace5(xianbingshi2, time1) is left disabled, so time1
            # is currently unused.
            (word_replace6, xianbingshi2, organ1),
            (word_replace7, xianbingshi2, symptom1),
            (word_replace8, xianbingshi2, disease1),
            (word_replace9, xianbingshi2, organ1),
        )
        for step, text_path, vocab_path in steps:
            step(text_path, vocab_path)
  • 相关阅读:
    读《大道至简》第二章有感
    读大道至简之感
    C#学习笔记二:并行编程基础:在 PLINQ 和 TPL 中的 Lambda 表达式
    C#学习笔记一:委托、匿名函数、Lambda 表达式
    VS2013 最常用 和 不是最常用的快捷键备忘
    EntityFramework初上手
    C#的值类型,引用类型,栈,堆,ref,out
    python批量下载图片
    django部署for新浪SAE
    Linux下安装搜狗拼音输入法
  • 原文地址:https://www.cnblogs.com/yiwoqu/p/11542059.html
Copyright © 2011-2022 走看看