zoukankan      html  css  js  c++  java
  • 用python按行处理文件内容,并输出到另外一个文件

    用python按行处理文件内容,并输出到另外一个文件:

    import os
    import pymysql
    import json
    import threading
    import re
    
    
    def get_full_read_file_name(name):
        read_file = read_dic + read_file_prefix + str(name) + read_file_suffix
        print("read file name:" + read_file)
        return read_file
    
    
    def get_full_write_file_name(name):
        write_file = insert_dic + insert_file_prefix + str(name) + insert_file_suffix
        print("write file name:" + write_file)
        return write_file
    
    
    def do_something(line, file_read, file_write):
        file_write.write(line)
    
    
    def task(thread_name, file_name):
        print("当前线程:" + str(thread_name))
        # 如果有连接数据库的需要,可以放开此处代码
        # db = pymysql.connect("localhost", "root", "root", "test", 3307)
        # cursor = db.cursor()
        if not os.path.isdir(read_dic):
            return
        if not os.path.isfile(get_full_read_file_name(file_name)):
            return
        if not os.path.isdir(insert_dic):
            os.mkdir(insert_dic)
        # 每读取一个文件,就创建一个对应的文件
        file = open(get_full_read_file_name(file_name), "r")
        print("当前读取文件:" + get_full_read_file_name(file_name))
        file_insert = open(get_full_write_file_name(file_name), "w")
        print("当前写入文件:" + get_full_write_file_name(file_name))
        # 初始化行计数器
        line_count = 0
        for line in file:
            # 针对每行作校验,满足条件的才进行处理
            if re.search("err", line):
                continue
            # 针对每一行做响应的处理
            do_something(line, file, file_insert)
            line_count += 1
            # 每一千行插入一个sleep
            if line_count == 1000:
                file_insert.write("select SLEEP(1);
    ")
                line_count = 0
        file_insert.close()
        file.close()
        # 打开的数据库记得关闭
        # db.close()
    
    
    if __name__ == '__main__':
        # 读取当前文件夹下面的所有文件
        threads = []
        # 读取文件目录
        read_dic = '/Users/dxm/Documents/'
        # 写入文件目录
        insert_dic = '/Users/dxm/Documents/'
        # 读取文件前缀
        read_file_prefix = 'bid_did'
        # 读取文件后缀
        read_file_suffix = '.sql'
        # 写入文件前缀
        insert_file_prefix = 'bid_did_with_sleep'
        # 写入文件后缀
        insert_file_suffix = '.sql'
        try:
            thread_count = 10
            while thread_count < 50:
                # 创建线程拼接SQL
                t = threading.Thread(target=task, args=("thread-" + str(thread_count), thread_count))
                threads.append(t)
                thread_count += 1
    
            for t in threads:
                t.setDaemon(True)
                t.start()
    
            for t in threads:
                t.join()
    
            print("任务执行成功")
        except (
                RuntimeError, ValueError, TypeError, BufferError, ConnectionError, ConnectionResetError,
                ConnectionAbortedError):
            print("线程被终止,异常")
    
    
  • 相关阅读:
    Rails 5 Test Prescriptions 第6章Adding Data to Tests
    Rails 5 Test Prescriptions 第4章 什么制造了伟大的测试
    Rails 5 Test Prescriptions 第3章Test-Driven Rails
    VisualBasic6.0程序设计教程(第4版)(普通高等教育“十一五”国家级规划教材)
    Word Excel PPT 2016三合一办公应用实战从入门到精通 超值版
    R语言数据挖掘方法及应用
    Word--Excel 2016商务办公实战从新手到高手
    妙哉!Excel数据分析与处理就该这么学
    Visual C++串口通信开发入门与编程实践
    Hello C 语言
  • 原文地址:https://www.cnblogs.com/StivenYang/p/13589634.html
Copyright © 2011-2022 走看看