zoukankan      html  css  js  c++  java
  • 分批查询数据库数据存入文件

    ------------恢复内容开始------------

    来自https://blog.csdn.net/sinat_36190649/article/details/107941638,根据需求略做修改

    import pymysql
    import pandas as pd
    import gc
    import time
    import threading
     
    class Sql_df(object):
        def __init__(self,input_db):
            self.host = ip地址
            self.port = 3306
            self.username = 用户名
            self.password = 密码
            self.input_db = input_db
            self.conn = pymysql.connect(host = self.host,port = self.port,user = self.username,passwd = self.password,db = self.input_db,charset = 'utf8')
        def sql_input_all(self,sql_state):  #全部读取用pd处理
            cur_1 = self.conn.cursor(cursor = pymysql.cursors.DictCursor)
            cur_1.execute(sql_state+' limit 1')
            column_df = cur_1.fetchall()
            column_list = column_df[0].keys()
            cur_2 = self.conn.cursor()
            start_time = time.time()
            cur_2.execute(sql_state)
            tmp_list = cur_2.fetchall()
            result_df = pd.DataFrame(list(tmp_list),columns = column_list)
            end_time = time.time()
            during_time = round(end_time-start_time,0)/60
            print('input data has spend %f minutes'%during_time)
            return result_df
        def sql_input_batch(self,sql_state,nums_sql_state,batch_size):    #分批处理写入txt文件
            cur_2 = self.conn.cursor()
            start_time = time.time()
            cur_2.execute(nums_sql_state)
            nums_sample = cur_2.fetchall()[0][0]
            batches = nums_sample//batch_size
            cur_3 = self.conn.cursor()
            for i in range(batches):
                cur_3.execute(sql_state+' limit '+str(i*batch_size)+','+str(batch_size))
                tmp_d = cur_3.fetchall()
                tmp_list = ['. '.join(i) for i in tmp_d]
                with open('./all_data.txt', 'a+', encoding='utf-8') as f:
                    for i in tmp_list:
                        line = i.split()
                        if len(line) < 30:
                            continue
                        
                        new = content_pro(line)
                        f.write(new+'
    ')
                del tmp_d
                del tmp_list
                gc.collect()
                gc.collect()
            last_index = batches*batch_size
            cur_3.execute(sql_state+' limit '+str(last_index)+','+str(nums_sample-last_index))
            tmp_d = cur_3.fetchall()
            tmp_list = ['. '.join(i) for i in tmp_d]
            with open('./all_data.txt', 'a+', encoding='utf-8') as f:
                for i in tmp_list:
                    line = i.split()
                    if len(line) < 30:
                            continue
                        
                    new = content_pro(line)
                    f.write(new+'
    ')
            end_time = time.time()
            during_time = round(end_time-start_time,0)/60
            print('input data has spend %f minutes'%during_time)
            del tmp_d
            gc.collect()
            return 'success'
    
    
    if __name__ == '__main__':
        data_input = Sql_df(数据库名)
        rr = data_input.sql_input_batch('select * from 表名','select count(1) from 表名',1000000)        

    ------------恢复内容结束------------

  • 相关阅读:
    Android 图表控件的使用
    使用kotlin开发android
    Servlet 使用介绍(3)
    Servlet 使用介绍(2)
    Servlet 使用介绍(1)
    Android 中的style和Theme的使用
    Java Web工程目录结构
    IP地址解析
    Android 音视频深入 十六 FFmpeg 推流手机摄像头,实现直播 (附源码下载)
    Android 音视频深入 十五 FFmpeg 推流mp4文件(附源码下载)
  • 原文地址:https://www.cnblogs.com/harbin-ho/p/15005291.html
Copyright © 2011-2022 走看看