zoukankan      html  css  js  c++  java
  • Python一次性获取各个业务对接的数据量和存储大小

    Python一次性获取各个业务对接的数据量

    根据表名模糊查找或者表明列表

    # -*- coding: utf-8
    import json
    import pandas as pd
    import pymysql
    import time
    
    
    def main():
        dataSum = []  # 数据总量
        dataRealtimeSum = []  # 实时数据
        dataJingtaiSum = []  # 静态数据
    
        datasizeSum = []  # 数据大小
        datasizeRealtimeSum = []  # 实时数据大小
        datasizeJingtaiSum = []  # 静态数据大小
    
        connInfo = "connInfo-format.json"
        connFile = open(connInfo, 'r', encoding='utf8')
        connRecords = connFile.read(102400)
        connRecordsjs = json.loads(connRecords)
        conn = pymysql.connect(host='localhost', port=3306, user='root', passwd='root', charset='utf8')
    
        #write_txt(filename_overview, " ".join(["实时静态", "委办局", "数据库", "数据表", "数据大小M", "索引大小M", "行数"]))
        write_txt(filename_overview, " ".join(["实时/静态", "委办局", "行数",  "数据大小M", "索引大小M"]))
    
        for single in connRecordsjs:
            sql = ""
            if "共享平台" in single.get("key"):
                sql = "select " 
                      "table_schema, " 
                      "table_name, " 
                      "table_comment, " 
                      "round(data_length/1024/1024, 2) as 'datasizeM', " 
                      "round(index_length/1024/1024, 2) as 'indexsizeM', " 
                      "table_rows as 'rows' " 
                      "from information_schema.tables " 
                      "where table_name in (" + str(single.get("tablelist")).split('[')[1].split(']')[0] + ")"
            else:
                sql = "select " 
                      "table_schema, " 
                      "table_name, " 
                      "table_comment, " 
                      "round(data_length/1024/1024, 2) as 'datasizeM', " 
                      "round(index_length/1024/1024, 2) as 'indexsizeM', " 
                      "table_rows as 'rows' " 
                      "from information_schema.tables " 
                      "where table_name like '"+single.get('selectkeystr')+"'"
    
            df = pd.read_sql(sql, conn)
            write_txt(filename_overview, " ".join([single.get("realtime"), single.get("key"), str(df['rows'].sum()), str(round(df['datasizeM'].sum(), 2)), str(round(df['indexsizeM'].sum(), 2))]))
    
            dataSum.append(df['rows'].sum())
            datasizeSum.append(round(df['datasizeM'].sum(), 2))
            datasizeSum.append(round(df['indexsizeM'].sum(), 2))
            if single.get("realtime") == "实时":
                dataRealtimeSum.append(df['rows'].sum())
                datasizeRealtimeSum.append(round(df['datasizeM'].sum(), 2))
                datasizeRealtimeSum.append(round(df['indexsizeM'].sum(), 2))
            else:
                dataJingtaiSum.append(df['rows'].sum())
                datasizeJingtaiSum.append(round(df['datasizeM'].sum(), 2))
                datasizeJingtaiSum.append(round(df['indexsizeM'].sum(), 2))
    
        write_txt(filename_overview, '-' * 30)
        write_txt(filename_overview, " ".join(["数据量总计:", str(sum(dataSum)), ""]))
        write_txt(filename_overview, " ".join(["实时数据量总计:", str(sum(dataRealtimeSum)), ""]))
        write_txt(filename_overview, " ".join(["静态数据量总计:", str(sum(dataJingtaiSum)), ""]))
        write_txt(filename_overview, " ".join(["数据量总计大小", str(sum(datasizeRealtimeSum) + sum(datasizeJingtaiSum)), "M"]))
        write_txt(filename_overview, " ".join(["实时数据量总计大小:", str(round(sum(datasizeRealtimeSum),2)), "M"]))
        write_txt(filename_overview, " ".join(["静态数据量总计大小:", str(round(sum(datasizeJingtaiSum),2)), "M"]))
    
        conn.close()
    
    
    def write_txt(filename, line):
        with open(filename, 'a+', encoding='utf8') as fh:
            fh.write(line+"
    ")
    
    
    if __name__ == '__main__':
        t = time.localtime(time.time())
        ymdhms = [t.tm_year, t.tm_mon, t.tm_mday, t.tm_hour, t.tm_min, t.tm_sec]
        filename_overview = "backupdata/overview_" + '_'.join(str(s) for s in ymdhms) + ".txt"
        write_txt(filename_overview, "*"*51)
        write_txt(filename_overview, "***一次性统计所有对接数据的委办局,和其对应的数据(条数)***")
        write_txt(filename_overview, "*"*51)
        write_txt(filename_overview, "")
        main()
        print("successfull 结果在 ", filename_overview, "文件中")

    注:

    connInfo-format.json

    为一个json文件,部分内容如下:

    [
      {
        "key": "市场监管局-共享平台",
        "realtime": "实时",
        "selecttype": "table",
        "selectstr": "like",
        "dbschema": "statistics_data",
        "selectkeystr": "gxpt_%",
        "tablelist": ["gxpt_qiye","gxpt_geti"]
      },
    ...
    ]

    后续Python获取MySQL的数据量,很多时候可以以这个文件为字典,获取必要的编码方式

    代码比较烂

    如有哪位大虾发现有待提升的部分,还请不吝赐教!

    谢谢

  • 相关阅读:
    Codeforces 294B Shaass and Bookshelf:dp
    Codeforces 372B Counting Rectangles is Fun:dp套dp
    Codeforces 402D Upgrading Array:贪心 + 数学
    Codeforces 571B Minimization:dp + 贪心【前后相消】
    Codeforces 509F Progress Monitoring:区间dp【根据遍历顺序求树的方案数】
    codeforces 447E or 446C 线段树 + fib性质或二次剩余性质
    类斐波那契数列的一些性质
    CF 1097D
    最近点对问题
    2018ACM-ICPC EC-Final 现场赛I题 Misunderstanding...Missing 倒着DP
  • 原文地址:https://www.cnblogs.com/zhzhang/p/14506303.html
Copyright © 2011-2022 走看看