- 环境
Anaconda3 (Python 3.6), Windows 64 位
- 目的
从 MySQL 数据库读取数据,并用 pandas 实现常见的 SQL 操作:查询、条件过滤、分组汇总、关联(join)、合并(union all)以及分析函数(row_number)
- 代码

# -*- coding: utf-8 -*-
"""
Reproduce common SQL operations on MySQL data with pandas.

Loads the tables ``kimbo_test`` and ``kimbo_test2`` into DataFrames and
prints the pandas equivalent of SELECT ... LIMIT, WHERE, GROUP BY,
INNER/LEFT/FULL JOIN, UNION ALL and ROW_NUMBER() queries.

Author: kimbo zhang
Mail: kimbo_zhang@163.com
"""
import pymysql
import pandas as pd
import numpy as np

db_conn = pymysql.connect(
    host="***",
    user="kimbo",
    passwd="***",
    port=3306,
    database="kimbo_test",
    charset="utf8"
)

# SQL statements to execute. The second query aliases the re_* columns to the
# coll_* names so that both DataFrames share the same column names (needed
# for the UNION ALL example below).
sql_cmd = "select id,coll_type,coll_amt,coll_cost from kimbo_test;"
sql_cmd2 = "select id,re_type as coll_type,re_amt as coll_amt,re_cost as coll_cost from kimbo_test2;"

# Load the data. Everything after this point works on the in-memory
# DataFrames, so the connection is released as soon as both reads finish,
# even if one of them raises.
try:
    data1 = pd.read_sql(sql_cmd, db_conn)   # rows of kimbo_test
    data2 = pd.read_sql(sql_cmd2, db_conn)  # rows of kimbo_test2
finally:
    db_conn.close()

# 1. SELECT with LIMIT
# eg: select * from kimbo_test limit 10;
sl = data1.head(10)
print("查询前10条记录: ")
print(sl)
print(" --------------> ")

# 2. WHERE with a single equality condition
# eg: select * from kimbo_test where coll_type='偏低';
wh = data1[(data1['coll_type'] == '偏低')]
print(wh)

# 3. WHERE ... IN (two values)
# eg: select * from kimbo_test where coll_type in ('偏低','昂贵');
wh2 = data1[(data1['coll_type'] == '偏低') | (data1['coll_type'] == '昂贵')]
print(wh2)

# 4. WHERE ... IN combined with id > 5000
# eg: select * from kimbo_test where coll_type in ('偏低','昂贵') and id >5000;
wh3 = data1[((data1['coll_type'] == '偏低') | (data1['coll_type'] == '昂贵')) & (data1['id'] > 5000)]
print(wh3)

# 5. GROUP BY with aggregation
# eg: select coll_type,sum(coll_amt)as coll_amt,sum(coll_cost)as coll_cost from kimbo_test group by coll_type;
# The string name "sum" is used instead of np.sum: it selects the same
# groupby sum and avoids the deprecation warning newer pandas raises for
# NumPy callables passed to agg.
gb = data1.groupby('coll_type').agg({'coll_amt': 'sum', 'coll_cost': 'sum'})
print(gb)

# 6. INNER JOIN (the default `how` of pd.merge)
# eg: select * from kimbo_test a inner join kimbo_test2 b on a.id=b.id ;
# Columns present in both frames get the default _x / _y suffixes.
jn = pd.merge(data1, data2, on='id')
print(jn)

# 7. LEFT JOIN
# eg: select * from kimbo_test a left join kimbo_test2 b on a.id=b.id ;
# Joins against data2 (kimbo_test2), matching the SQL above.
jn2 = pd.merge(data1, data2, on='id', how='left')
print(jn2)

# 8. FULL (OUTER) JOIN
# eg: select * from kimbo_test a full join kimbo_test2 b on a.id=b.id ;
# Joins against data2 (kimbo_test2), matching the SQL above.
jn3 = pd.merge(data1, data2, on='id', how='outer')
print(jn3)

# 9. UNION ALL -- the column names of both frames must match
# eg: select * from kimbo_test a union all select * from kimbo_test2 b ;
un2 = pd.concat([data1, data2])
print(un2)

# 10. Analytic (window) function
# eg:
# SELECT * FROM (
#     SELECT
#         t.*,
#         ROW_NUMBER() OVER(PARTITION BY coll_type ORDER BY coll_amt DESC) AS rn
#     FROM kimbo_test t
# ) x
# WHERE rn < 3
# ORDER BY coll_type, rn;
# cumcount() numbers the rows inside each coll_type group in descending
# coll_amt order; assign() aligns those numbers back onto data1 by index.
rn = data1.assign(
    rn=data1.sort_values(['coll_amt'], ascending=False)
            .groupby('coll_type')
            .cumcount() + 1
).query('rn < 3').sort_values(['coll_type', 'rn'])
print("row_number 分析函数结果:")
print(rn)
- 结果展示
第一个和最后一个结果: