#导入第三方库sqlalchemy的数据库引擎 from sqlalchemy import create_engine #导入科学计算库 import pandas as pd #导入绘图库 import matplotlib.pyplot as plt if __name__ == "__main__": #建立数据库引擎 engine = create_engine('mysql+pymysql://root:123456@localhost:3306/mymac') #建立一个基于pandas查询数据框架类的对象 sql = 'select id,name,age,gender from student' df = pd.read_sql_query(sql,engine) print("#建立一个基于pandas查询数据框架类的对象 {}".format(df)) #select * from student order by age asc | desc df0 = df.sort_values(['age','gender'],ascending=False) print("#指定多字段排序,遵循最左原则,注意:这是矩阵数组操作,不是在数据库操作,需要重新赋值 {}".format(df0)) df1 = df.sort_values(['age']).head(1) print("#取年龄最小的学生,从头部取第一个 {}".format(df1)) print('#所有字段的平均值 {}'.format(df.mean())) print('#矩阵 按列 求平均值 {}'.format(df.mean(0))) print('#矩阵 按行 求平均值 {}'.format(df.mean(1))) #使用loc[]方法指定字段 .age指定操作数据字段 avg_age = df.loc[df['gender'] == 0].age.mean() print("查整个班的女生的平均年龄: {}".format(avg_age)) view = df.loc[(df['age'] < avg_age) & (df['gender'] == 0)] print("查小于全班平均年龄的女生 {}".format(view)) print("全班平均年龄: {}".format(df.age.mean())) print("全班年龄和: {}".format(df.age.sum())) print("男生的年龄和: {}".format(df.loc[df['gender'] == 1].age.sum())) print("全班最小年龄: {}".format(df.age.min())) print(df.loc[(df['gender'] == 0) | (df['gender'] == 1)]) print("全班人数: {}".format(df.id.count())) #排除 print(df.loc[(df['gender'] != 0) & (df['gender'] != 1)]) #修改 df.loc[3,'gender'] = 0 df.loc[3,'age'] = 18 print(df) #取单值 nplist[2,3] nplist[2][3] print(df.loc[3,'name']) data = df.loc[ df['gend.er'] == 1, ['id','name']] print("#查指定字段的数据 {}".format(data)) """ 导出excel文件 pandas的矩阵转成excel文件,需要安装一个Exel插件 pip install openpyxl 基于清华学校的镜像源安装 pip install -i https://pypi.tuna.tsinghua.edu.cn/simple openpyxl 将dataframe写入数据表 表名,数据引擎,索引是否入库 """ df.to_sql('student_copy',engine,index=False) #导出excel文件 df.to_excel('student.xlsx') # 一、利用dataframe内置方法,统计 李老师班身高超过1.80的所有男生 sql_0 = "select * from student as s inner join teacher as t on s.tid = t.id where t.id = 2 and s.gender = 1" df_0 = pd.read_sql_query(sql_0,engine) print(df_0) view_0 = df_0.loc[df_0['hight'] > 1.80] print(view_0) #二、利用dataframe内置方法,统计 王老师班身高不低于李老师班女生平均身高的所有女生 sql_1 = "select * from student as s inner join teacher as t on s.tid = t.id where s.gender = 0" df_1 = pd.read_sql_query(sql_1,engine) print("#1、查出所有老师的班级的女生 {}".format(df_1)) view_1 = df_1.loc[(df_1['tid'] == 2)].hight.mean() print("#2、李老师班的女生平均身高: {}".format(view_1)) view_2 = df_1.loc[(df_1['tid'] == 1) & (df_1['hight'] > view_1)] print(view_2) """ 要求:给student 表加入字段weight,利用matplotlib 画一个散点图,将全班的身高(x轴) 和 体重(y轴) 以散点图的形式展示出来 """ #建立一个基于pandas与sqlalchemy交互的对象 sql = 'select hight,weight from student' df = pd.read_sql_query(sql,engine) print(df) # #定义x轴数据 x = df['hight'] y = df['weight'] # 填充数据 #s 表示点的大小和粗细 c 表示颜色 plt.scatter(x,y,s=20,c='blue') #设置标题 plt.title('身高(x轴)/体重(y轴)') #绘制 plt.show()