1 import pandas as pd
2 import numpy as np
3
4 # 加载数据
5 detail = pd.read_excel("../day05/meal_order_detail.xlsx")
6 # print("detail:
", detail)
7 print("detail的列名称:
", detail.columns)
8
9 # 构建交叉表
10 # index ——指定某列进行行分组
11 # columns ——指定某列进行列分组
12 # 如果不传其他参数,只传index 与columns, 那么交叉表构建 index 与 columns的相对数量
13
14 res = pd.crosstab(
15 index=detail.loc[:, "dishes_name"],
16 columns=detail.loc[:, "amounts"],
17 )
18 print("res:
", res)
19
20 # 以index这一列为行分组,以columns这一列为列分组,对values进行统计aggfunc指标
21 # values 与 aggfunc必须同时存在
22 # 只能按照单列进行行分组,只能按照按照单列进行列分组,只能统计某一列的aggfunc指标
23 res = pd.crosstab(
24 index=detail.loc[:, "dishes_name"],
25 columns=detail.loc[:, "amounts"],
26 values=detail.loc[:, "counts"],
27 aggfunc=np.max
28 )
29
30 print("res:
", res)
1 import pandas as pd
2 import numpy as np
3
4 # 创建一个dataframe
5 df = pd.DataFrame(
6 data={
7 "cls_id": ["A", "B", "C", "A", "B", "C", "A", "B", "C", "A", "B","C"],
8 "group_id": [1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2],
9 "name": ["xixi", "haha", "taotao", "huihui", "ranran", "Island", "Tree" ,"bamao", "simao", "hanhan", "qimao", "sanmao"],
10 "score": [92, 93, 39, 89, 90.5, 80, 91, 92, 65, 73, 34.5, 56],
11 "height": [165, 166, 167, 168, 152, 193, 192, 190, 173, 172, 170, 169]
12 },
13 )
14
15 res = df.groupby(by="cls_id")[["score", "height"]].mean()
16 print("groupby-res:
", res)
17
18 # data ——df
19 # index 指定行分组 ——可以按照多列进行行分组
20 # values ——aggfunc 所针对的主体,可以是多列
21 # aggfunc ——默认为均值
22
23 # 创建透视表
24 res = pd.pivot_table(
25 data=df,
26 # index="cls_id", # 按照行或列分组
27 # columns="cls_id",
28 columns=["cls_id", "group_id"],
29 values=["score", "height"],
30 aggfunc="mean",
31 margins=True,
32 margins_name="xixi"
33 )
34
35 print("res:
", res)
36
37 print("~"*60)
38
39 # 加载数据
40 detail = pd.read_excel("../day05/meal_order_detail.xlsx")
41 # print("detail:
", detail)
42 print("detail的列名称:
", detail.columns)
43
44 res = pd.pivot_table(
45 data=detail,
46 # index="order_id",
47 # index="cost",
48 columns="dishes_id",
49 # values="amounts",
50 values="cost",
51 aggfunc="max",
52 fill_value=-1 # 将全为空的数据进行填充
53 )
54
55 print("res:
", res)