zoukankan html css js c++ java

pandas之透视表和交叉表

import pandas as pd
import warnings

warnings.filterwarnings('ignore')

def _load_dat(path, columns):
    """Read a header-less, '::'-delimited data file into a DataFrame with the given column names."""
    return pd.read_csv(path, sep='::', header=None, names=columns, engine='python')


# Load the three source tables: users, ratings and movie details.
users = _load_dat('./users.dat', ['UserID', 'Gender', 'Age', 'Occupation', 'Zip-code'])
ratings = _load_dat('./ratings.dat', ['UserID', 'MovieID', 'Rating', 'Timestamp'])
movies = _load_dat('./movies.dat', ['MovieID', 'Title', 'Genres'])

# Combine everything into one frame: ratings joined with users, then with movies.
# No key is given, so each merge joins on the columns the two frames have in common.
data = ratings.merge(users).merge(movies)
print(data.head())

# Approach 1: pivot_table -- average rating of every movie, split by gender.
# margins=True appends the "All" row/column with the overall averages.
data_gender = data.pivot_table(values='Rating', index='Title', columns='Gender', margins=True)
print(data_gender.head())

# Approach 2: groupby -- the same averages in long form, indexed by (Title, Gender).
data_gender = data.groupby(['Title', 'Gender'])[['Rating']].mean()
print(data_gender.head())

# Approach 3: crosstab -- average rating of every movie, split by gender.
data_gender = pd.crosstab(index=data['Title'], columns=data['Gender'], values=data['Rating'], aggfunc='mean')
print(data_gender.head())


输出结果：
   UserID  MovieID  ...                                   Title  Genres
0       1     1193  ...  One Flew Over the Cuckoo's Nest (1975)   Drama
1       2     1193  ...  One Flew Over the Cuckoo's Nest (1975)   Drama
2      12     1193  ...  One Flew Over the Cuckoo's Nest (1975)   Drama
3      15     1193  ...  One Flew Over the Cuckoo's Nest (1975)   Drama
4      17     1193  ...  One Flew Over the Cuckoo's Nest (1975)   Drama

[5 rows x 10 columns]
Gender                                F         M       All
Title                                                      
$1,000,000 Duck (1971)         3.375000  2.761905  3.027027
'Night Mother (1986)           3.388889  3.352941  3.371429
'Til There Was You (1997)      2.675676  2.733333  2.692308
'burbs, The (1989)             2.793478  2.962085  2.910891
...And Justice for All (1979)  3.828571  3.689024  3.713568
                                    Rating
Title                     Gender          
$1,000,000 Duck (1971)    F       3.375000
                          M       2.761905
'Night Mother (1986)      F       3.388889
                          M       3.352941
'Til There Was You (1997) F       2.675676
Gender                                F         M
Title                                            
$1,000,000 Duck (1971)         3.375000  2.761905
'Night Mother (1986)           3.388889  3.352941
'Til There Was You (1997)      2.675676  2.733333
'burbs, The (1989)             2.793478  2.962085
...And Justice for All (1979)  3.828571  3.689024

import pandas as pd

# Ten sample records: a sample id, the subject's gender and their dominant hand.
data = pd.DataFrame({'Sample': range(1, 11),
                     'Gender': ['Female', 'Male', 'Female', 'Male', 'Male', 'Male', 'Female', 'Female', 'Male',
                                'Female'],
                     'Handedness': ['Right-handed', 'Left-handed', 'Right-handed', 'Right-handed', 'Left-handed',
                                    'Right-handed', 'Right-handed', 'Left-handed', 'Right-handed', 'Right-handed']})
print(data)

# Method 1: pivot_table -- count the rows for every Gender/Handedness pair.
# No `values` is given, so the remaining column (Sample) is what gets counted.
# fill_value=0 makes a pair with no rows show up as 0 instead of NaN, which keeps
# the frequency table integer-valued.
data1 = pd.pivot_table(data, index='Gender', columns='Handedness', aggfunc=len, margins=True, fill_value=0)
print(data1)

# Method 2: crosstab -- without `values`/`aggfunc` it computes a frequency table
# directly and reports 0 for pairs that never occur, so no helper column is needed.
data2 = pd.crosstab(data.Gender, data.Handedness, margins=True)
print(data2)

输出结果：
   Sample  Gender    Handedness
0       1  Female  Right-handed
1       2    Male   Left-handed
2       3  Female  Right-handed
3       4    Male  Right-handed
4       5    Male   Left-handed
5       6    Male  Right-handed
6       7  Female  Right-handed
7       8  Female   Left-handed
8       9    Male  Right-handed
9      10  Female  Right-handed
                Sample                 
Handedness Left-handed Right-handed All
Gender                                 
Female               1            4   5
Male                 2            3   5
All                  3            7  10
Handedness  Left-handed  Right-handed  All
Gender                                    
Female                1             4    5
Male                  2             3    5
All                   3             7   10

查看全文

相关阅读:
JVM 垃圾回收 Minor gc vs Major gc vs Full gc
CMS垃圾回收与G1垃圾回收
 使用Mat分析大堆信息
 Java线程池--ThreadPoolExecutor
IOC容器初始化过程
 linux 下查mac
logstash之multiline插件，匹配多行日志
 logstash json和rubydebug：每次重启logstash都会把所有的日志读完，而不是只读入新输入的内容
 快速定位关键字所在日志的行号　查看特定行以下的内容
 redis　常用命令

原文地址：https://www.cnblogs.com/yuxiangyang/p/11266863.html