数据分析大作业,这里做个简单的记录,数据集下载https://gitee.com/Arno_vc/python.git
#通用函数
import pandas as pd;
import re
import numpy as np
import matplotlib.pyplot as plt
# Matplotlib font setup: SimHei supplies the CJK glyphs used in chart titles.
plt.rcParams['font.sans-serif'] = ['SimHei']
# The selected font is reported as lacking glyph 8722 (U+2212, the Unicode
# minus sign) when negative values are plotted; render minus signs with the
# ASCII hyphen instead so tick labels are drawn without warnings.
plt.rcParams['axes.unicode_minus'] = False
plt.rcParams['font.size'] = '16'
def drawBox(data, title):
    """Show a box plot of ``data`` on a 10x10 figure with a faint dashed grid.

    data:  object exposing the pandas ``.plot`` accessor (Series or DataFrame).
    title: text used as the chart title.
    """
    data.plot.box(title=title, figsize=(10, 10))
    plt.grid(linestyle="--", alpha=0.3)
    plt.show()
def drawBar(data, title):
    """Show a stacked bar chart of ``data`` on a 10x10 figure.

    data:  object exposing the pandas ``.plot`` accessor (Series or DataFrame).
    title: text used as the chart title.
    """
    data.plot.bar(stacked=True, title=title, figsize=(10, 10))
    plt.show()
def drawPie(data, title):
    """Show a pie chart of ``data`` on a 10x10 figure.

    data:  object exposing the pandas ``.plot`` accessor; the callers in this
           file pass a single column of counts.
    title: text used as the chart title.
    """
    data.plot.pie(title=title, figsize=(10, 10))
    plt.show()
def drawScatter(data, x, y, title=None):
    """Show a scatter plot of two columns of ``data`` on a 10x10 figure.

    data:  DataFrame holding the columns to plot.
    x, y:  column names for the horizontal and vertical axes.
    title: optional chart title; omitted by default, which matches the
           previous three-argument behaviour.
    """
    data.plot.scatter(x=x, y=y, title=title, figsize=(10, 10))
    plt.show()
def drawLine(data, title):
    """Show a line chart of ``data`` on a 10x10 figure.

    data:  object exposing the pandas ``.plot`` accessor (Series or DataFrame).
    title: text used as the chart title.
    """
    data.plot.line(title=title, figsize=(10, 10))
    plt.show()
def count(data):
    """Return the number of items in ``data``.

    Passed as ``aggfunc`` to the crosstab / pivot-table calls in this file,
    where it receives the group of values that falls into one cell.
    """
    return len(data)
# Extract the release year from a movie title.
def getYear(data):
    """Return the text inside the last pair of parentheses in ``data``.

    Titles look like ``"Toy Story (1995)"``; some carry an earlier
    parenthesised alternate name, so the last group is the one returned.
    The value is returned as a string, not an int.

    Raises IndexError when ``data`` contains no parenthesised group.
    """
    res = re.findall(r'[(](.*?)[)]', data)
    return res[-1]
综合实验 MovieLens 1M数据分析
-
数据集说明:
MovieLens 1M Dataset:该数据集采集了一组从20世纪90年代末到21世纪初由MovieLens用户提供的电影评分数据。这些数据中包括电影评分、电影数据(风格类型和年代)以及关于用户的人口统计学数据(年龄、邮编、性别和职业等)。
来源:https://grouplens.org/datasets/movielens/
3个数据文件:
- 电影:movies.dat,列:movie_id, title, genres
- 用户:users.dat,列:user_id, gender, age, occupation, zip
- 评分:ratings.dat,列:user_id, movie_id, rating, timestamp
-
概要统计
电影数量:总数、按年代、风格统计
用户数量:总数、按性别、年龄、职业统计
评分条数:总数、按电影、性别、职业统计
-
分析目标
(1) 每部电影的得分情况分析
(2) 每种风格电影的得分情况分析
(3) 不同性别用户偏爱的电影分析
(4) 不同年龄段用户偏爱的电影分析
(5) 不同性别用户偏爱的电影,随着年代的变化情况分析
#电影数量:总数、按年代、风格统计
import pandas as pd;
import re
import numpy as np
import matplotlib.pyplot as plt
# Matplotlib font setup: SimHei supplies the CJK glyphs used in chart titles.
plt.rcParams['font.sans-serif'] = ['SimHei']
# movies.dat has no header row and separates fields with "::"; `names`
# supplies the column labels and the multi-character separator needs the
# Python parser engine.
# NOTE(review): the MovieLens 1M files are commonly reported to be ISO-8859-1
# text; decoding with the platform default garbles accented titles -- confirm.
movies = pd.read_table(
    "../大作业2020/大作业题目1/movielens/movies.dat",
    sep="::",
    names=["movie_id", "title", "genres"],
    engine="python",
    encoding="ISO-8859-1",
)
# Derive the release year from every title, one value at a time. This replaces
# `movies.agg({...}, axios=1)`: `axios` was a misspelling of `axis`, and
# extracting a value per row is a mapping, not an aggregation.
movies["year"] = movies["title"].map(getYear)
movies.head(1000)
movie_id | title | genres | year | |
---|---|---|---|---|
0 | 1 | Toy Story (1995) | Animation|Children's|Comedy | 1995 |
1 | 2 | Jumanji (1995) | Adventure|Children's|Fantasy | 1995 |
2 | 3 | Grumpier Old Men (1995) | Comedy|Romance | 1995 |
3 | 4 | Waiting to Exhale (1995) | Comedy|Drama | 1995 |
4 | 5 | Father of the Bride Part II (1995) | Comedy | 1995 |
... | ... | ... | ... | ... |
995 | 1008 | Davy Crockett, King of the Wild Frontier (1955) | Western | 1955 |
996 | 1009 | Escape to Witch Mountain (1975) | Adventure|Children's|Fantasy | 1975 |
997 | 1010 | Love Bug, The (1969) | Children's|Comedy | 1969 |
998 | 1011 | Herbie Rides Again (1974) | Adventure|Children's|Comedy | 1974 |
999 | 1012 | Old Yeller (1957) | Children's|Drama | 1957 |
1000 rows × 4 columns
检查是否有缺失值
print("是否有缺失值:")
# Show the rows that contain at least one missing value. A per-row boolean
# mask is built with any(axis=1); the earlier 2-D `isnull().values == True`
# array is not a row mask (rows can repeat once per missing cell, or the
# lookup can be rejected, depending on the pandas version).
movies[movies.isnull().any(axis=1)]
是否有缺失值:
movie_id | title | genres | year |
---|
查看电影总数:共3883条
# The string literal had been split across two physical lines (a syntax
# error); the intended trailing newline is written as an escape sequence.
print("电影总数:{}\n".format(len(movies)))
电影总数:3883
电影年份相关统计:分析:该数据集涵盖了1919年到2000年的电影,总数共3883条,统计不同年份的电影发行数并按年份升序排列,如图可知,发行最少的年份是1921年,只有1部;最多的年份是1996年,多达345部,且平均每年电影发行数为48部.通过变化的折线图可以看出,自1919年起,电影发行数目一直在逐年缓慢攀升,并从90年代开始有了剧烈的增长,1996年到达顶峰(结合上一张图),之后到2000年急剧下降.同时根据该箱线图,可以看出超过一半的年份的电影发行数在50以下,只有10个年份的电影发行数超过100.由以上电影类别统计的柱形图可知,Film-Noir(黑色电影)风格的电影最少,在这近100年来仅44部;最多的是Drama(戏剧)和Comedy(喜剧)
# The string literal had been split across two physical lines (a syntax
# error); the intended trailing newline is written as an escape sequence.
print("年份统计:\n")
# Number of titles released per year, ordered by year. Despite its name,
# ageCount holds per-year counts in this cell.
ageCount = movies[["title", "year"]].groupby(by="year").count().sort_values(by="year")
drawLine(ageCount.loc[:, "title"], "不同年份的电影发行统计折线图")
drawBox(ageCount.loc[:, "title"], "不同年份的电影发行数统计箱线图")
ageCount
年份统计:
title | |
---|---|
year | |
1919 | 3 |
1920 | 2 |
1921 | 1 |
1922 | 2 |
1923 | 3 |
... | ... |
1996 | 345 |
1997 | 315 |
1998 | 337 |
1999 | 283 |
2000 | 156 |
81 rows × 1 columns
电影类别统计:可以看出数量最多的是Drama类型的电影,有1603部;最少的是Film-Noir类型的电影,仅44部
# Build a long-format table with one (movie_id, genre) row per genre of each
# movie; the "genres" field of movies holds the genres joined with "|".
# Iterating the two columns in lockstep avoids a `.loc` lookup per row.
genres = pd.DataFrame(
    [
        [movie_id, genre]
        for movie_id, joined in zip(movies["movie_id"], movies["genres"])
        for genre in joined.split("|")
    ],
    columns=["movie_id", "genres"],
)
# Movies per genre, smallest first.
genresCount = genres.groupby(by="genres").count().sort_values(by="movie_id")
# Box plot and bar chart of the per-genre counts.
drawBox(genresCount.loc[:, ["movie_id"]], "电影类别统计箱线图")
drawBar(genresCount, "电影类别统计柱形图")
# The literal had been split across two lines; the newline is now an escape.
print("电影类别统计\n")
genresCount
电影类别统计
movie_id | |
---|---|
genres | |
Film-Noir | 44 |
Fantasy | 68 |
Western | 68 |
Animation | 105 |
Mystery | 106 |
Musical | 114 |
Documentary | 127 |
War | 143 |
Crime | 211 |
Children's | 251 |
Sci-Fi | 276 |
Adventure | 283 |
Horror | 343 |
Romance | 471 |
Thriller | 492 |
Action | 503 |
Comedy | 1200 |
Drama | 1603 |
导入用户数据,同时检查用户数据
#用户数量:总数、按性别、年龄、职业统计
import pandas as pd;
import re
import numpy as np
# users.dat has no header row and uses "::" between fields; `names` labels the
# columns (occupation = occupation code, zip = postal code).
users = pd.read_table(
    "../大作业2020/大作业题目1/movielens/users.dat",
    names=["user_id", "gender", "age", "occupation", "zip"],
    sep="::",
    engine='python',
)
# Trailing semicolon kept as in the original cell (presumably to suppress the
# notebook echo of this preview).
users.head(1000);
检查是否有缺失值
print("是否有缺失值")
# Show the rows that contain at least one missing value. A per-row boolean
# mask is built with any(axis=1); the earlier 2-D `isnull().values == True`
# array is not a row mask (rows can repeat once per missing cell, or the
# lookup can be rejected, depending on the pandas version).
users[users.isnull().any(axis=1)]
是否有缺失值
user_id | gender | age | occupation | zip |
---|
用户总数
# The string literal had been split across two physical lines (a syntax
# error); the intended trailing newline is written as an escape sequence.
print("用户总数:{}\n".format(len(users)))
用户总数:6040
性别统计:用户总数多达6040人,其中女性1709人,男性4331人.由对应的饼图可以看出,男性占比接近3/4.
# Number of users per gender, shown as a pie chart and as a table.
genderCount = users[["user_id", "gender"]].groupby("gender").count()
print("性别统计")
drawPie(genderCount["user_id"], "用户分布")
genderCount
性别统计
user_id | |
---|---|
gender | |
F | 1709 |
M | 4331 |
# Number of users per age code, shown as a bar chart and as a table.
ageCount = users[["user_id", "age"]].groupby("age").count()
drawBar(ageCount["user_id"], "观众年龄统计条形图")
print("年龄统计")
ageCount
年龄统计
user_id | |
---|---|
age | |
1 | 222 |
18 | 1103 |
25 | 2096 |
35 | 1193 |
45 | 550 |
50 | 496 |
56 | 380 |
观众职业统计:由于无法得知具体的职业名称,这里无法做过多的评判.
# Number of users per occupation code, shown as a bar chart and as a table.
occupationCount = users.loc[:, ["user_id", "occupation"]].groupby(by="occupation").count()
drawBar(occupationCount.loc[:, "user_id"], "观众职业分布图")
# The literal had been split across lines and carried a "{}" placeholder that
# was never formatted (it would print literal braces); the table itself is
# displayed by the final expression below.
print("职业统计\n")
occupationCount
user_id | |
---|---|
occupation | |
0 | 711 |
1 | 528 |
2 | 267 |
3 | 173 |
4 | 759 |
5 | 112 |
6 | 236 |
7 | 679 |
8 | 17 |
9 | 92 |
10 | 195 |
11 | 129 |
12 | 388 |
13 | 142 |
14 | 302 |
15 | 144 |
16 | 241 |
17 | 502 |
18 | 70 |
19 | 72 |
20 | 281 |
评分统计
#评分条数:总数、按电影、性别、职业统计
import pandas as pd;
import re
import numpy as np
# All three .dat files have no header row and use "::" between fields; `names`
# supplies the column labels (occupation = occupation code, zip = postal code).
rating = pd.read_table(
    "../大作业2020/大作业题目1/movielens/ratings.dat",
    sep="::",
    names=["user_id", "movie_id", "rating", "timestamp"],
    engine="python",
)
# NOTE(review): movies.dat is commonly reported to be ISO-8859-1 text; decoding
# it with the platform default garbles accented titles in the per-title
# tables -- confirm against the data set.
movies = pd.read_table(
    "../大作业2020/大作业题目1/movielens/movies.dat",
    sep="::",
    names=["movie_id", "title", "genres"],
    engine="python",
    encoding="ISO-8859-1",
)
users = pd.read_table(
    "../大作业2020/大作业题目1/movielens/users.dat",
    sep="::",
    names=["user_id", "gender", "age", "occupation", "zip"],
    engine="python",
)
# One row per rating, enriched with the movie columns and then the user columns.
data = pd.merge(movies, rating, how="inner", on="movie_id")
data = pd.merge(data, users, how="inner", on="user_id")
rating.head(1000)
user_id | movie_id | rating | timestamp | |
---|---|---|---|---|
0 | 1 | 1193 | 5 | 978300760 |
1 | 1 | 661 | 3 | 978302109 |
2 | 1 | 914 | 3 | 978301968 |
3 | 1 | 3408 | 4 | 978300275 |
4 | 1 | 2355 | 5 | 978824291 |
... | ... | ... | ... | ... |
995 | 10 | 3704 | 2 | 978228364 |
996 | 10 | 1020 | 3 | 978228726 |
997 | 10 | 784 | 3 | 978230946 |
998 | 10 | 858 | 3 | 978224375 |
999 | 10 | 1022 | 5 | 979775689 |
1000 rows × 4 columns
评分总条数统计
# The literal had been split across two physical lines (a syntax error), and
# its label said "total users" although the value printed is the number of
# rating rows.
print("评分总数:{}\n".format(len(rating)))
用户总数:1000209
按电影统计评分:由以下相关的统计数据可知,总评条数为1000209条,评论最少的电影有多部,都为1条;评论最多的电影为American Beauty,有3428条.平均每部电影有900条,然而超过50%的电影甚至连500条都远远没有达到.
# Number of ratings per movie title, least-rated first.
titleCount = data[["movie_id", "title"]].groupby("title").count().sort_values("movie_id")
print("按电影统计评分")
drawBox(titleCount["movie_id"], "电影评分条数统计")
titleCount.head(1000)
按电影统计评分
movie_id | |
---|---|
title | |
Another Man's Poison (1952) | 1 |
Night Tide (1961) | 1 |
Shadows (Cienie) (1988) | 1 |
McCullochs, The (1975) | 1 |
Anna (1996) | 1 |
... | ... |
Cats Don't Dance (1997) | 37 |
How I Won the War (1967) | 37 |
Idiots, The (Idioterne) (1998) | 37 |
Isn't She Great? (2000) | 37 |
Boys of St. Vincent, The (1993) | 37 |
1000 rows × 1 columns
职业统计:可以看出0号和4号职业的电影评论较多,8号职业的评论格外少
# Number of ratings submitted per occupation code.
occupationCount = data[["movie_id", "occupation"]].groupby("occupation").count()
drawBar(occupationCount["movie_id"], "电影评分与职业统计")
print("按职业统计评分")
occupationCount
按职业统计评分
movie_id | |
---|---|
occupation | |
0 | 130499 |
1 | 85351 |
2 | 50068 |
3 | 31623 |
4 | 131032 |
5 | 21850 |
6 | 37205 |
7 | 105425 |
8 | 2706 |
9 | 11345 |
10 | 23290 |
11 | 20563 |
12 | 57214 |
13 | 13754 |
14 | 49109 |
15 | 22951 |
16 | 46021 |
17 | 72816 |
18 | 12086 |
19 | 14904 |
20 | 60397 |
电影评分与性别统计:这里,尽管男性用户整体占比没有超过3/4,但男性评论的条数占比超过了3/4,说明了看电影的男性中有不少也是很感性的.
# Number of ratings submitted per gender.
genderCount = data[["movie_id", "gender"]].groupby("gender").count()
drawPie(genderCount["movie_id"], "电影评分与性别统计")
print("按性别统计评分")
genderCount
按性别统计评分
movie_id | |
---|---|
gender | |
F | 246440 |
M | 753769 |
# Per-movie score analysis: mean rating of every title, lowest first.
# Note that titleCount holds means here, not counts.
titleCount = data[["title", "rating"]].groupby("title").mean().sort_values("rating")
drawBox(titleCount, "各电影评价得分箱线图")
print("各电影评价得分")
titleCount.head(1000)
各电影评价得分
rating | |
---|---|
title | |
Elstree Calling (1930) | 1.000000 |
Get Over It (1996) | 1.000000 |
Venice/Venice (1992) | 1.000000 |
Windows (1980) | 1.000000 |
Kestrel's Eye (Falkens öga) (1998) | 1.000000 |
... | ... |
Net, The (1995) | 2.869947 |
End of Violence, The (1997) | 2.870370 |
Renaissance Man (1994) | 2.870968 |
Funeral, The (1996) | 2.870968 |
Robert A. Heinlein's The Puppet Masters (1994) | 2.871508 |
1000 rows × 1 columns
- 每种风格电影的得分情况分析:由上图可知,即便不同电影的评价得分有较大的变化区间,不同类别电影之间的评分差距却显得较为平和,可以认为无论是哪一类电影,都是既有糟糕的作品,也有优秀的作品.其中,平均得分最高的是Film-Noir类型的电影,约为4.08,最低的是Horror类型的电影,约为3.22
# Genre table: movie_id and genre are not one-to-one, so the ratings are joined
# onto the long-format (movie_id, genre) table instead of the movies table.
import pandas as pd
# Drop the "|"-joined genres column so it does not collide with the per-genre
# column during the merge. Unlike `del data["genres"]`, this does not raise
# KeyError when the column has already been removed.
data = data.drop(columns="genres", errors="ignore")
# One row per (rating, genre) pair.
genres = pd.merge(genres, data, how="inner", on="movie_id")
# Mean rating per genre, in genre-name order.
ratingCount = genres.loc[:, ["genres", "rating"]].groupby(by="genres").mean().sort_values(by="genres")
drawBar(ratingCount.loc[:, "rating"], "不同电影风格得分统计")
print("不同风格电影得分情况比较")
ratingCount
不同风格电影得分情况比较
rating | |
---|---|
genres | |
Action | 3.491185 |
Adventure | 3.477257 |
Animation | 3.684868 |
Children's | 3.422035 |
Comedy | 3.522099 |
Crime | 3.708679 |
Documentary | 3.933123 |
Drama | 3.766332 |
Fantasy | 3.447371 |
Film-Noir | 4.075188 |
Horror | 3.215013 |
Musical | 3.665519 |
Mystery | 3.668102 |
Romance | 3.607465 |
Sci-Fi | 3.466521 |
Thriller | 3.570466 |
War | 3.893327 |
Western | 3.637770 |
- 不同性别用户偏爱的电影分析:这里分析了不同类型电影的男女观影人数比较,可以看出男性占据绝对的人数优势,在所有类型电影的观看人次上都超过了女性,其中差距最大的是Action类型电影,最小的是Documentary类型电影,当然这在一定程度上也和该类型电影的总体观影人数较少有关.
# Cross-tabulate genre against gender; each cell is the number of ratings in
# that (genre, gender) group, obtained by applying count to the rating values.
crosstab = pd.crosstab(
    index=genres["genres"],
    columns=genres["gender"],
    values=genres["rating"],
    aggfunc=count,
)
# "dis" is the female count minus the male count for each genre.
crosstab["dis"] = crosstab["F"] - crosstab["M"]
# Grouped bar chart of all three columns (F, M and dis).
crosstab.plot.bar(title="不同性别用户偏爱的电影类别分析", figsize=(10, 10))
plt.show()
print("不同性别用户偏爱的电影类别分析")
crosstab
c:\users\gcl\appdata\local\programs\python\python37-32\lib\site-packages\matplotlib\backends\backend_agg.py:238: RuntimeWarning: Glyph 8722 missing from current font.
font.set_text(s, 0.0, flags=flags)
c:\users\gcl\appdata\local\programs\python\python37-32\lib\site-packages\matplotlib\backends\backend_agg.py:201: RuntimeWarning: Glyph 8722 missing from current font.
font.set_text(s, 0, flags=flags)
不同性别用户偏爱的电影类别分析
gender | F | M | dis |
---|---|---|---|
genres | |||
Action | 45650 | 211807 | -166157 |
Adventure | 27332 | 106621 | -79289 |
Animation | 12221 | 31072 | -18851 |
Children's | 21317 | 50869 | -29552 |
Comedy | 96271 | 260309 | -164038 |
Crime | 16442 | 63099 | -46657 |
Documentary | 1940 | 5970 | -4030 |
Drama | 98153 | 256376 | -158223 |
Fantasy | 8718 | 27583 | -18865 |
Film-Noir | 4202 | 14059 | -9857 |
Horror | 14635 | 61751 | -47116 |
Musical | 13505 | 28028 | -14523 |
Mystery | 9976 | 30202 | -20226 |
Romance | 50297 | 97226 | -46929 |
Sci-Fi | 27400 | 129894 | -102494 |
Thriller | 40308 | 149372 | -109064 |
War | 14093 | 54434 | -40341 |
Western | 3477 | 17206 | -13729 |
- 不同年龄段用户偏爱的电影分析:各个年龄段的用户都喜欢看Action,Comedy,Drama;年龄段为1的用户最喜欢看Comedy;25岁,35岁的青壮年用户对于Romance,Sci-Fi(科幻),Thriller也有不错的偏好.到45岁后用户的电影观看数量都比较少
# Cross-tabulate genre against age code; each cell is the number of ratings in
# that (genre, age) group, obtained by applying count to the rating values.
crosstab = pd.crosstab(
    index=genres["genres"],
    columns=genres["age"],
    values=genres["rating"],
    aggfunc=count,
)
drawBar(crosstab, "不同年龄段用户偏爱的电影类别分析")
print("不同年龄段用户偏爱的电影分析")
crosstab
age | 1 | 18 | 25 | 35 | 45 | 50 | 56 |
---|---|---|---|---|---|---|---|
genres | |||||||
Action | 6578 | 50186 | 105678 | 50503 | 19357 | 17012 | 8143 |
Adventure | 3998 | 26324 | 52633 | 26682 | 10738 | 9090 | 4488 |
Animation | 2449 | 10269 | 16454 | 8117 | 2889 | 2032 | 1083 |
Children's | 4337 | 16924 | 25743 | 14004 | 5400 | 3890 | 1888 |
Comedy | 11162 | 69980 | 143210 | 69244 | 27890 | 23133 | 11961 |
Crime | 1701 | 15373 | 33030 | 14895 | 6048 | 5520 | 2974 |
Documentary | 130 | 1081 | 3489 | 1708 | 687 | 555 | 260 |
Drama | 7483 | 58104 | 138695 | 71590 | 32141 | 29247 | 17269 |
Fantasy | 1360 | 7875 | 14290 | 7006 | 2695 | 2127 | 948 |
Film-Noir | 330 | 2280 | 6539 | 4175 | 1860 | 1870 | 1207 |
Horror | 2211 | 15184 | 31235 | 15122 | 6192 | 4681 | 1761 |
Musical | 1647 | 7555 | 14705 | 8746 | 3898 | 3093 | 1889 |
Mystery | 920 | 6401 | 15160 | 8179 | 3851 | 3520 | 2147 |
Romance | 3599 | 25656 | 58003 | 29330 | 13283 | 11373 | 6279 |
Sci-Fi | 4178 | 29033 | 63156 | 32333 | 13040 | 10674 | 4880 |
Thriller | 4824 | 35877 | 77429 | 36840 | 14933 | 13240 | 6537 |
War | 1578 | 10874 | 24830 | 14514 | 6642 | 6314 | 3775 |
Western | 335 | 2863 | 7053 | 4546 | 2133 | 2420 | 1333 |
- 不同性别用户偏爱的电影,随着年代的变化情况分析:
#通用函数
import pandas as pd;
import re
import numpy as np
import matplotlib.pyplot as plt
# Matplotlib font setup: SimHei supplies the CJK glyphs used in chart titles.
plt.rcParams['font.sans-serif'] = ['SimHei']
# The selected font is reported as lacking glyph 8722 (U+2212, the Unicode
# minus sign) when negative values are plotted; render minus signs with the
# ASCII hyphen instead so tick labels are drawn without warnings.
plt.rcParams['axes.unicode_minus'] = False
plt.rcParams['font.size'] = '16'
def drawBox(data, title):
    """Show a box plot of ``data`` on a 10x10 figure with a faint dashed grid.

    data:  object exposing the pandas ``.plot`` accessor (Series or DataFrame).
    title: text used as the chart title.
    """
    data.plot.box(title=title, figsize=(10, 10))
    plt.grid(linestyle="--", alpha=0.3)
    plt.show()
def drawBar(data, title):
    """Show a stacked bar chart of ``data`` on a 10x10 figure.

    data:  object exposing the pandas ``.plot`` accessor (Series or DataFrame).
    title: text used as the chart title.
    """
    data.plot.bar(stacked=True, title=title, figsize=(10, 10))
    plt.show()
def drawPie(data, title):
    """Show a pie chart of ``data`` on a 10x10 figure.

    data:  object exposing the pandas ``.plot`` accessor; the callers in this
           file pass a single column of counts.
    title: text used as the chart title.
    """
    data.plot.pie(title=title, figsize=(10, 10))
    plt.show()
def drawScatter(data, x, y, title=None):
    """Show a scatter plot of two columns of ``data`` on a 10x10 figure.

    data:  DataFrame holding the columns to plot.
    x, y:  column names for the horizontal and vertical axes.
    title: optional chart title; omitted by default, which matches the
           previous three-argument behaviour.
    """
    data.plot.scatter(x=x, y=y, title=title, figsize=(10, 10))
    plt.show()
def drawLine(data, title):
    """Show a line chart of ``data`` on a 20x20 figure.

    data:  object exposing the pandas ``.plot`` accessor (Series or DataFrame).
    title: text used as the chart title.
    """
    data.plot.line(title=title, figsize=(20, 20))
    plt.show()
def count(data):
    """Return the number of items in ``data``.

    Passed as ``aggfunc`` to the crosstab / pivot-table calls in this file,
    where it receives the group of values that falls into one cell.
    """
    return len(data)
# Extract the release year from a movie title.
def getYear(data):
    """Return the text inside the last pair of parentheses in ``data``.

    Titles look like ``"Toy Story (1995)"``; some carry an earlier
    parenthesised alternate name, so the last group is the one returned.
    The value is returned as a string, not an int.

    Raises IndexError when ``data`` contains no parenthesised group.
    """
    res = re.findall(r'[(](.*?)[)]', data)
    return res[-1]
# All three .dat files have no header row and use "::" between fields; `names`
# supplies the column labels (occupation = occupation code, zip = postal code).
rating = pd.read_table(
    "../大作业2020/大作业题目1/movielens/ratings.dat",
    sep="::",
    names=["user_id", "movie_id", "rating", "timestamp"],
    engine="python",
)
# NOTE(review): movies.dat is commonly reported to be ISO-8859-1 text; decoding
# it with the platform default garbles accented titles -- confirm.
movies = pd.read_table(
    "../大作业2020/大作业题目1/movielens/movies.dat",
    sep="::",
    names=["movie_id", "title", "genres"],
    engine="python",
    encoding="ISO-8859-1",
)
# Derive the release year from every title, one value at a time. This replaces
# `movies.agg({...}, axios=1)`: `axios` was a misspelling of `axis`, and
# extracting a value per row is a mapping, not an aggregation.
movies["year"] = movies["title"].map(getYear)
data = pd.merge(movies, rating, how="inner", on="movie_id")
users = pd.read_table(
    "../大作业2020/大作业题目1/movielens/users.dat",
    sep="::",
    names=["user_id", "gender", "age", "occupation", "zip"],
    engine="python",
)
# One row per rating, enriched with the movie columns and then the user columns.
data = pd.merge(data, users, how="inner", on="user_id")
# Bare expression kept from the original cell; it only displays something when
# it is the last statement of a notebook cell.
rating
# Build a long-format table with one (movie_id, genre) row per genre of each
# movie; the "genres" field of movies holds the genres joined with "|".
# Iterating the two columns in lockstep avoids a `.loc` lookup per row.
genres = pd.DataFrame(
    [
        [movie_id, genre]
        for movie_id, joined in zip(movies["movie_id"], movies["genres"])
        for genre in joined.split("|")
    ],
    columns=["movie_id", "genres"],
)
# Movies per genre, smallest first.
genresCount = genres.groupby(by="genres").count().sort_values(by="movie_id")
# Replace the "|"-joined genres column of data with the per-genre column.
# Unlike `del data["genres"]`, drop(..., errors="ignore") does not raise
# KeyError when the column has already been removed.
data = data.drop(columns="genres", errors="ignore")
# After this merge each rating appears once per genre of its movie.
data = pd.merge(data, genres, how="inner", on="movie_id")
data.head(1000)
movie_id | title | year | user_id | rating | timestamp | gender | age | occupation | zip | genres | |
---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1 | Toy Story (1995) | 1995 | 1 | 5 | 978824268 | F | 1 | 10 | 48067 | Animation |
1 | 1 | Toy Story (1995) | 1995 | 1 | 5 | 978824268 | F | 1 | 10 | 48067 | Children's |
2 | 1 | Toy Story (1995) | 1995 | 1 | 5 | 978824268 | F | 1 | 10 | 48067 | Comedy |
3 | 1 | Toy Story (1995) | 1995 | 6 | 4 | 978237008 | F | 50 | 9 | 55117 | Animation |
4 | 1 | Toy Story (1995) | 1995 | 6 | 4 | 978237008 | F | 50 | 9 | 55117 | Children's |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
995 | 1 | Toy Story (1995) | 1995 | 973 | 4 | 975860648 | F | 25 | 1 | 80026 | Comedy |
996 | 1 | Toy Story (1995) | 1995 | 977 | 3 | 975106685 | M | 25 | 2 | 80110 | Animation |
997 | 1 | Toy Story (1995) | 1995 | 977 | 3 | 975106685 | M | 25 | 2 | 80110 | Children's |
998 | 1 | Toy Story (1995) | 1995 | 977 | 3 | 975106685 | M | 25 | 2 | 80110 | Comedy |
999 | 1 | Toy Story (1995) | 1995 | 979 | 4 | 988055769 | M | 1 | 10 | 48073 | Animation |
1000 rows × 11 columns
import pandas as pd;
import re
import numpy as np
import matplotlib.pyplot as plt
# Rows are genres, columns are (gender, release year) pairs. No `values` is
# given, so count is applied to every remaining column of data and the result
# gets a three-level column index (value column, gender, year); the code below
# reads the "age" slice, whose cells are the rating-row counts per group.
pivot_table = pd.pivot_table(data, index="genres", columns=["gender", "year"], aggfunc=count)
# Combinations that never occur come out as NaN; treat them as zero.
pivot_table.fillna(value=0, inplace=True)
# Transpose so that years run along the x axis and each genre is one line.
# The titles previously said "1919~2020", but the release years in the data
# stop at 2000.
drawLine(pivot_table[("age", "M")].transpose(), "1919~2000年男性电影观看变化")
drawLine(pivot_table[("age", "F")].transpose(), "1919~2000年女性电影观看变化")
pivot_table.head(1000)
age | ... | zip | |||||||||||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
gender | F | ... | M | ||||||||||||||||||
year | 1919 | 1920 | 1921 | 1922 | 1923 | 1925 | 1926 | 1927 | 1928 | 1929 | ... | 1991 | 1992 | 1993 | 1994 | 1995 | 1996 | 1997 | 1998 | 1999 | 2000 |
genres | |||||||||||||||||||||
Action | 2.0 | 0.0 | 11.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | ... | 5557.0 | 8286.0 | 10967.0 | 9739.0 | 13057.0 | 12812.0 | 16087.0 | 16399.0 | 12329.0 | 9797.0 |
Adventure | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 7.0 | 0.0 | 0.0 | 0.0 | ... | 2094.0 | 1346.0 | 4810.0 | 4366.0 | 6120.0 | 7453.0 | 7446.0 | 4133.0 | 4986.0 | 1848.0 |
Animation | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | ... | 824.0 | 1280.0 | 984.0 | 964.0 | 2694.0 | 1965.0 | 851.0 | 3004.0 | 3354.0 | 2159.0 |
Children's | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | ... | 1029.0 | 1964.0 | 2208.0 | 2746.0 | 4539.0 | 2637.0 | 1924.0 | 3562.0 | 3231.0 | 1826.0 |
Comedy | 24.0 | 4.0 | 0.0 | 0.0 | 1.0 | 76.0 | 1.0 | 33.0 | 3.0 | 0.0 | ... | 4970.0 | 11283.0 | 12658.0 | 16739.0 | 14666.0 | 14462.0 | 15431.0 | 17832.0 | 26541.0 | 11129.0 |
Crime | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | ... | 248.0 | 5468.0 | 2075.0 | 3737.0 | 6011.0 | 4659.0 | 6912.0 | 6971.0 | 2364.0 | 1509.0 |
Documentary | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | ... | 148.0 | 33.0 | 260.0 | 1048.0 | 297.0 | 693.0 | 437.0 | 415.0 | 664.0 | 334.0 |
Drama | 2.0 | 0.0 | 0.0 | 0.0 | 1.0 | 51.0 | 16.0 | 13.0 | 0.0 | 0.0 | ... | 7834.0 | 9497.0 | 11740.0 | 13705.0 | 18379.0 | 15226.0 | 18115.0 | 16693.0 | 21320.0 | 9881.0 |
Fantasy | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | ... | 983.0 | 352.0 | 348.0 | 1816.0 | 888.0 | 1787.0 | 494.0 | 343.0 | 2039.0 | 110.0 |
Film-Noir | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | ... | 0.0 | 0.0 | 23.0 | 0.0 | 272.0 | 242.0 | 1787.0 | 932.0 | 0.0 | 0.0 |
Horror | 0.0 | 0.0 | 0.0 | 51.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | ... | 309.0 | 2389.0 | 1495.0 | 1868.0 | 1809.0 | 2609.0 | 2646.0 | 2790.0 | 5189.0 | 1846.0 |
Musical | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 15.0 | ... | 1201.0 | 1158.0 | 934.0 | 844.0 | 666.0 | 1277.0 | 742.0 | 660.0 | 324.0 | 147.0 |
Mystery | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | ... | 1120.0 | 713.0 | 980.0 | 475.0 | 975.0 | 1831.0 | 6457.0 | 3965.0 | 1200.0 | 445.0 |
Romance | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 12.0 | 0.0 | 0.0 | ... | 2454.0 | 4957.0 | 5728.0 | 9492.0 | 7489.0 | 7146.0 | 6180.0 | 8539.0 | 5597.0 | 1382.0 |
Sci-Fi | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 65.0 | 0.0 | 0.0 | 0.0 | ... | 4136.0 | 3479.0 | 4782.0 | 2849.0 | 5958.0 | 6270.0 | 10448.0 | 7026.0 | 8233.0 | 4608.0 |
Thriller | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 8.0 | 0.0 | 0.0 | 2.0 | ... | 5227.0 | 5824.0 | 5594.0 | 6460.0 | 10813.0 | 12358.0 | 16133.0 | 14574.0 | 16534.0 | 7839.0 |
War | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 50.0 | 0.0 | 12.0 | 0.0 | 0.0 | ... | 66.0 | 1696.0 | 2404.0 | 2531.0 | 3463.0 | 3478.0 | 2523.0 | 3348.0 | 1161.0 | 1021.0 |
Western | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | ... | 0.0 | 838.0 | 547.0 | 1808.0 | 538.0 | 238.0 | 0.0 | 20.0 | 735.0 | 0.0 |
18 rows × 1296 columns
# Male counts with one row per release year and one column per genre
# (the ("age", "M") slice of the pivot table, transposed).
male_slice = ("age", "M")
temp = pivot_table[male_slice].T
temp.head(50)
genres | Action | Adventure | Animation | Children's | Comedy | Crime | Documentary | Drama | Fantasy | Film-Noir | Horror | Musical | Mystery | Romance | Sci-Fi | Thriller | War | Western |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
year | ||||||||||||||||||
1919 | 2.0 | 3.0 | 0.0 | 0.0 | 14.0 | 0.0 | 0.0 | 5.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
1920 | 0.0 | 0.0 | 0.0 | 0.0 | 20.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
1921 | 51.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
1922 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 187.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
1923 | 0.0 | 0.0 | 0.0 | 0.0 | 8.0 | 0.0 | 0.0 | 7.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
1925 | 0.0 | 0.0 | 0.0 | 0.0 | 246.0 | 0.0 | 0.0 | 188.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 184.0 | 0.0 |
1926 | 0.0 | 7.0 | 0.0 | 0.0 | 9.0 | 1.0 | 0.0 | 23.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 323.0 | 13.0 | 0.0 | 0.0 |
1927 | 0.0 | 0.0 | 0.0 | 0.0 | 173.0 | 0.0 | 0.0 | 33.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 33.0 | 0.0 | 0.0 | 33.0 | 0.0 |
1928 | 0.0 | 0.0 | 0.0 | 0.0 | 24.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
1929 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 17.0 | 0.0 | 0.0 | 0.0 | 9.0 | 0.0 | 0.0 |
1930 | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 | 1.0 | 1.0 | 72.0 | 0.0 | 0.0 | 0.0 | 1.0 | 8.0 | 0.0 | 0.0 | 7.0 | 221.0 | 0.0 |
1931 | 0.0 | 0.0 | 0.0 | 0.0 | 216.0 | 248.0 | 0.0 | 198.0 | 0.0 | 248.0 | 527.0 | 0.0 | 0.0 | 198.0 | 0.0 | 248.0 | 0.0 | 22.0 |
1932 | 0.0 | 0.0 | 0.0 | 0.0 | 37.0 | 0.0 | 0.0 | 76.0 | 0.0 | 0.0 | 127.0 | 0.0 | 0.0 | 203.0 | 0.0 | 1.0 | 39.0 | 0.0 |
1933 | 649.0 | 649.0 | 0.0 | 0.0 | 474.0 | 0.0 | 0.0 | 3.0 | 0.0 | 0.0 | 822.0 | 1.0 | 0.0 | 0.0 | 188.0 | 0.0 | 473.0 | 0.0 |
1934 | 0.0 | 0.0 | 0.0 | 0.0 | 370.0 | 0.0 | 0.0 | 27.0 | 0.0 | 0.0 | 0.0 | 54.0 | 196.0 | 54.0 | 0.0 | 96.0 | 0.0 | 0.0 |
1935 | 0.0 | 176.0 | 0.0 | 0.0 | 133.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 172.0 | 133.0 | 0.0 | 133.0 | 0.0 | 195.0 | 0.0 | 0.0 |
1936 | 0.0 | 0.0 | 0.0 | 0.0 | 364.0 | 0.0 | 0.0 | 15.0 | 0.0 | 0.0 | 0.0 | 24.0 | 2.0 | 0.0 | 0.0 | 61.0 | 0.0 | 0.0 |
1937 | 0.0 | 0.0 | 512.0 | 512.0 | 109.0 | 8.0 | 0.0 | 315.0 | 0.0 | 0.0 | 0.0 | 621.0 | 0.0 | 107.0 | 0.0 | 8.0 | 124.0 | 0.0 |
1938 | 294.0 | 294.0 | 0.0 | 0.0 | 431.0 | 0.0 | 0.0 | 137.0 | 0.0 | 0.0 | 0.0 | 0.0 | 129.0 | 138.0 | 0.0 | 129.0 | 0.0 | 0.0 |
1939 | 0.0 | 1294.0 | 83.0 | 1320.0 | 131.0 | 9.0 | 0.0 | 2278.0 | 0.0 | 0.0 | 53.0 | 1211.0 | 0.0 | 801.0 | 0.0 | 0.0 | 715.0 | 0.0 |
1940 | 0.0 | 81.0 | 1150.0 | 1150.0 | 1137.0 | 0.0 | 0.0 | 432.0 | 0.0 | 0.0 | 37.0 | 802.0 | 0.0 | 678.0 | 9.0 | 283.0 | 0.0 | 34.0 |
1941 | 0.0 | 0.0 | 368.0 | 368.0 | 119.0 | 0.0 | 0.0 | 982.0 | 0.0 | 808.0 | 115.0 | 375.0 | 913.0 | 111.0 | 0.0 | 105.0 | 0.0 | 0.0 |
1942 | 93.0 | 0.0 | 395.0 | 395.0 | 462.0 | 0.0 | 0.0 | 1426.0 | 0.0 | 0.0 | 60.0 | 162.0 | 0.0 | 1164.0 | 0.0 | 71.0 | 1426.0 | 8.0 |
1943 | 0.0 | 83.0 | 12.0 | 12.0 | 21.0 | 0.0 | 0.0 | 81.0 | 0.0 | 162.0 | 105.0 | 0.0 | 9.0 | 0.0 | 0.0 | 237.0 | 85.0 | 20.0 |
1944 | 89.0 | 0.0 | 0.0 | 0.0 | 485.0 | 579.0 | 0.0 | 259.0 | 0.0 | 656.0 | 78.0 | 101.0 | 726.0 | 0.0 | 0.0 | 788.0 | 259.0 | 0.0 |
1945 | 0.0 | 0.0 | 94.0 | 94.0 | 56.0 | 0.0 | 0.0 | 234.0 | 0.0 | 0.0 | 82.0 | 150.0 | 132.0 | 196.0 | 0.0 | 132.0 | 26.0 | 0.0 |
1946 | 0.0 | 153.0 | 172.0 | 172.0 | 0.0 | 16.0 | 0.0 | 787.0 | 0.0 | 769.0 | 0.0 | 172.0 | 416.0 | 357.0 | 0.0 | 317.0 | 174.0 | 60.0 |
1947 | 0.0 | 9.0 | 27.0 | 27.0 | 58.0 | 25.0 | 0.0 | 426.0 | 0.0 | 118.0 | 0.0 | 27.0 | 0.0 | 98.0 | 0.0 | 0.0 | 0.0 | 43.0 |
1948 | 0.0 | 364.0 | 19.0 | 19.0 | 171.0 | 361.0 | 0.0 | 621.0 | 0.0 | 398.0 | 165.0 | 19.0 | 0.0 | 0.0 | 0.0 | 508.0 | 0.0 | 0.0 |
1949 | 0.0 | 111.0 | 0.0 | 114.0 | 0.0 | 1.0 | 0.0 | 217.0 | 0.0 | 0.0 | 0.0 | 136.0 | 377.0 | 1.0 | 0.0 | 377.0 | 0.0 | 0.0 |
1950 | 0.0 | 0.0 | 353.0 | 362.0 | 546.0 | 123.0 | 0.0 | 255.0 | 0.0 | 476.0 | 0.0 | 353.0 | 19.0 | 0.0 | 105.0 | 19.0 | 0.0 | 0.0 |
1951 | 853.0 | 853.0 | 338.0 | 338.0 | 0.0 | 0.0 | 0.0 | 915.0 | 0.0 | 347.0 | 0.0 | 534.0 | 0.0 | 1046.0 | 588.0 | 347.0 | 853.0 | 0.0 |
1952 | 4.0 | 65.0 | 0.0 | 0.0 | 219.0 | 1.0 | 0.0 | 94.0 | 0.0 | 0.0 | 0.0 | 454.0 | 0.0 | 613.0 | 60.0 | 0.0 | 0.0 | 340.0 |
1953 | 668.0 | 0.0 | 396.0 | 396.0 | 245.0 | 0.0 | 0.0 | 1074.0 | 396.0 | 15.0 | 0.0 | 434.0 | 0.0 | 462.0 | 585.0 | 50.0 | 1086.0 | 249.0 |
1954 | 784.0 | 479.0 | 0.0 | 478.0 | 201.0 | 404.0 | 0.0 | 956.0 | 478.0 | 0.0 | 182.0 | 169.0 | 1010.0 | 172.0 | 986.0 | 1250.0 | 240.0 | 0.0 |
1955 | 0.0 | 0.0 | 545.0 | 545.0 | 1154.0 | 0.0 | 0.0 | 1063.0 | 0.0 | 33.0 | 56.0 | 689.0 | 108.0 | 977.0 | 103.0 | 384.0 | 333.0 | 114.0 |
1956 | 0.0 | 235.0 | 0.0 | 0.0 | 217.0 | 112.0 | 0.0 | 269.0 | 0.0 | 183.0 | 556.0 | 217.0 | 0.0 | 0.0 | 1020.0 | 280.0 | 18.0 | 210.0 |
1957 | 0.0 | 0.0 | 0.0 | 210.0 | 257.0 | 0.0 | 1.0 | 2062.0 | 0.0 | 0.0 | 56.0 | 128.0 | 0.0 | 139.0 | 67.0 | 32.0 | 999.0 | 0.0 |
1958 | 319.0 | 318.0 | 0.0 | 0.0 | 289.0 | 405.0 | 0.0 | 554.0 | 214.0 | 405.0 | 1036.0 | 292.0 | 671.0 | 369.0 | 879.0 | 1076.0 | 369.0 | 53.0 |
1959 | 576.0 | 683.0 | 319.0 | 544.0 | 697.0 | 577.0 | 0.0 | 2047.0 | 107.0 | 0.0 | 286.0 | 319.0 | 150.0 | 0.0 | 133.0 | 983.0 | 100.0 | 0.0 |
1960 | 0.0 | 203.0 | 0.0 | 281.0 | 617.0 | 19.0 | 0.0 | 988.0 | 0.0 | 0.0 | 1472.0 | 30.0 | 0.0 | 0.0 | 177.0 | 1228.0 | 0.0 | 0.0 |
1961 | 453.0 | 146.0 | 378.0 | 909.0 | 544.0 | 0.0 | 0.0 | 1806.0 | 381.0 | 0.0 | 81.0 | 644.0 | 0.0 | 835.0 | 146.0 | 16.0 | 484.0 | 189.0 |
1962 | 1042.0 | 746.0 | 0.0 | 12.0 | 0.0 | 0.0 | 0.0 | 1306.0 | 0.0 | 859.0 | 157.0 | 53.0 | 0.0 | 0.0 | 158.0 | 1082.0 | 991.0 | 43.0 |
1963 | 670.0 | 917.0 | 207.0 | 320.0 | 721.0 | 0.0 | 0.0 | 721.0 | 0.0 | 0.0 | 889.0 | 0.0 | 185.0 | 185.0 | 1245.0 | 683.0 | 1740.0 | 211.0 |
1964 | 1236.0 | 0.0 | 0.0 | 657.0 | 1081.0 | 0.0 | 0.0 | 157.0 | 0.0 | 0.0 | 59.0 | 1466.0 | 0.0 | 336.0 | 0.0 | 221.0 | 0.0 | 477.0 |
1965 | 407.0 | 0.0 | 0.0 | 72.0 | 689.0 | 0.0 | 0.0 | 595.0 | 0.0 | 0.0 | 12.0 | 918.0 | 72.0 | 408.0 | 57.0 | 186.0 | 385.0 | 518.0 |
1966 | 723.0 | 292.0 | 0.0 | 0.0 | 30.0 | 0.0 | 120.0 | 601.0 | 0.0 | 0.0 | 0.0 | 0.0 | 190.0 | 83.0 | 292.0 | 42.0 | 0.0 | 749.0 |
1967 | 636.0 | 180.0 | 445.0 | 474.0 | 1703.0 | 515.0 | 0.0 | 2914.0 | 0.0 | 0.0 | 0.0 | 630.0 | 258.0 | 886.0 | 59.0 | 0.0 | 668.0 | 303.0 |
1968 | 973.0 | 317.0 | 428.0 | 187.0 | 840.0 | 289.0 | 0.0 | 1907.0 | 0.0 | 0.0 | 1112.0 | 912.0 | 1372.0 | 202.0 | 3254.0 | 2100.0 | 75.0 | 0.0 |
1969 | 1735.0 | 917.0 | 0.0 | 172.0 | 1512.0 | 11.0 | 0.0 | 1001.0 | 0.0 | 0.0 | 3.0 | 0.0 | 0.0 | 0.0 | 0.0 | 42.0 | 304.0 | 1858.0 |
# Female counts with one row per release year and one column per genre
# (the ("age", "F") slice of the pivot table, transposed).
female_slice = ("age", "F")
temp = pivot_table[female_slice].T
temp.head(50)
genres | Action | Adventure | Animation | Children's | Comedy | Crime | Documentary | Drama | Fantasy | Film-Noir | Horror | Musical | Mystery | Romance | Sci-Fi | Thriller | War | Western |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
year | ||||||||||||||||||
1919 | 2.0 | 0.0 | 0.0 | 0.0 | 24.0 | 0.0 | 0.0 | 2.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
1920 | 0.0 | 0.0 | 0.0 | 0.0 | 4.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
1921 | 11.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
1922 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 51.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
1923 | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
1925 | 0.0 | 0.0 | 0.0 | 0.0 | 76.0 | 0.0 | 0.0 | 51.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 50.0 | 0.0 |
1926 | 0.0 | 7.0 | 0.0 | 0.0 | 1.0 | 1.0 | 0.0 | 16.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 65.0 | 8.0 | 0.0 | 0.0 |
1927 | 0.0 | 0.0 | 0.0 | 0.0 | 33.0 | 0.0 | 0.0 | 13.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 12.0 | 0.0 | 0.0 | 12.0 | 0.0 |
1928 | 0.0 | 0.0 | 0.0 | 0.0 | 3.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
1929 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 15.0 | 0.0 | 0.0 | 0.0 | 2.0 | 0.0 | 0.0 |
1930 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 30.0 | 0.0 | 0.0 | 0.0 | 0.0 | 3.0 | 0.0 | 0.0 | 3.0 | 47.0 | 0.0 |
1931 | 0.0 | 0.0 | 0.0 | 0.0 | 76.0 | 60.0 | 0.0 | 73.0 | 0.0 | 60.0 | 117.0 | 0.0 | 0.0 | 73.0 | 0.0 | 60.0 | 0.0 | 7.0 |
1932 | 0.0 | 0.0 | 0.0 | 0.0 | 15.0 | 0.0 | 0.0 | 25.0 | 0.0 | 0.0 | 35.0 | 0.0 | 0.0 | 67.0 | 0.0 | 0.0 | 17.0 | 0.0 |
1933 | 124.0 | 124.0 | 0.0 | 0.0 | 121.0 | 0.0 | 0.0 | 2.0 | 0.0 | 0.0 | 166.0 | 0.0 | 0.0 | 0.0 | 44.0 | 0.0 | 121.0 | 0.0 |
1934 | 0.0 | 0.0 | 0.0 | 0.0 | 196.0 | 0.0 | 0.0 | 31.0 | 0.0 | 0.0 | 0.0 | 51.0 | 96.0 | 51.0 | 0.0 | 32.0 | 0.0 | 0.0 |
1935 | 0.0 | 49.0 | 0.0 | 0.0 | 118.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 44.0 | 118.0 | 0.0 | 118.0 | 0.0 | 58.0 | 0.0 | 0.0 |
1936 | 0.0 | 0.0 | 0.0 | 0.0 | 129.0 | 0.0 | 0.0 | 9.0 | 0.0 | 0.0 | 0.0 | 12.0 | 0.0 | 0.0 | 0.0 | 12.0 | 0.0 | 0.0 |
1937 | 0.0 | 0.0 | 251.0 | 251.0 | 91.0 | 2.0 | 0.0 | 131.0 | 0.0 | 0.0 | 0.0 | 342.0 | 0.0 | 91.0 | 0.0 | 2.0 | 41.0 | 0.0 |
1938 | 84.0 | 84.0 | 0.0 | 0.0 | 258.0 | 0.0 | 0.0 | 73.0 | 0.0 | 0.0 | 0.0 | 0.0 | 70.0 | 77.0 | 0.0 | 70.0 | 0.0 | 0.0 |
1939 | 0.0 | 541.0 | 34.0 | 592.0 | 85.0 | 2.0 | 0.0 | 1108.0 | 0.0 | 0.0 | 12.0 | 507.0 | 0.0 | 487.0 | 0.0 | 0.0 | 441.0 | 0.0 |
1940 | 0.0 | 24.0 | 488.0 | 488.0 | 608.0 | 0.0 | 0.0 | 167.0 | 0.0 | 0.0 | 8.0 | 338.0 | 0.0 | 525.0 | 3.0 | 196.0 | 0.0 | 12.0 |
1941 | 0.0 | 0.0 | 200.0 | 200.0 | 66.0 | 0.0 | 0.0 | 363.0 | 0.0 | 235.0 | 19.0 | 200.0 | 297.0 | 74.0 | 0.0 | 62.0 | 0.0 | 0.0 |
1942 | 8.0 | 0.0 | 194.0 | 194.0 | 239.0 | 0.0 | 0.0 | 587.0 | 0.0 | 0.0 | 9.0 | 113.0 | 0.0 | 505.0 | 0.0 | 18.0 | 587.0 | 5.0 |
1943 | 0.0 | 15.0 | 3.0 | 3.0 | 7.0 | 0.0 | 0.0 | 37.0 | 0.0 | 71.0 | 19.0 | 0.0 | 4.0 | 0.0 | 0.0 | 108.0 | 15.0 | 3.0 |
1944 | 12.0 | 0.0 | 0.0 | 0.0 | 250.0 | 264.0 | 0.0 | 55.0 | 0.0 | 299.0 | 20.0 | 88.0 | 425.0 | 0.0 | 0.0 | 394.0 | 55.0 | 0.0 |
1945 | 0.0 | 0.0 | 32.0 | 32.0 | 47.0 | 0.0 | 0.0 | 138.0 | 0.0 | 0.0 | 21.0 | 79.0 | 74.0 | 110.0 | 0.0 | 74.0 | 3.0 | 0.0 |
1946 | 0.0 | 68.0 | 69.0 | 69.0 | 0.0 | 4.0 | 0.0 | 297.0 | 0.0 | 324.0 | 0.0 | 69.0 | 125.0 | 187.0 | 0.0 | 171.0 | 62.0 | 12.0 |
1947 | 0.0 | 3.0 | 8.0 | 8.0 | 7.0 | 11.0 | 0.0 | 236.0 | 0.0 | 41.0 | 0.0 | 8.0 | 0.0 | 85.0 | 0.0 | 0.0 | 0.0 | 8.0 |
1948 | 0.0 | 89.0 | 5.0 | 5.0 | 41.0 | 129.0 | 0.0 | 212.0 | 0.0 | 137.0 | 41.0 | 5.0 | 0.0 | 0.0 | 0.0 | 173.0 | 0.0 | 0.0 |
1949 | 0.0 | 33.0 | 0.0 | 38.0 | 0.0 | 1.0 | 0.0 | 64.0 | 0.0 | 0.0 | 0.0 | 87.0 | 103.0 | 0.0 | 0.0 | 103.0 | 0.0 | 0.0 |
1950 | 0.0 | 0.0 | 224.0 | 224.0 | 291.0 | 28.0 | 0.0 | 157.0 | 0.0 | 145.0 | 0.0 | 224.0 | 7.0 | 0.0 | 12.0 | 7.0 | 0.0 | 0.0 |
1951 | 309.0 | 309.0 | 187.0 | 187.0 | 0.0 | 0.0 | 0.0 | 306.0 | 0.0 | 135.0 | 0.0 | 335.0 | 0.0 | 488.0 | 106.0 | 135.0 | 309.0 | 0.0 |
1952 | 0.0 | 15.0 | 0.0 | 0.0 | 80.0 | 0.0 | 0.0 | 43.0 | 0.0 | 0.0 | 0.0 | 297.0 | 0.0 | 364.0 | 13.0 | 0.0 | 0.0 | 63.0 |
1953 | 116.0 | 0.0 | 198.0 | 198.0 | 251.0 | 0.0 | 0.0 | 313.0 | 198.0 | 3.0 | 0.0 | 231.0 | 0.0 | 327.0 | 75.0 | 19.0 | 237.0 | 56.0 |
1954 | 145.0 | 97.0 | 0.0 | 97.0 | 201.0 | 127.0 | 0.0 | 246.0 | 97.0 | 0.0 | 41.0 | 102.0 | 419.0 | 193.0 | 180.0 | 459.0 | 40.0 | 0.0 |
1955 | 0.0 | 0.0 | 319.0 | 319.0 | 574.0 | 0.0 | 0.0 | 401.0 | 0.0 | 5.0 | 6.0 | 435.0 | 39.0 | 585.0 | 12.0 | 206.0 | 88.0 | 16.0 |
1956 | 0.0 | 53.0 | 0.0 | 0.0 | 52.0 | 18.0 | 0.0 | 141.0 | 0.0 | 45.0 | 143.0 | 155.0 | 0.0 | 0.0 | 221.0 | 124.0 | 1.0 | 35.0 |
1957 | 0.0 | 0.0 | 0.0 | 91.0 | 205.0 | 0.0 | 2.0 | 519.0 | 0.0 | 0.0 | 7.0 | 118.0 | 0.0 | 166.0 | 8.0 | 6.0 | 169.0 | 0.0 |
1958 | 83.0 | 71.0 | 0.0 | 0.0 | 223.0 | 109.0 | 0.0 | 336.0 | 44.0 | 109.0 | 229.0 | 226.0 | 234.0 | 201.0 | 188.0 | 343.0 | 131.0 | 11.0 |
1959 | 128.0 | 179.0 | 192.0 | 281.0 | 295.0 | 255.0 | 0.0 | 636.0 | 51.0 | 0.0 | 72.0 | 192.0 | 49.0 | 0.0 | 36.0 | 332.0 | 7.0 | 0.0 |
1960 | 0.0 | 73.0 | 0.0 | 145.0 | 292.0 | 6.0 | 0.0 | 358.0 | 0.0 | 0.0 | 459.0 | 19.0 | 0.0 | 0.0 | 42.0 | 364.0 | 0.0 | 0.0 |
1961 | 49.0 | 34.0 | 187.0 | 456.0 | 172.0 | 0.0 | 0.0 | 741.0 | 161.0 | 0.0 | 26.0 | 400.0 | 0.0 | 602.0 | 34.0 | 7.0 | 62.0 | 26.0 |
1962 | 161.0 | 172.0 | 0.0 | 6.0 | 0.0 | 0.0 | 0.0 | 488.0 | 0.0 | 267.0 | 31.0 | 69.0 | 0.0 | 0.0 | 34.0 | 374.0 | 192.0 | 7.0 |
1963 | 101.0 | 223.0 | 86.0 | 140.0 | 303.0 | 0.0 | 0.0 | 232.0 | 0.0 | 0.0 | 286.0 | 0.0 | 121.0 | 121.0 | 258.0 | 295.0 | 323.0 | 68.0 |
1964 | 183.0 | 0.0 | 0.0 | 354.0 | 513.0 | 0.0 | 0.0 | 95.0 | 0.0 | 0.0 | 19.0 | 850.0 | 0.0 | 300.0 | 0.0 | 62.0 | 0.0 | 45.0 |
1965 | 56.0 | 0.0 | 0.0 | 51.0 | 260.0 | 0.0 | 0.0 | 234.0 | 0.0 | 0.0 | 3.0 | 419.0 | 51.0 | 189.0 | 7.0 | 54.0 | 179.0 | 87.0 |
1966 | 99.0 | 56.0 | 0.0 | 0.0 | 19.0 | 0.0 | 29.0 | 257.0 | 0.0 | 0.0 | 0.0 | 0.0 | 56.0 | 57.0 | 56.0 | 16.0 | 0.0 | 101.0 |
1967 | 71.0 | 81.0 | 219.0 | 235.0 | 652.0 | 171.0 | 0.0 | 1038.0 | 0.0 | 0.0 | 0.0 | 309.0 | 90.0 | 375.0 | 10.0 | 0.0 | 76.0 | 20.0 |
1968 | 189.0 | 69.0 | 192.0 | 91.0 | 264.0 | 65.0 | 0.0 | 576.0 | 0.0 | 0.0 | 329.0 | 358.0 | 344.0 | 154.0 | 725.0 | 608.0 | 1.0 | 0.0 |
1969 | 358.0 | 153.0 | 0.0 | 70.0 | 404.0 | 2.0 | 0.0 | 298.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 10.0 | 17.0 | 376.0 |
# Keep only the rows whose year is the string "1939" and whose genre is "Drama".
tempData = data.loc[
    (data["year"] == "1939") & (data["genres"] == "Drama")
]
# For every title in that selection, count its non-null movie_id entries.
dramaCount = tempData[["title", "movie_id"]].groupby("title").count()
dramaCount
movie_id | |
---|---|
title | |
Gone with the Wind (1939) | 1156 |
Jamaica Inn (1939) | 8 |
Little Princess, The (1939) | 77 |
Mr. Smith Goes to Washington (1939) | 383 |
Only Angels Have Wings (1939) | 33 |
They Made Me a Criminal (1939) | 11 |
Wizard of Oz, The (1939) | 1718 |
# Pick the ("age", "M") slice of the pivot table and transpose it so that
# years run down the rows and genres across the columns.
temp = pivot_table[("age", "M")].T
# Show the last 20 rows of the transposed table.
temp.iloc[-20:]
genres | Action | Adventure | Animation | Children's | Comedy | Crime | Documentary | Drama | Fantasy | Film-Noir | Horror | Musical | Mystery | Romance | Sci-Fi | Thriller | War | Western |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
year | ||||||||||||||||||
1981 | 6489.0 | 4396.0 | 835.0 | 558.0 | 2086.0 | 534.0 | 43.0 | 2973.0 | 1431.0 | 0.0 | 1435.0 | 97.0 | 172.0 | 1151.0 | 3059.0 | 1495.0 | 909.0 | 0.0 |
1982 | 3993.0 | 2635.0 | 293.0 | 2529.0 | 3118.0 | 285.0 | 42.0 | 4758.0 | 3072.0 | 1488.0 | 2292.0 | 776.0 | 0.0 | 456.0 | 6707.0 | 1550.0 | 323.0 | 0.0 |
1983 | 2944.0 | 2661.0 | 0.0 | 158.0 | 5111.0 | 0.0 | 138.0 | 3870.0 | 0.0 | 0.0 | 1489.0 | 0.0 | 0.0 | 2552.0 | 2661.0 | 737.0 | 2368.0 | 0.0 |
1984 | 5590.0 | 4834.0 | 0.0 | 752.0 | 8039.0 | 512.0 | 182.0 | 6486.0 | 2125.0 | 514.0 | 3385.0 | 868.0 | 443.0 | 2746.0 | 6614.0 | 2747.0 | 1352.0 | 0.0 |
1985 | 3033.0 | 3218.0 | 147.0 | 1216.0 | 9588.0 | 0.0 | 0.0 | 5084.0 | 1623.0 | 0.0 | 1317.0 | 0.0 | 775.0 | 3926.0 | 4736.0 | 1066.0 | 755.0 | 289.0 |
1986 | 7279.0 | 5710.0 | 571.0 | 1889.0 | 9993.0 | 739.0 | 0.0 | 7372.0 | 518.0 | 0.0 | 2681.0 | 674.0 | 930.0 | 2470.0 | 4456.0 | 3195.0 | 3219.0 | 470.0 |
1987 | 9240.0 | 3650.0 | 0.0 | 41.0 | 9542.0 | 3459.0 | 0.0 | 6717.0 | 30.0 | 352.0 | 1817.0 | 396.0 | 484.0 | 3121.0 | 4120.0 | 2568.0 | 2435.0 | 0.0 |
1988 | 4468.0 | 3713.0 | 1884.0 | 96.0 | 8866.0 | 1098.0 | 357.0 | 4010.0 | 2950.0 | 1381.0 | 1126.0 | 105.0 | 0.0 | 1210.0 | 1278.0 | 2433.0 | 483.0 | 484.0 |
1989 | 6877.0 | 5392.0 | 800.0 | 1618.0 | 9875.0 | 2435.0 | 642.0 | 8763.0 | 791.0 | 0.0 | 1608.0 | 661.0 | 0.0 | 2547.0 | 4105.0 | 2251.0 | 1212.0 | 0.0 |
1990 | 9797.0 | 3362.0 | 364.0 | 1186.0 | 6294.0 | 3882.0 | 93.0 | 7150.0 | 557.0 | 661.0 | 2353.0 | 0.0 | 533.0 | 3316.0 | 4819.0 | 8600.0 | 0.0 | 2342.0 |
1991 | 5557.0 | 2094.0 | 824.0 | 1029.0 | 4970.0 | 248.0 | 148.0 | 7834.0 | 983.0 | 0.0 | 309.0 | 1201.0 | 1120.0 | 2454.0 | 4136.0 | 5227.0 | 66.0 | 0.0 |
1992 | 8286.0 | 1346.0 | 1280.0 | 1964.0 | 11283.0 | 5468.0 | 33.0 | 9497.0 | 352.0 | 0.0 | 2389.0 | 1158.0 | 713.0 | 4957.0 | 3479.0 | 5824.0 | 1696.0 | 838.0 |
1993 | 10967.0 | 4810.0 | 984.0 | 2208.0 | 12658.0 | 2075.0 | 260.0 | 11740.0 | 348.0 | 23.0 | 1495.0 | 934.0 | 980.0 | 5728.0 | 4782.0 | 5594.0 | 2404.0 | 547.0 |
1994 | 9739.0 | 4366.0 | 964.0 | 2746.0 | 16739.0 | 3737.0 | 1048.0 | 13705.0 | 1816.0 | 0.0 | 1868.0 | 844.0 | 475.0 | 9492.0 | 2849.0 | 6460.0 | 2531.0 | 1808.0 |
1995 | 13057.0 | 6120.0 | 2694.0 | 4539.0 | 14666.0 | 6011.0 | 297.0 | 18379.0 | 888.0 | 272.0 | 1809.0 | 666.0 | 975.0 | 7489.0 | 5958.0 | 10813.0 | 3463.0 | 538.0 |
1996 | 12812.0 | 7453.0 | 1965.0 | 2637.0 | 14462.0 | 4659.0 | 693.0 | 15226.0 | 1787.0 | 242.0 | 2609.0 | 1277.0 | 1831.0 | 7146.0 | 6270.0 | 12358.0 | 3478.0 | 238.0 |
1997 | 16087.0 | 7446.0 | 851.0 | 1924.0 | 15431.0 | 6912.0 | 437.0 | 18115.0 | 494.0 | 1787.0 | 2646.0 | 742.0 | 6457.0 | 6180.0 | 10448.0 | 16133.0 | 2523.0 | 0.0 |
1998 | 16399.0 | 4133.0 | 3004.0 | 3562.0 | 17832.0 | 6971.0 | 415.0 | 16693.0 | 343.0 | 932.0 | 2790.0 | 660.0 | 3965.0 | 8539.0 | 7026.0 | 14574.0 | 3348.0 | 20.0 |
1999 | 12329.0 | 4986.0 | 3354.0 | 3231.0 | 26541.0 | 2364.0 | 664.0 | 21320.0 | 2039.0 | 0.0 | 5189.0 | 324.0 | 1200.0 | 5597.0 | 8233.0 | 16534.0 | 1161.0 | 735.0 |
2000 | 9797.0 | 1848.0 | 2159.0 | 1826.0 | 11129.0 | 1509.0 | 334.0 | 9881.0 | 110.0 | 0.0 | 1846.0 | 147.0 | 445.0 | 1382.0 | 4608.0 | 7839.0 | 1021.0 | 0.0 |
# Rows for year "1995" (string), genre "Drama", and gender "M".
tempData = data.loc[
    (data["year"] == "1995")
    & (data["genres"] == "Drama")
    & (data["gender"] == "M")
]
# Count non-null movie_id entries per title, then order ascending by that count.
dramaCount = (
    tempData[["title", "movie_id"]]
    .groupby("title")
    .count()
    .sort_values("movie_id")
)
dramaCount
movie_id | |
---|---|
title | |
Diebinnen (1995) | 1 |
To Have, or Not (1995) | 1 |
Sleepover (1995) | 1 |
Boy Called Hate, A (1995) | 1 |
Billy's Holiday (1995) | 1 |
... | ... |
Apollo 13 (1995) | 923 |
Get Shorty (1995) | 1070 |
Babe (1995) | 1172 |
Twelve Monkeys (1995) | 1233 |
Braveheart (1995) | 1897 |
135 rows × 1 columns
- 与男性不同,80年代中期的终结者并没有吸引太多的女性用户,同期的Drama更为卖座
- 女性在2000年前的一个观影小高峰在1995,还是Drama类型的电影,最受关注的该类型电影是Braveheart(勇敢的心)和Babe(小猪宝贝)
- 女性电影观看的最高峰是在1999年,同样是Comedy类型的电影,最受关注的电影是(原文此处未写完,待补充)
# Pick the ("age", "F") slice of the pivot table and transpose it so that
# years run down the rows and genres across the columns.
temp = pivot_table[("age", "F")].T
# Show the last 20 rows of the transposed table.
temp.iloc[-20:]
genres | Action | Adventure | Animation | Children's | Comedy | Crime | Documentary | Drama | Fantasy | Film-Noir | Horror | Musical | Mystery | Romance | Sci-Fi | Thriller | War | Western |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
year | ||||||||||||||||||
1981 | 1376.0 | 983.0 | 182.0 | 232.0 | 573.0 | 140.0 | 19.0 | 853.0 | 321.0 | 0.0 | 253.0 | 15.0 | 85.0 | 382.0 | 487.0 | 348.0 | 173.0 | 0.0 |
1982 | 621.0 | 453.0 | 84.0 | 819.0 | 1010.0 | 82.0 | 4.0 | 1653.0 | 877.0 | 312.0 | 446.0 | 348.0 | 0.0 | 238.0 | 1536.0 | 353.0 | 64.0 | 0.0 |
1983 | 789.0 | 733.0 | 0.0 | 55.0 | 1686.0 | 0.0 | 44.0 | 1486.0 | 0.0 | 0.0 | 327.0 | 0.0 | 0.0 | 822.0 | 733.0 | 164.0 | 697.0 | 0.0 |
1984 | 1254.0 | 1260.0 | 0.0 | 288.0 | 2468.0 | 67.0 | 57.0 | 1981.0 | 708.0 | 114.0 | 973.0 | 250.0 | 55.0 | 978.0 | 1296.0 | 565.0 | 289.0 | 0.0 |
1985 | 550.0 | 934.0 | 53.0 | 440.0 | 3187.0 | 0.0 | 0.0 | 2113.0 | 518.0 | 0.0 | 199.0 | 0.0 | 323.0 | 1801.0 | 1249.0 | 425.0 | 107.0 | 23.0 |
1986 | 1508.0 | 1485.0 | 195.0 | 567.0 | 3351.0 | 141.0 | 0.0 | 2783.0 | 212.0 | 0.0 | 684.0 | 267.0 | 275.0 | 1224.0 | 871.0 | 684.0 | 546.0 | 99.0 |
1987 | 1959.0 | 1019.0 | 0.0 | 22.0 | 3189.0 | 732.0 | 0.0 | 2007.0 | 22.0 | 96.0 | 294.0 | 291.0 | 149.0 | 1463.0 | 651.0 | 634.0 | 602.0 | 0.0 |
1988 | 864.0 | 941.0 | 511.0 | 48.0 | 2849.0 | 171.0 | 89.0 | 1785.0 | 948.0 | 418.0 | 223.0 | 25.0 | 0.0 | 611.0 | 210.0 | 561.0 | 47.0 | 78.0 |
1989 | 1548.0 | 1333.0 | 450.0 | 721.0 | 3416.0 | 611.0 | 183.0 | 3124.0 | 254.0 | 0.0 | 322.0 | 374.0 | 0.0 | 1329.0 | 987.0 | 482.0 | 326.0 | 0.0 |
1990 | 2076.0 | 878.0 | 149.0 | 439.0 | 2088.0 | 950.0 | 48.0 | 2542.0 | 154.0 | 224.0 | 562.0 | 0.0 | 134.0 | 1656.0 | 910.0 | 2156.0 | 0.0 | 626.0 |
1991 | 1366.0 | 448.0 | 437.0 | 483.0 | 1926.0 | 98.0 | 61.0 | 3298.0 | 307.0 | 0.0 | 53.0 | 539.0 | 441.0 | 1245.0 | 846.0 | 1584.0 | 25.0 | 0.0 |
1992 | 1693.0 | 425.0 | 566.0 | 847.0 | 4154.0 | 1339.0 | 12.0 | 3788.0 | 88.0 | 0.0 | 574.0 | 548.0 | 201.0 | 2461.0 | 712.0 | 1341.0 | 633.0 | 159.0 |
1993 | 2336.0 | 1187.0 | 322.0 | 870.0 | 4804.0 | 344.0 | 91.0 | 4896.0 | 125.0 | 9.0 | 273.0 | 345.0 | 404.0 | 2984.0 | 1017.0 | 1671.0 | 796.0 | 93.0 |
1994 | 2191.0 | 1138.0 | 420.0 | 1179.0 | 6433.0 | 1191.0 | 318.0 | 6098.0 | 715.0 | 0.0 | 507.0 | 357.0 | 222.0 | 4437.0 | 525.0 | 1922.0 | 979.0 | 512.0 |
1995 | 2685.0 | 1474.0 | 1061.0 | 2087.0 | 6578.0 | 1503.0 | 159.0 | 8011.0 | 323.0 | 107.0 | 333.0 | 316.0 | 282.0 | 4783.0 | 1177.0 | 2680.0 | 961.0 | 75.0 |
1996 | 2639.0 | 1908.0 | 710.0 | 1222.0 | 5680.0 | 1328.0 | 204.0 | 6962.0 | 499.0 | 48.0 | 605.0 | 656.0 | 550.0 | 4118.0 | 1247.0 | 3274.0 | 1005.0 | 18.0 |
1997 | 3613.0 | 1818.0 | 366.0 | 883.0 | 6021.0 | 2068.0 | 144.0 | 7172.0 | 206.0 | 576.0 | 652.0 | 420.0 | 1886.0 | 3095.0 | 2248.0 | 4418.0 | 556.0 | 0.0 |
1998 | 3705.0 | 1071.0 | 1122.0 | 1462.0 | 7079.0 | 1752.0 | 165.0 | 7037.0 | 89.0 | 167.0 | 623.0 | 221.0 | 1158.0 | 4748.0 | 1344.0 | 3297.0 | 776.0 | 6.0 |
1999 | 3039.0 | 1368.0 | 1136.0 | 1269.0 | 10328.0 | 602.0 | 221.0 | 8282.0 | 626.0 | 0.0 | 1411.0 | 129.0 | 535.0 | 3064.0 | 2074.0 | 4885.0 | 275.0 | 167.0 |
2000 | 2327.0 | 462.0 | 696.0 | 659.0 | 4386.0 | 411.0 | 88.0 | 3781.0 | 25.0 | 0.0 | 519.0 | 53.0 | 132.0 | 754.0 | 997.0 | 2247.0 | 288.0 | 0.0 |
# Rows for year "1995" (string), genre "Drama", and gender "F".
tempData = data.loc[
    (data["year"] == "1995")
    & (data["genres"] == "Drama")
    & (data["gender"] == "F")
]
# Count non-null movie_id entries per title, then order ascending by that count.
dramaCount = (
    tempData[["title", "movie_id"]]
    .groupby("title")
    .count()
    .sort_values("movie_id")
)
dramaCount
movie_id | |
---|---|
title | |
Killer: A Journal of Murder (1995) | 1 |
Confessional, The (Le Confessionnal) (1995) | 1 |
Fall Time (1995) | 1 |
Midaq Alley (Callejón de los milagros, El) (1995) | 1 |
Neon Bible, The (1995) | 1 |
... | ... |
Apollo 13 (1995) | 328 |
American President, The (1995) | 379 |
Sense and Sensibility (1995) | 420 |
Braveheart (1995) | 546 |
Babe (1995) | 579 |
119 rows × 1 columns
综上:可以看出在电影历史上,男女生对电影的品味有着较高的相似度——Comedy和Drama长期占据着票房的头把交椅,但是对于科幻电影,男生显然更为喜欢一点。