#!/usr/bin/env python
# -*- coding:utf-8 -*-
"""Matplotlib walkthrough: line, bar, scatter and histogram plots.

Reads two CSV files from the current working directory:

* ``unrate.csv`` -- uses the ``DATE`` and ``VALUE`` columns.
* ``fandango_scores.csv`` -- uses the ``FILM`` column and the rating columns
  listed in ``cols`` below.

Each section builds a figure and calls ``plt.show()``, so the script opens a
sequence of plot windows when run.
"""
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from numpy import arange

# ---------------------------------------------------------------------------
# Line chart: first twelve rows of the unemployment data
# ---------------------------------------------------------------------------
unrate = pd.read_csv('unrate.csv')
# Convert the DATE column to datetime so matplotlib treats it as a time axis.
unrate['DATE'] = pd.to_datetime(unrate['DATE'])
print(unrate.head(12))  # first 12 rows

# plt.plot() draws onto the current figure; plt.show() displays it.
first_twelve = unrate[0:12]
plt.plot(first_twelve['DATE'], first_twelve['VALUE'])  # x values, y values
plt.xticks(rotation=45)  # rotate the x tick labels by 45 degrees
plt.xlabel('Month')  # axis labels
plt.ylabel('Unemployment Rate')
plt.title('Monthly Unemployment Trends,1948')  # figure title
plt.show()

# ---------------------------------------------------------------------------
# Subplots
# ---------------------------------------------------------------------------
fig = plt.figure(figsize=(3, 3))  # figsize is optional
ax1 = fig.add_subplot(2, 1, 1)
ax2 = fig.add_subplot(2, 1, 2)
# Cell 6 of a 4x3 grid on the same figure; nothing is plotted on it.
ax3 = fig.add_subplot(4, 3, 6)
ax1.plot(np.random.randint(1, 5, 5), np.arange(5))  # random x values
ax2.plot(np.arange(10) * 3, np.arange(10))
plt.show()

# ---------------------------------------------------------------------------
# Two curves in one figure
# ---------------------------------------------------------------------------
unrate['MONTH'] = unrate['DATE'].dt.month
fig = plt.figure(figsize=(6, 3))
plt.plot(unrate[0:12]['MONTH'], unrate[0:12]['VALUE'], c='red')
plt.plot(unrate[12:24]['MONTH'], unrate[12:24]['VALUE'], c='blue')
plt.show()

# ---------------------------------------------------------------------------
# Several curves, one colour per 12-row slice, with a legend
# ---------------------------------------------------------------------------
fig = plt.figure(figsize=(10, 6))
colors = ['red', 'blue', 'green', 'orange', 'black']
for i, color in enumerate(colors):
    start_index = i * 12
    end_index = (i + 1) * 12
    subset = unrate[start_index:end_index]
    # Labels run 1948..1952; assumes the data starts in January 1948 -- confirm.
    label = str(1948 + i)
    plt.plot(subset['MONTH'], subset['VALUE'], c=color, label=label)
plt.legend(loc='upper left')  # map each line colour to its label
plt.xlabel('Month,Integer')
plt.ylabel('Unemployment Rate,Percent')
plt.title('Monthly Unemployment Trends,1948-1952')
plt.show()

# ---------------------------------------------------------------------------
# Bar chart: ratings of the first film from each source
# ---------------------------------------------------------------------------
reviews = pd.read_csv('fandango_scores.csv')
cols = ['FILM', 'RT_user_norm', 'Metacritic_user_nom', 'IMDB_norm',
        'Fandango_Ratingvalue', 'Fandango_Stars']
norm_reviews = reviews[cols]
print(norm_reviews[:1])  # all selected columns for the first film

# Rating columns, one per source.
num_cols = ['RT_user_norm', 'Metacritic_user_nom', 'IMDB_norm',
            'Fandango_Ratingvalue', 'Fandango_Stars']
# DataFrame.ix was removed in pandas 1.0; .loc selects the same row/columns
# by label (row label 0 is the first row under read_csv's default index).
bar_heights = norm_reviews.loc[0, num_cols].values  # bar heights
print(bar_heights)
bar_positions = arange(5) + 0.75  # bar x positions (offset from the origin)
print(bar_positions)

# fig controls the figure as a whole; ax is what we draw on.
fig, ax = plt.subplots()
ax.bar(bar_positions, bar_heights, 0.3, align='center')  # 0.3 is the bar width
# Pin the ticks to the bar positions first; otherwise the labels are attached
# to the default ticks and do not line up with the bars.
ax.set_xticks(bar_positions)
ax.set_xticklabels(num_cols, rotation=45)  # tilt the x labels by 45 degrees
ax.set_xlabel('Rating Source')  # axis labels
ax.set_ylabel('Average Rating')
ax.set_title('Average User Rating For Avengers: Age of Ultron (2015)')
plt.show()

# ---------------------------------------------------------------------------
# Scatter plots: scatter()
# ---------------------------------------------------------------------------
fig, ax = plt.subplots()
# Pass the two sources' ratings as x and y, then label the axes.
ax.scatter(norm_reviews['Fandango_Ratingvalue'], norm_reviews['RT_user_norm'])
ax.set_xlabel('Fandango')
ax.set_ylabel('Rotten Tomatoes')
plt.show()

fig = plt.figure(figsize=(5, 10))
ax1 = fig.add_subplot(2, 1, 1)
ax1.scatter(norm_reviews['Fandango_Ratingvalue'], norm_reviews['RT_user_norm'])
ax1.set_xlabel('Fandango')
ax1.set_ylabel('Rotten Tomatoes')
plt.show()

# ---------------------------------------------------------------------------
# Histograms
# ---------------------------------------------------------------------------
reviews = pd.read_csv('fandango_scores.csv')
cols = ['FILM', 'RT_user_norm', 'Metacritic_user_nom', 'IMDB_norm',
        'Fandango_Ratingvalue']
norm_reviews = reviews[cols]
print(norm_reviews[:5])  # first five rows

# Frequency of each distinct rating value, ordered by rating.
fandango_distribution = norm_reviews['Fandango_Ratingvalue'].value_counts()
fandango_distribution = fandango_distribution.sort_index()
imdb_distribution = norm_reviews['IMDB_norm'].value_counts()
imdb_distribution = imdb_distribution.sort_index()
print(fandango_distribution)
print(imdb_distribution)

# Three variants drawn on the same axes, so they overlay one another.
fig, ax = plt.subplots()
ax.hist(norm_reviews['Fandango_Ratingvalue'])
ax.hist(norm_reviews['Fandango_Ratingvalue'], bins=20)  # bins: number of bins
# range: lower and upper edge of the binned interval
ax.hist(norm_reviews['Fandango_Ratingvalue'], range=(4, 5), bins=20)
plt.show()

# Four stacked histograms, one per rating source, on a shared y scale.
fig = plt.figure(figsize=(5, 20))
ax1 = fig.add_subplot(4, 1, 1)
ax2 = fig.add_subplot(4, 1, 2)
ax3 = fig.add_subplot(4, 1, 3)
ax4 = fig.add_subplot(4, 1, 4)

ax1.hist(norm_reviews['Fandango_Ratingvalue'], bins=20, range=(0, 5))
ax1.set_title('Distribution of Fandango Ratings')
ax1.set_ylim(0, 50)  # ylim: fix the y-axis limits

ax2.hist(norm_reviews['RT_user_norm'], 20, range=(0, 5))
ax2.set_title('Distribution of Rotten Tomatoes Ratings')
ax2.set_ylim(0, 50)

ax3.hist(norm_reviews['Metacritic_user_nom'], 20, range=(0, 5))
ax3.set_title('Distribution of Metacritic Ratings')
ax3.set_ylim(0, 50)

ax4.hist(norm_reviews['IMDB_norm'], 20, range=(0, 5))
ax4.set_title('Distribution of IMDB Ratings')
ax4.set_ylim(0, 50)
plt.show()