导入pandas
import pandas as pd
# Sample data used throughout the examples below. Each list holds 20
# entries; position i in every list presumably refers to the same country
# (NOTE(review): units of the numeric columns are not stated here).
countries = [
    'Albania', 'Algeria', 'Andorra', 'Angola', 'Antigua and Barbuda',
    'Argentina', 'Armenia', 'Australia', 'Austria', 'Azerbaijan',
    'Bahamas', 'Bahrain', 'Bangladesh', 'Barbados', 'Belarus',
    'Belgium', 'Belize', 'Benin', 'Bhutan', 'Bolivia',
]

life_expectancy_values = [
    74.7, 75.0, 83.4, 57.6, 74.6,
    75.4, 72.3, 81.5, 80.2, 70.3,
    72.1, 76.4, 68.1, 75.2, 69.8,
    79.4, 70.8, 62.7, 67.3, 70.6,
]

gdp_values = [
    1681.61390973, 2155.48523109, 21495.80508273, 562.98768478,
    13495.1274663, 9388.68852258, 1424.19056199, 24765.54890176,
    27036.48733192, 1945.63754911, 21721.61840978, 13373.21993972,
    483.97086804, 9783.98417323, 2253.46411147, 25034.66692293,
    3680.91642923, 366.04496652, 1175.92638695, 1132.21387981,
]

# Convert the plain Python lists into pandas Series.
life_expectancy = pd.Series(life_expectancy_values)
gdp = pd.Series(gdp_values)
pandas 的 Series(一维数组)和 numpy 数组支持许多相同的操作:
(1) 截取部分
# Access a single element by its index label.
# print(...) with one argument behaves the same on Python 2 and Python 3.
print(life_expectancy[0])
# Result: 74.7

# Slicing returns a sub-Series; the output keeps the original index
# labels (3, 4, 5) rather than renumbering from 0.
print(gdp[3:6])
# Result:
# 3      562.987685
# 4    13495.127466
# 5     9388.688523
# dtype: float64
(2) 循环:
# Iterating over a Series yields its values one at a time, in order.
for country_life_expectancy in life_expectancy:
    print('Examining life expectancy {}'.format(country_life_expectancy))
# Result:
# Examining life expectancy 74.7
# Examining life expectancy 75.0
# Examining life expectancy 83.4
# Examining life expectancy 57.6
# Examining life expectancy 74.6
# Examining life expectancy 75.4
# ...
# Examining life expectancy 67.3
# Examining life expectancy 70.6
(3) 常用函数:
# Built-in reductions on a Series. The results listed are the ones shown in
# the original write-up; the exact number of digits printed may differ
# between Python / pandas versions.

print(life_expectancy.mean())  # arithmetic mean
# Result: 72.87

# Series.std() defaults to ddof=1, i.e. the sample standard deviation.
print(life_expectancy.std())  # standard deviation
# Result: 6.21399947487

print(gdp.max())  # maximum value
# Result: 27036.4873319

print(gdp.sum())  # sum of all values
# Result: 182957.59833
(4) 向量化运算:
# Vectorized operations: arithmetic and comparisons are applied
# element-by-element, without an explicit Python loop.
a = pd.Series([1, 2, 3, 4])
b = pd.Series([1, 2, 1, 2])

# Element-wise addition of two Series.
print(a + b)
# Result:
# 0    2
# 1    4
# 2    4
# 3    6
# dtype: int64

# Multiplying by a scalar applies to every element.
print(a * 2)
# Result:
# 0    2
# 1    4
# 2    6
# 3    8
# dtype: int64

# A comparison produces a boolean Series of the same length.
print(a >= 3)
# Result:
# 0    False
# 1    False
# 2     True
# 3     True
# dtype: bool

# Indexing with a boolean Series keeps only the elements where it is True;
# the surviving elements keep their original index labels (2 and 3).
print(a[a >= 3])
# Result:
# 2    3
# 3    4
# dtype: int64