#Dataframe既有行索引也有列索引,可以被看做由多个Series组成的字典(共用一个索引)
#索引方法有多种,记住这5种常用的方法即可
#只选择列 / 只选择行 / 选择行和列 /链式选择 / 布尔判断选择
#一,只选择列
# df[列名],选择列的方法只记这一种即可,其他的都是不常用的,记多了反而混淆
#只选择一列,df[列名]
#选择多列,用列表包含多个列名:df[[列名1,列名2...]]
#选择多列不可以切片:df[列名1:列名5]会报错,如果填入数字会选择行
import numpy as np
import pandas as pd
# Column-selection demo: a 3x4 frame of random floats in [0, 100) with
# labelled rows ('one'..'three') and labelled columns ('a'..'d').
df = pd.DataFrame(
    np.random.rand(12).reshape(3, 4) * 100,
    index=['one', 'two', 'three'],
    columns=['a', 'b', 'c', 'd'],
)
print(df)

l = df['a']           # a single column label returns a Series
ls = df[['a', 'c']]   # a list of column labels returns a DataFrame
h = df[0:2]           # an integer slice inside [] selects ROWS by position, not columns; best avoided

# Print exactly the objects built above (the previous version printed the
# undefined names `data` and `ls1`, which raised NameError).
print(l)
print(ls)
print(h)
a b c d
one 44.386955 64.943123 84.604522 35.164263
two 75.446304 55.476815 25.105854 81.424303
three 6.303621 42.431963 68.578739 69.393774
one 44.386955
two 75.446304
three 6.303621
Name: a, dtype: float64
a c
one 44.386955 84.604522
two 75.446304 25.105854
three 6.303621 68.578739
a b c d
one 44.386955 64.943123 84.604522 35.164263
two 75.446304 55.476815 25.105854 81.424303
#二,只选择行loc[]和iloc[]
#只选择一行,loc[行标签],行标签可以是索引数字(没指定行索引名字时,且不能为-1)或名称索引(指定了行索引名字后)
#选择多行,用列表包含多个值,loc[[行标签1,行标签2...]]
#选择多行可以切片:df.loc[行标签1:行标签5],loc包含切片尾部
# Row-selection demo frames: df1 has named row labels, df2 keeps the default
# integer index (0..3). Both hold 4x4 random floats in [0, 100).
df1 = pd.DataFrame(
    np.random.rand(16).reshape(4, 4) * 100,
    index=['one', 'two', 'three', 'four'],
    columns=['a', 'b', 'c', 'd'],
)
df2 = pd.DataFrame(
    np.random.rand(16).reshape(4, 4) * 100,
    columns=['a', 'b', 'c', 'd'],
)
print(df1, df2, sep='\n')

# --- Single row -------------------------------------------------------------
h = df1.loc['one']   # named index: select by the row's name
h1 = df2.loc[0]      # default index: the labels are the integers starting at 0
# Note: loc is label-based, so df2.loc[-1] raises KeyError (no label -1 exists).
print(h, h1, sep='\n')

# --- Several rows -----------------------------------------------------------
hs = df1.loc[['one', 'three']]   # list of labels -> exactly those rows
hs1 = df2.loc[[0, 3]]
hs2 = df1.loc['one':'three']     # label slice
hs3 = df2.loc[0:3]               # loc slices INCLUDE the end label
print(hs, hs1, hs2, hs3, sep='\n')

# iloc is pure positional indexing. Usage mirrors loc, except that it only
# accepts integers, negative positions such as -1 are allowed, and slices
# EXCLUDE the end position. Single/multi-row selection is not repeated here.
hs4 = df2.iloc[0:3]              # rows at positions 0, 1, 2
print(hs4)
a b c d
one 51.204447 55.528528 58.210314 54.163497
two 41.858473 30.722846 17.749213 90.469865
three 99.200053 3.001227 72.551832 17.683482
four 27.134902 45.250912 28.113455 68.403044
a b c d
0 87.023917 60.621417 52.059756 77.975245
1 58.333329 14.945754 65.759015 34.399971
2 21.767209 71.009879 68.363179 70.344211
3 56.988215 88.706929 82.538999 34.399141
a 51.204447
b 55.528528
c 58.210314
d 54.163497
Name: one, dtype: float64
a 87.023917
b 60.621417
c 52.059756
d 77.975245
Name: 0, dtype: float64
a b c d
one 51.204447 55.528528 58.210314 54.163497
three 99.200053 3.001227 72.551832 17.683482
a b c d
0 87.023917 60.621417 52.059756 77.975245
3 56.988215 88.706929 82.538999 34.399141
a b c d
one 51.204447 55.528528 58.210314 54.163497
two 41.858473 30.722846 17.749213 90.469865
three 99.200053 3.001227 72.551832 17.683482
a b c d
0 87.023917 60.621417 52.059756 77.975245
1 58.333329 14.945754 65.759015 34.399971
2 21.767209 71.009879 68.363179 70.344211
3 56.988215 88.706929 82.538999 34.399141
a b c d
0 87.023917 60.621417 52.059756 77.975245
1 58.333329 14.945754 65.759015 34.399971
2 21.767209 71.009879 68.363179 70.344211
#三,选择行和列loc[选择行,选择列]
#逗号前面是选择行的操作,逗号后面选择列的操作
#具体用法就是把方法一和方法二结合起来,索引可单个,可间断,可切片
# loc[rows, columns]: the part before the comma picks rows, the part after it
# picks columns. Each side may be a single label, a list of labels, or a slice.

# One row label + one column label -> a single scalar value.
lh = df1.loc['one', 'a']

# A list on each axis -> the DataFrame holding just those rows and columns.
lhs = df1.loc[['one', 'three'], ['a', 'c']]

# Label slices on both axes. The trailing 1 is the step, as in list slicing,
# but unlike list slicing the end label is included.
lhs1 = df1.loc['one':'three':1, 'a':'c':1]

print(lh, lhs, lhs1, sep='\n')
51.20444650565864
a c
one 51.204447 58.210314
three 99.200053 72.551832
a b c
one 51.204447 55.528528 58.210314
two 41.858473 30.722846 17.749213
three 99.200053 3.001227 72.551832
#四,五:链式选择一般和布尔选择配合使用:当选择后的结果还是df对象时还可以继续选择
# Boolean mask over the sub-frame rows 'one'..'three' x columns 'a'..'c'.
m_c = df1.loc['one':'three':1, 'a':'c':1] > 20
print(m_c)        # frame of True / False values

# Indexing df1 with the mask keeps df1's full shape; every cell that is not
# True in the mask (including cells the mask does not cover) shows as NaN.
print(df1[m_c])

# Chained selection: the masked result is still a DataFrame, so it can be
# indexed again, here taking the first two rows by position.
res = df1[m_c].iloc[0:2]

# The same thing written as one expression, without the intermediate mask name.
res1 = df1[
    df1.loc['one':'three':1, 'a':'c':1] > 20
].iloc[0:2]

print(res, res1, sep='\n')
a b c
one True True True
two True True False
three True False True
a b c d
one 51.204447 55.528528 58.210314 NaN
two 41.858473 30.722846 NaN NaN
three 99.200053 NaN 72.551832 NaN
four NaN NaN NaN NaN
a b c d
one 51.204447 55.528528 58.210314 NaN
two 41.858473 30.722846 NaN NaN
a b c d
one 51.204447 55.528528 58.210314 NaN
two 41.858473 30.722846 NaN NaN