#构造
# Build a 6x4 DataFrame of random values indexed by six consecutive days.
import pickle  # NOTE: imported by the original snippet; not used here.

import numpy as np
import pandas as pd

# Six daily timestamps starting at 2018-03-10 become the row labels.
dates = pd.date_range('20180310', periods=6)
# 6 rows x 4 columns of samples from np.random.randn (values differ per run).
df = pd.DataFrame(np.random.randn(6, 4), index=dates, columns=['A', 'B', 'C', 'D'])
print(df)
输出:
A B C D 2018-03-10 0.474957 -0.789351 0.827287 0.632483 2018-03-11 -0.147661 2.093837 0.565236 -0.282967 2018-03-12 0.871652 -0.492781 0.213760 1.046995 2018-03-13 0.735719 0.827546 0.139042 1.764413 2018-03-14 0.442560 -0.065412 -1.209434 0.690070 2018-03-15 -0.303560 1.389159 -0.397401 -0.650598
#切片选择指定行
# Select rows with a positional slice.
import pickle  # NOTE: imported by the original snippet; not used here.

import numpy as np
import pandas as pd

dates = pd.date_range('20180310', periods=6)
# 6 rows x 4 columns of random values.
df = pd.DataFrame(np.random.randn(6, 4), index=dates, columns=['A', 'B', 'C', 'D'])
print(df)

# An integer slice on the frame selects rows by position; the end is exclusive,
# so 0:3 yields the first three rows.
first_rows = df[0:3]
print(first_rows)
输出
A B C D 2018-03-10 0.474957 -0.789351 0.827287 0.632483 2018-03-11 -0.147661 2.093837 0.565236 -0.282967 2018-03-12 0.871652 -0.492781 0.213760 1.046995 2018-03-13 0.735719 0.827546 0.139042 1.764413 2018-03-14 0.442560 -0.065412 -1.209434 0.690070 2018-03-15 -0.303560 1.389159 -0.397401 -0.650598 A B C D 2018-03-10 0.474957 -0.789351 0.827287 0.632483 2018-03-11 -0.147661 2.093837 0.565236 -0.282967 2018-03-12 0.871652 -0.492781 0.213760 1.046995
#通过行标记获取指定行(包含两端)
# Select rows by index label; both endpoints are included.
import pickle  # NOTE: imported by the original snippet; not used here.

import numpy as np
import pandas as pd

dates = pd.date_range('20180310', periods=6)
# 6 rows x 4 columns of random values.
df = pd.DataFrame(np.random.randn(6, 4), index=dates, columns=['A', 'B', 'C', 'D'])
print(df)

# A label slice on the date index keeps the start AND the end label,
# unlike a positional slice whose end is exclusive.
rows_by_label = df['20180311':'20180313']
print(rows_by_label)
输出
A B C D 2018-03-10 0.474957 -0.789351 0.827287 0.632483 2018-03-11 -0.147661 2.093837 0.565236 -0.282967 2018-03-12 0.871652 -0.492781 0.213760 1.046995 2018-03-13 0.735719 0.827546 0.139042 1.764413 2018-03-14 0.442560 -0.065412 -1.209434 0.690070 2018-03-15 -0.303560 1.389159 -0.397401 -0.650598 A B C D 2018-03-11 -0.147661 2.093837 0.565236 -0.282967 2018-03-12 0.871652 -0.492781 0.213760 1.046995 2018-03-13 0.735719 0.827546 0.139042 1.764413
#输出指定行指定列的数据
# Select given columns of a given row by label.
import pickle  # NOTE: imported by the original snippet; not used here.

import numpy as np
import pandas as pd

dates = pd.date_range('20180310', periods=6)
# 6 rows x 4 columns of random values.
df = pd.DataFrame(np.random.randn(6, 4), index=dates, columns=['A', 'B', 'C', 'D'])
print(df)

# .loc is label-based: one row label plus a list of column labels returns
# a Series whose name is the row's timestamp.
row_values = df.loc['20180312', ['A', 'B', 'C', 'D']]
print(row_values)
输出
A B C D 2018-03-10 0.474957 -0.789351 0.827287 0.632483 2018-03-11 -0.147661 2.093837 0.565236 -0.282967 2018-03-12 0.871652 -0.492781 0.213760 1.046995 2018-03-13 0.735719 0.827546 0.139042 1.764413 2018-03-14 0.442560 -0.065412 -1.209434 0.690070 2018-03-15 -0.303560 1.389159 -0.397401 -0.650598 A 0.871652 B -0.492781 C 0.213760 D 1.046995 Name: 2018-03-12 00:00:00, dtype: float64
#Output the value at row index 3, column index 1 (iloc is 0-based, so this is the fourth row, second column)
# Read a single value by integer position.
import pickle  # NOTE: imported by the original snippet; not used here.

import numpy as np
import pandas as pd

dates = pd.date_range('20180310', periods=6)
# 6 rows x 4 columns of random values.
df = pd.DataFrame(np.random.randn(6, 4), index=dates, columns=['A', 'B', 'C', 'D'])
print(df)

# .iloc is position-based and 0-indexed: [3, 1] is the FOURTH row
# (2018-03-13) and the SECOND column ('B'), not "third row, first column".
cell_value = df.iloc[3, 1]
print(cell_value)
输出
A B C D 2018-03-10 0.474957 -0.789351 0.827287 0.632483 2018-03-11 -0.147661 2.093837 0.565236 -0.282967 2018-03-12 0.871652 -0.492781 0.213760 1.046995 2018-03-13 0.735719 0.827546 0.139042 1.764413 2018-03-14 0.442560 -0.065412 -1.209434 0.690070 2018-03-15 -0.303560 1.389159 -0.397401 -0.650598 0.8275459967949839
#df.A 选择某列
# Select a single column.
import pickle  # NOTE: imported by the original snippet; not used here.

import numpy as np
import pandas as pd

dates = pd.date_range('20180310', periods=6)
# 6 rows x 4 columns of random values.
df = pd.DataFrame(np.random.randn(6, 4), index=dates, columns=['A', 'B', 'C', 'D'])
print(df)

# df['A'] and df.A both return column 'A' as a Series.
column_a = df['A']
print(column_a)
输出
A B C D 2018-03-10 -1.537480 1.082599 0.174229 -1.841898 2018-03-11 -1.691014 -0.164473 -2.199268 -1.488417 2018-03-12 -1.324199 -0.420854 0.104982 0.754717 2018-03-13 0.138477 1.003904 -0.437110 -2.542149 2018-03-14 -1.049416 0.318146 1.249720 0.781054 2018-03-15 -1.595190 -0.391273 0.783752 -1.225756 2018-03-10 -1.537480 2018-03-11 -1.691014 2018-03-12 -1.324199 2018-03-13 0.138477 2018-03-14 -1.049416 2018-03-15 -1.595190 Freq: D, Name: A, dtype: float64
#进行切片选择,指定行,指定列
# Slice rows and columns together by integer position.
import pickle  # NOTE: imported by the original snippet; not used here.

import numpy as np
import pandas as pd

dates = pd.date_range('20180310', periods=6)
# 6 rows x 4 columns of random values.
df = pd.DataFrame(np.random.randn(6, 4), index=dates, columns=['A', 'B', 'C', 'D'])
print(df)

# Positional slices exclude the end: rows 2..4 and columns 0..1 ('A', 'B').
sub_frame = df.iloc[2:5, 0:2]
print(sub_frame)
输出
A B C D 2018-03-10 -1.537480 1.082599 0.174229 -1.841898 2018-03-11 -1.691014 -0.164473 -2.199268 -1.488417 2018-03-12 -1.324199 -0.420854 0.104982 0.754717 2018-03-13 0.138477 1.003904 -0.437110 -2.542149 2018-03-14 -1.049416 0.318146 1.249720 0.781054 2018-03-15 -1.595190 -0.391273 0.783752 -1.225756 A B 2018-03-12 -1.324199 -0.420854 2018-03-13 0.138477 1.003904 2018-03-14 -1.049416 0.318146
#进行不连续筛选
# Pick non-contiguous rows and columns by integer position.
import pickle  # NOTE: imported by the original snippet; not used here.

import numpy as np
import pandas as pd

dates = pd.date_range('20180310', periods=6)
# 6 rows x 4 columns of random values.
df = pd.DataFrame(np.random.randn(6, 4), index=dates, columns=['A', 'B', 'C', 'D'])
print(df)

# Lists of positions select exactly those rows (1, 2, 4) and columns (0, 2).
picked = df.iloc[[1, 2, 4], [0, 2]]
print(picked)
输出
A B C D 2018-03-10 0.900440 -0.062287 -1.483173 -1.586545 2018-03-11 -0.351609 -2.337686 0.471770 0.122194 2018-03-12 1.598436 0.795936 1.102541 -0.471931 2018-03-13 2.753501 0.184064 0.610561 -0.577957 2018-03-14 -2.081754 0.666256 0.345566 0.969266 2018-03-15 0.089630 -0.310928 -0.439767 0.944149 A C 2018-03-11 -0.351609 0.471770 2018-03-12 1.598436 1.102541 2018-03-14 -2.081754 0.345566
#筛选出df.A大于0的元素 布尔条件筛选
# Boolean filtering: keep the rows whose 'A' value is greater than 0.
import pickle  # NOTE: imported by the original snippet; not used here.

import numpy as np
import pandas as pd

dates = pd.date_range('20180310', periods=6)
# 6 rows x 4 columns of random values.
df = pd.DataFrame(np.random.randn(6, 4), index=dates, columns=['A', 'B', 'C', 'D'])
print(df)

# df.A > 0 is a boolean Series; indexing with it keeps whole rows
# (all columns) where the condition is True.
positive_a_rows = df[df.A > 0]
print(positive_a_rows)
输出
A B C D 2018-03-10 0.892268 0.713791 -0.144297 0.739862 2018-03-11 0.991796 -1.688081 1.333420 -0.524965 2018-03-12 2.251776 -1.514738 -0.720530 1.052735 2018-03-13 -0.297195 -0.945455 -1.796431 2.998356 2018-03-14 -0.236509 -0.369757 -0.438734 0.408940 2018-03-15 0.498061 0.778591 -0.282689 1.879702 A B C D 2018-03-10 0.892268 0.713791 -0.144297 0.739862 2018-03-11 0.991796 -1.688081 1.333420 -0.524965 2018-03-12 2.251776 -1.514738 -0.720530 1.052735 2018-03-15 0.498061 0.778591 -0.282689 1.879702
#Overwrite every column of the rows where df.A > 0 (to change only column A, use df.loc[df.A > 0, 'A'] = 999)
# Assign through a boolean row mask.
import pickle  # NOTE: imported by the original snippet; not used here.

import numpy as np
import pandas as pd

dates = pd.date_range('20180310', periods=6)
# 6 rows x 4 columns of random values.
df = pd.DataFrame(np.random.randn(6, 4), index=dates, columns=['A', 'B', 'C', 'D'])
print(df)

# The mask selects whole rows, so EVERY column of each row with A > 0 is set
# to 999 -- not just column 'A'. To change only column 'A', one would write
# df.loc[df.A > 0, 'A'] = 999 instead.
df[df.A > 0] = 999
print(df)
输出
A B C D 2018-03-10 -0.587337 0.166438 -1.536473 -2.118008 2018-03-11 0.327482 -0.383389 0.353157 0.592067 2018-03-12 -0.483211 -2.066614 -0.313845 0.989347 2018-03-13 1.230698 -1.196974 -1.465180 0.585245 2018-03-14 0.180381 -1.289805 0.264123 0.731016 2018-03-15 0.288694 -1.318865 -1.550989 0.467802 A B C D 2018-03-10 -0.587337 0.166438 -1.536473 -2.118008 2018-03-11 999.000000 999.000000 999.000000 999.000000 2018-03-12 -0.483211 -2.066614 -0.313845 0.989347 2018-03-13 999.000000 999.000000 999.000000 999.000000 2018-03-14 999.000000 999.000000 999.000000 999.000000 2018-03-15 999.000000 999.000000 999.000000 999.000000
#新增一列不赋值
# Add a new column without real values.
import pickle  # NOTE: imported by the original snippet; not used here.

import numpy as np
import pandas as pd

dates = pd.date_range('20180310', periods=6)
# 6 rows x 4 columns of random values.
df = pd.DataFrame(np.random.randn(6, 4), index=dates, columns=['A', 'B', 'C', 'D'])
print(df)

# Assigning the scalar np.nan to a new key creates column 'F' with NaN in
# every row.
df['F'] = np.nan
print(df)
输出
A B C D 2018-03-10 -0.227356 1.229450 0.488290 -0.910271 2018-03-11 0.756934 -0.489924 1.125287 0.148251 2018-03-12 -1.157556 -0.703575 1.488778 -0.713087 2018-03-13 0.942155 0.972845 -1.765062 0.991459 2018-03-14 1.053055 -0.685858 0.604448 0.837986 2018-03-15 0.809910 0.771260 0.674058 0.420373 A B C D F 2018-03-10 -0.227356 1.229450 0.488290 -0.910271 NaN 2018-03-11 0.756934 -0.489924 1.125287 0.148251 NaN 2018-03-12 -1.157556 -0.703575 1.488778 -0.713087 NaN 2018-03-13 0.942155 0.972845 -1.765062 0.991459 NaN 2018-03-14 1.053055 -0.685858 0.604448 0.837986 NaN 2018-03-15 0.809910 0.771260 0.674058 0.420373 NaN
#新增一列赋值
# Add a new column populated from a Series.
import pickle  # NOTE: imported by the original snippet; not used here.

import numpy as np
import pandas as pd

dates = pd.date_range('20180310', periods=6)
# 6 rows x 4 columns of random values.
df = pd.DataFrame(np.random.randn(6, 4), index=dates, columns=['A', 'B', 'C', 'D'])
print(df)

# The Series carries the same six daily labels as df's index, so each value
# lands on the row with the matching date.
df['E'] = pd.Series([1, 2, 3, 4, 5, 6], index=pd.date_range('20180310', periods=6))
print(df)
输出
A B C D 2018-03-10 -0.565898 0.647803 1.018365 -1.269129 2018-03-11 -1.049725 0.718618 0.745133 -2.976616 2018-03-12 -0.859447 -0.686062 0.332352 -0.065416 2018-03-13 -0.291780 1.144493 -1.387311 -0.752532 2018-03-14 0.469711 0.129786 0.677650 0.723333 2018-03-15 0.876061 0.441140 1.566190 -1.628274 A B C D E 2018-03-10 -0.565898 0.647803 1.018365 -1.269129 1 2018-03-11 -1.049725 0.718618 0.745133 -2.976616 2 2018-03-12 -0.859447 -0.686062 0.332352 -0.065416 3 2018-03-13 -0.291780 1.144493 -1.387311 -0.752532 4 2018-03-14 0.469711 0.129786 0.677650 0.723333 5 2018-03-15 0.876061 0.441140 1.566190 -1.628274 6