from __future__ import print_function
import pandas as pd
import numpy as np
# Seed NumPy's global RNG so the random table is identical on every run.
np.random.seed(1)

# Row labels: six consecutive dates beginning 2013-01-01.
dates = pd.date_range('20130101', periods=6)

# 6 rows x 4 columns of standard-normal samples, indexed by date.
df = pd.DataFrame(
    data=np.random.randn(6, 4),
    index=dates,
    columns=['A', 'B', 'C', 'D'],
)
赋值,新增列数据
# Access patterns used for assignment in this section:
#   df.iloc[2, 2], df.loc['2013-01-03', 'D'],
#   df[df.A < 0], df.loc[df.A < 0, 'A'], df['E'], df['G']
df.iloc[2, 2] = 1111  # set the cell at integer position (row 2, column 2) to 1111
df.loc['2013-01-03', 'D'] = 2222  # set the cell at row label '2013-01-03', column 'D' to 2222
df[df.A < 0] = 0  # zero every column of the rows whose 'A' value is negative
# Zero only column 'A' where it is negative. A single .loc call is used instead
# of the chained form df.A[mask] = 0, which may assign into a temporary copy.
df.loc[df.A < 0, 'A'] = 0
df['E'] = np.nan  # add a new column 'E' filled with NaN
# Add a new column 'G'; the Series is aligned to df by its date index.
df['G'] = pd.Series([1, 2, 3, 4, 5, 6], index=pd.date_range('20130101', periods=6))
以下是所有的运行结果:
# Display the frame before any assignment is made.
print(df)
> A B C D
> 2013-01-01 1.624345 -0.611756 -0.528172 -1.072969
> 2013-01-02 0.865408 -2.301539 1.744812 -0.761207
> 2013-01-03 0.319039 -0.249370 1.462108 -2.060141
> 2013-01-04 -0.322417 -0.384054 1.133769 -1.099891
> 2013-01-05 -0.172428 -0.877858 0.042214 0.582815
> 2013-01-06 -1.100619 1.144724 0.901591 0.502494
# Overwrite one cell chosen by integer position: row 2, column 2.
df.iloc[2, 2] = 1111
print(df)
> A B C D
> 2013-01-01 1.624345 -0.611756 -0.528172 -1.072969
> 2013-01-02 0.865408 -2.301539 1.744812 -0.761207
> 2013-01-03 0.319039 -0.249370 1111.000000 -2.060141
> 2013-01-04 -0.322417 -0.384054 1.133769 -1.099891
> 2013-01-05 -0.172428 -0.877858 0.042214 0.582815
> 2013-01-06 -1.100619 1.144724 0.901591 0.502494
# Overwrite one cell chosen by labels: row '2013-01-03', column 'D'.
row_label, col_label = '2013-01-03', 'D'
df.loc[row_label, col_label] = 2222
print(df)
> A B C D
> 2013-01-01 1.624345 -0.611756 -0.528172 -1.072969
> 2013-01-02 0.865408 -2.301539 1.744812 -0.761207
> 2013-01-03 0.319039 -0.249370 1111.000000 2222.000000
> 2013-01-04 -0.322417 -0.384054 1.133769 -1.099891
> 2013-01-05 -0.172428 -0.877858 0.042214 0.582815
> 2013-01-06 -1.100619 1.144724 0.901591 0.502494
# Boolean row mask: every column of each row where 'A' is negative becomes 0.
negative_a = df.A < 0
df[negative_a] = 0
print(df)
> A B C D
> 2013-01-01 1.624345 -0.611756 -0.528172 -1.072969
> 2013-01-02 0.865408 -2.301539 1.744812 -0.761207
> 2013-01-03 0.319039 -0.249370 1.462108 -2.060141
> 2013-01-04 0.000000 0.000000 0.000000 0.000000
> 2013-01-05 0.000000 0.000000 0.000000 0.000000
> 2013-01-06 0.000000 0.000000 0.000000 0.000000
# Zero only column 'A' where it is negative, leaving the other columns alone.
# A single .loc call replaces the chained df.A[mask] = 0, which may write to a
# temporary copy (and never updates df when pandas Copy-on-Write is active).
df.loc[df.A < 0, 'A'] = 0
print(df)
> A B C D
> 2013-01-01 1.624345 -0.611756 -0.528172 -1.072969
> 2013-01-02 0.865408 -2.301539 1.744812 -0.761207
> 2013-01-03 0.319039 -0.249370 1.462108 -2.060141
> 2013-01-04 0.000000 -0.384054 1.133769 -1.099891
> 2013-01-05 0.000000 -0.877858 0.042214 0.582815
> 2013-01-06 0.000000 1.144724 0.901591 0.502494
# Assigning a scalar to a new column name creates the column; here 'E' is all NaN.
new_column = 'E'
df[new_column] = np.nan
print(df)
> A B C D E
> 2013-01-01 1.624345 -0.611756 -0.528172 -1.072969 NaN
> 2013-01-02 0.865408 -2.301539 1.744812 -0.761207 NaN
> 2013-01-03 0.319039 -0.249370 1.462108 -2.060141 NaN
> 2013-01-04 0.000000 -0.384054 1.133769 -1.099891 NaN
> 2013-01-05 0.000000 -0.877858 0.042214 0.582815 NaN
> 2013-01-06 0.000000 1.144724 0.901591 0.502494 NaN
# Build a Series carrying the same six dates as its index, then attach it as
# column 'G'; the values are matched to df's rows by index label.
g_index = pd.date_range('20130101', periods=6)
g_values = pd.Series([1, 2, 3, 4, 5, 6], index=g_index)
df['G'] = g_values
print(df)
> A B C D E G
> 2013-01-01 1.624345 -0.611756 -0.528172 -1.072969 NaN 1
> 2013-01-02 0.865408 -2.301539 1.744812 -0.761207 NaN 2
> 2013-01-03 0.319039 -0.249370 1.462108 -2.060141 NaN 3
> 2013-01-04 0.000000 -0.384054 1.133769 -1.099891 NaN 4
> 2013-01-05 0.000000 -0.877858 0.042214 0.582815 NaN 5
> 2013-01-06 0.000000 1.144724 0.901591 0.502494 NaN 6
END