1 >>> a=pd.Series([1,2],index=['a','b']) 2 >>> a 3 a 1 4 b 2 5 dtype: int64 6 >>> b=pd.Series(['b','a']) 7 >>> b 8 0 b 9 1 a 10 dtype: object
1 >>> b.index 2 RangeIndex(start=0, stop=2, step=1) 3 >>> b.values 4 array(['b', 'a'], dtype=object) 5 >>> a/2 6 a 0.5 7 b 1.0 8 dtype: float64
1 >>> dic={'zhang':1,'li':2} 2 >>> d=pd.Series(dic) 参数的形式是字典,numpy中参数是列表 3 >>> d 4 li 2 5 zhang 1 6 dtype: int64 7 >>> frame=pd.DataFrame('name':['zhang','li'],'age':[12,13],'addr':['beijing','shanghai']) 8 SyntaxError: invalid syntax 9 >>> dic={'name':['zhang','li'],'age':[12,13],'addr':['beijing','shanghai']} 10 >>> frame=pd.DataFrame(dic) 参数是字典 11 >>> frame 12 addr age name 13 0 beijing 12 zhang 14 1 shanghai 13 li 15 >>> frame.columns index,columns两个关键字属性 16 Index(['addr', 'age', 'name'], dtype='object') 17 >>> frame.index 18 RangeIndex(start=0, stop=2, step=1) 19 >>> frame2=pd.DataFrame(np.arange(16).reshape((4,4)),columns=['like','name','age','addr'],index=['a','b','c','d'])
1 >>> frame2 2 like name age addr 3 a 0 1 2 3 4 b 4 5 6 7 5 c 8 9 10 11 6 d 12 13 14 15 7 >>> frame.name 指定列名字 8 0 zhang 9 1 li 10 Name: name, dtype: object
>>> frame2.ix[2] 查看某行 ix[](注意:ix 已在 pandas 1.0 中移除,新版本请用 frame2.iloc[2] 按位置取行)
like 8
name 9
age 10
addr 11
Name: c, dtype: int32
>>> frame2.ix[2,3]
11
1 >>> frame2.index.name='id';frame2.columns.name='item' 对标头的name属性指定 2 >>> frame2 3 item like name age addr 4 id 5 a 0 1 2 3 6 b 4 5 6 7 7 c 8 9 10 11 8 d 12 13 14 15 9 >>> frame2['new']=12 添加新列 10 >>> frame2 11 item like name age addr new 12 id 13 a 0 1 2 3 12 14 b 4 5 6 7 12 15 c 8 9 10 11 12 16 d 12 13 14 15 12 17 >>> frame2['new'] 18 id 19 a 12 20 b 12 21 c 12 22 d 12 23 Name: new, dtype: int64
1 >>> frame2['new']['b'] 根据列行找到元素 2 12 3 >>> frame2.isin([2]) 4 item like name age addr new 5 id 6 a False False True False False 7 b False False False False False 8 c False False False False False 9 d False False False False False 10 >>> del frame['new'] 注意:frame 没有 'new' 列,这条语句会抛出 KeyError;要删除的是 frame2 的列,应写 del frame2['new']
1 >>> del frame2['new'] 删除列 2 >>> frame2 3 item like name age addr 4 id 5 a 0 1 2 3 6 b 4 5 6 7 7 c 8 9 10 11 8 d 12 13 14 15
1 >>> frame2[frame2<8] 找到小于8的所有元素 2 item like name age addr 3 id 4 a 0.0 1.0 2.0 3.0 5 b 4.0 5.0 6.0 7.0 6 c NaN NaN NaN NaN 7 d NaN NaN NaN NaN 8 >>> frame.T 对表进行转置 9 0 1 10 addr beijing shanghai 11 age 12 13 12 name zhang li 13 >>> frame2.T 14 id a b c d 15 item 16 like 0 4 8 12 17 name 1 5 9 13 18 age 2 6 10 14 19 addr 3 7 11 15
1 >>> frame2.idxmin() 找到每列最小值所在的行索引 idxmin() 2 item 3 like a 4 name a 5 age a 6 addr a 7 dtype: object 8 >>> frame2.idxmax() 9 item 10 like d 11 name d 12 age d 13 addr d 14 dtype: object 15 >>> frame2.index.is_unique 16 True 17 >>> frame2.reindex(['one','two','three','four']) 18 item like name age addr 19 id 20 one NaN NaN NaN NaN 21 two NaN NaN NaN NaN 22 three NaN NaN NaN NaN 23 four NaN NaN NaN NaN 24 >>> frame2 25 item like name age addr 26 id 27 a 0 1 2 3 28 b 4 5 6 7 29 c 8 9 10 11 30 d 12 13 14 15
1 >>> frame2.drop('a') 删除行根据索引 2 item like name age addr 3 id 4 b 4 5 6 7 5 c 8 9 10 11 6 d 12 13 14 15
>>> frame2.drop(['name'],axis=1) 删除列
item like age addr
id
a 0 2 3
b 4 6 7
c 8 10 11
d 12 14 15
1 >>> frame2 2 item like name age addr 每行-series 3 id 4 a 0 1 2 3 5 b 4 5 6 7 6 c 8 9 10 11 7 d 12 13 14 15 8 >>> ser=[1,2,3,4] 9 >>> frame2-ser 10 item like name age addr 11 id 12 a -1 -1 -1 -1 13 b 3 3 3 3 14 c 7 7 7 7 15 d 11 11 11 11
1 >>> np.sqrt(frame2) 求所有的sqrt 2 item like name age addr 3 id 4 a 0.000000 1.000000 1.414214 1.732051 5 b 2.000000 2.236068 2.449490 2.645751 6 c 2.828427 3.000000 3.162278 3.316625 7 d 3.464102 3.605551 3.741657 3.872983
1 >>> f=lambda x:x.max()-x.min() 对frame f(x)默认是对一列的所有值中寻找 2 >>> frame2.apply(f) 3 item 4 like 12 5 name 12 6 age 12 7 addr 12 8 dtype: int64 9 >>> def f(x): 10 return pd.Series([x.min(),x.max()],index=['min','max']) 11 12 >>> frame.apply(f) 13 addr age name 14 min beijing 12 li 15 max shanghai 13 zhang 16 >>> frame2.sum() 17 item 18 like 24 19 name 28 20 age 32 21 addr 36 22 dtype: int64 23 >>> frame.mean() 24 age 12.5 25 dtype: float64 26 >>> frame2.mean() 27 item 28 like 6.0 29 name 7.0 30 age 8.0 31 addr 9.0 32 dtype: float64 33 >>> frame2.describe() 34 item like name age addr 35 count 4.000000 4.000000 4.000000 4.000000 36 mean 6.000000 7.000000 8.000000 9.000000 37 std 5.163978 5.163978 5.163978 5.163978 38 min 0.000000 1.000000 2.000000 3.000000 39 25% 3.000000 4.000000 5.000000 6.000000 40 50% 6.000000 7.000000 8.000000 9.000000 41 75% 9.000000 10.000000 11.000000 12.000000 42 max 12.000000 13.000000 14.000000 15.000000 43 >>> frame.describe() 44 age 45 count 2.000000 46 mean 12.500000 47 std 0.707107 48 min 12.000000 49 25% 12.250000 50 50% 12.500000 51 75% 12.750000 52 max 13.000000 53 >>> frame2.sort_index() 54 item like name age addr 55 id 56 a 0 1 2 3 57 b 4 5 6 7 58 c 8 9 10 11 59 d 12 13 14 15 60 >>> frame.sort_index() 以索引进行排序 61 addr age name 62 0 beijing 12 zhang 63 1 shanghai 13 li 64 >>> frame 65 addr age name 66 0 beijing 12 zhang 67 1 shanghai 13 li 68 >>> ser.sort_index() 69 Traceback (most recent call last): 70 File "<pyshell#95>", line 1, in <module> 71 ser.sort_index() 72 AttributeError: 'list' object has no attribute 'sort_index' 73 >>> ser 74 [1, 2, 3, 4]