#Stats
# shift: what is this thing actually good for?
# (As the output below shows, shift(n) moves the values n positions down the
# index and fills the vacated leading slots with NaN; shift(0) is a no-op.)
# s = pd.Series([1,5,np.nan], index=dates).shift(0)
# s1 = pd.Series([1,5,np.nan], index=dates).shift(1)
# s2 = pd.Series([1,5,np.nan], index=dates).shift(2)
# print(s)
# print(s1)
# print(s2)
# 2018-01-16    1.0
# 2018-01-17    5.0
# 2018-01-18    NaN
# Freq: D, dtype: float64
# 2018-01-16    NaN
# 2018-01-17    1.0
# 2018-01-18    5.0
# Freq: D, dtype: float64
# 2018-01-16    NaN
# 2018-01-17    NaN
# 2018-01-18    1.0
# Freq: D, dtype: float64
# print(df)
# print(df.sub(s, axis='index'))
# "Wise subtraction": s is subtracted from every column, aligned on the row index.
#                    A         B         C         D
# 2018-01-16 -1.809723  0.342129  2.048727  0.995959
# 2018-01-17  0.871955  1.960730  0.368855  0.459528
# 2018-01-18 -0.483717  0.031247  0.619609 -0.712104
#                    A         B         C         D
# 2018-01-16 -2.809723 -0.657871  1.048727 -0.004041
# 2018-01-17 -4.128045 -3.039270 -4.631145 -4.540472
# 2018-01-18       NaN       NaN       NaN       NaN
/
# Applying functions to the data
# print(df)
# print(df.apply(np.cumsum))  # apply NumPy's cumsum to get the running total of each column
#                    A         B         C         D
# 2018-01-16  1.516139  0.501701  0.624571 -1.270804
# 2018-01-17 -0.223673 -0.092153  0.782620 -2.073206
# 2018-01-18  0.844318 -1.180269  0.994821 -1.372318
#                    A         B         C         D
# 2018-01-16  1.516139  0.501701  0.624571 -1.270804
# 2018-01-17  1.292466  0.409548  1.407191 -3.344010
# 2018-01-18  2.136784 -0.770721  2.402013 -4.716328
/
# Histogramming
# P.S.: this simply counts how many times each value occurs.
# s = pd.Series(np.random.randint(0, 7, size=10))
# print(s)
# print(s.value_counts())
# 0    1
# 1    4
# 2    6
# 3    2
# 4    4
# 5    2
# 6    3
# 7    2
# 8    1
# 9    5
# dtype: int32
# 2    3
# 4    2
# 1    2
# 6    1
# 5    1
# 3    1
# dtype: int64
/
# String Methods
# s = pd.Series(['A', 'B', 'C', 'Aaba', 'Baca', np.nan, 'CABA', 'dog', 'cat'])
# print(s.str.lower())
# 0       a
# 1       b
# 2       c
# 3    aaba
# 4    baca
# 5     NaN
# 6    caba
# 7     dog
# 8     cat
# dtype: object