# -*- encoding:utf-8 -*-
# Copyright (c) 2015 Shiye Inc.
# All rights reserved.
#
# Author: ldq <liangduanqi@shiyejinrong.com>
# Date: 2019/2/12 10:07
"""Walk-through of basic pandas DataFrame construction and inspection.

Each section builds or inspects a DataFrame and is followed by a comment
showing sample output recorded by the original author.  Exact formatting
of that output may differ between pandas versions.
"""

import numpy as np
import pandas as pd

# Five consecutive calendar days starting at 2019-01-01.
dates = pd.date_range("20190101", periods=5)
# Sample output:
# DatetimeIndex(['2019-01-01', '2019-01-02', '2019-01-03', '2019-01-04',
#                '2019-01-05'],
#               dtype='datetime64[ns]', freq='D')

# 5x4 frame of standard-normal samples, indexed by the dates above.
# The generator is not seeded, so the values differ on every run.
df = pd.DataFrame(
    np.random.randn(5, 4),
    index=dates,
    columns=["a", "b", "c", "d"],
)
# Sample output (values vary per run):
#                    a         b         c         d
# 2019-01-01 -0.406321 -0.518128 -0.151546  1.438366
# 2019-01-02 -0.738235  0.400646  1.337277  1.393154
# 2019-01-03  1.646115 -0.073540  0.644506  0.987226
# 2019-01-04 -1.270745 -1.333457 -1.571356 -0.051486
# 2019-01-05 -0.075171  2.424032 -0.274433  1.205959

# 3x4 frame of the integers 0..11 with default integer row/column labels.
df1 = pd.DataFrame(np.arange(12).reshape(3, 4))
# Sample output:
#    0  1   2   3
# 0  0  1   2   3
# 1  4  5   6   7
# 2  8  9  10  11

# Frame built from a dict of heterogeneous column specifications; the
# scalar entries ("a", "b", "f") are broadcast to the four-row length of
# the other columns.
data2 = {
    "a": 1,
    "b": pd.Timestamp("20190101"),
    "c": pd.Series(1, index=range(4), dtype=np.float64),
    "d": np.array([3] * 4, dtype=np.int32),
    "e": pd.Categorical(["test", "train", "test", "train"]),
    "f": "foo",
    "g": pd.date_range("20020205", periods=4),
}
df2 = pd.DataFrame(data2)
# Sample output:
#    a          b    c  d      e    f          g
# 0  1 2019-01-01  1.0  3   test  foo 2002-02-05
# 1  1 2019-01-01  1.0  3  train  foo 2002-02-06
# 2  1 2019-01-01  1.0  3   test  foo 2002-02-07
# 3  1 2019-01-01  1.0  3  train  foo 2002-02-08

# All column labels.
columns1 = df2.columns
# Sample output:
# Index(['a', 'b', 'c', 'd', 'e', 'f', 'g'], dtype='object')

# Row labels.
index1 = df2.index
# Sample output:
# RangeIndex(start=0, stop=4, step=1)

# Underlying data as a NumPy array.
values1 = df2.values
# Sample output:
# [[1 Timestamp('2019-01-01 00:00:00') 1.0 3 'test' 'foo'
#   Timestamp('2002-02-05 00:00:00')]
#  [1 Timestamp('2019-01-01 00:00:00') 1.0 3 'train' 'foo'
#   Timestamp('2002-02-06 00:00:00')]
#  [1 Timestamp('2019-01-01 00:00:00') 1.0 3 'test' 'foo'
#   Timestamp('2002-02-07 00:00:00')]
#  [1 Timestamp('2019-01-01 00:00:00') 1.0 3 'train' 'foo'
#   Timestamp('2002-02-08 00:00:00')]]

# Simple summary statistics of the data.
describe1 = df2.describe()
# Sample output (as recorded by the original author; the set of columns
# summarised may vary by pandas version):
#          a    c    d
# count  4.0  4.0  4.0
# mean   1.0  1.0  1.0
# std    0.0  0.0  0.0
# min    1.0  1.0  3.0
# 25%    1.0  1.0  3.0
# 50%    1.0  1.0  3.0
# 75%    1.0  1.0  3.0
# max    1.0  1.0  3.0

# Transpose: rows become columns and columns become rows.
transpose1 = df2.T
# Sample output:
#                      0  ...                    3
# a                    1  ...                    1
# b  2019-01-01 00:00:00  ...  2019-01-01 00:00:00
# c                    1  ...                    1
# d                    3  ...                    3
# e                 test  ...                train
# f                  foo  ...                  foo
# g  2002-02-05 00:00:00  ...  2002-02-08 00:00:00
#
# [7 rows x 4 columns]

# Sort by index labels.  axis=0 sorts the row labels (axis=1 would sort
# the column labels instead); ascending=False gives descending order.
df2_sort_index = df2.sort_index(axis=0, ascending=False)
# Sample output:
#    a          b    c  d      e    f          g
# 3  1 2019-01-01  1.0  3  train  foo 2002-02-08
# 2  1 2019-01-01  1.0  3   test  foo 2002-02-07
# 1  1 2019-01-01  1.0  3  train  foo 2002-02-06
# 0  1 2019-01-01  1.0  3   test  foo 2002-02-05

# Sort rows by the values of column "g", latest date first.
df2_sort_values = df2.sort_values(by="g", ascending=False)
# Sample output:
#    a          b    c  d      e    f          g
# 3  1 2019-01-01  1.0  3  train  foo 2002-02-08
# 2  1 2019-01-01  1.0  3   test  foo 2002-02-07
# 1  1 2019-01-01  1.0  3  train  foo 2002-02-06
# 0  1 2019-01-01  1.0  3   test  foo 2002-02-05