zoukankan      html  css  js  c++  java
  • pandas基础-Python3

    未完

    Examples:

    example 1:

     1 # Code based on Python 3.x
     2 # _*_ coding: utf-8 _*_
     3 # __Author: "LEMON"
     4 
     5 import pandas as pd
     6 
     7 d = pd.date_range('20170101', periods=7)
     8 aList = list(range(1,8))
     9 
    10 df = pd.DataFrame(aList, index=d, columns=[' '])
    11 df.index.name = 'value'
    12 
    13 print('----------df.index---------')
    14 print(df.index)
    15 
    16 print('---------df.columns---------')
    17 print(df.columns)
    18 
    19 print('----------df.values---------')
    20 print(df.values)
    21 
    22 print('----------df.describe--------')
    23 print(df.describe)
    24 
    25 print('----------information details--------')
    26 print(df.head(2)) #获取开始的n条记录
    27 print(df.tail(3)) #获取最后的n条记录
    28 print(df[3:5])  # df[a:b],获取第a+1至第b条记录(按位置切片,不包含位置b)

    运行结果如下:

     1 ----------df.index---------
     2 DatetimeIndex(['2017-01-01', '2017-01-02', '2017-01-03', '2017-01-04',
     3                '2017-01-05', '2017-01-06', '2017-01-07'],
     4               dtype='datetime64[ns]', name='value', freq='D')
     5 ---------df.columns---------
     6 Index([' '], dtype='object')
     7 ----------df.values---------
     8 [[1]
     9  [2]
    10  [3]
    11  [4]
    12  [5]
    13  [6]
    14  [7]]
    15 ----------df.describe--------
    16 <bound method NDFrame.describe of              
    17 value        
    18 2017-01-01  1
    19 2017-01-02  2
    20 2017-01-03  3
    21 2017-01-04  4
    22 2017-01-05  5
    23 2017-01-06  6
    24 2017-01-07  7>
    25 ----------information details--------
    26              
    27 value        
    28 2017-01-01  1
    29 2017-01-02  2
    30              
    31 value        
    32 2017-01-05  5
    33 2017-01-06  6
    34 2017-01-07  7
    35              
    36 value        
    37 2017-01-04  4
    38 2017-01-05  5

    example 2:

     1 # Code based on Python 3.x
     2 # _*_ coding: utf-8 _*_
     3 # __Author: "LEMON"
     4 
     5 from pandas import Series, DataFrame
     6 import pandas as pd
     7 
     8 data = {'state': ['Ohino', 'Ohino', 'Ohino', 'Nevada', 'Nevada'],
     9         'year': [2000, 2001, 2002, 2001, 2002],
    10         'pop': [1.5, 1.7, 3.6, 2.4, 2.9]}
    11 
    12 df = DataFrame(data, index=list(range(1, 6)),
    13                columns=['year', 'state', 'pop', 'name'])
    14 print(df)
    15 
    16 print('\n', '---------------')
    17 print(list(df.ix[3]))
    18 
    19 print('\n', '---------------')
    20 print(list(df['year']))
    21 
    22 aList = ['1', '2', '3', '4']
    23 bList = ['aa', 'bb', 'cb', 'dd']
    24 cList = ['lemon', 'apple', 'orange', 'banana']
    25 
    26 d = {'num': aList, 'char': bList, 'fruit': cList}
    27 
    28 
    29 df1 = DataFrame(d, index=['a', 'b', 'c', 'd'])
    30 # df2 = DataFrame(bList)
    31 print('\n', '---------------')
    32 print(df1)
    33 #print(df1.num)
    34 
    35 print('\n', '---------------')
    36 print(df1.ix['b'])  # 获取索引号为 'b' 的行的数据
    37 
    38 
    39 print('\n', '---------------')
    40 print(df1.ix[:2, 1:3]) # 以切片形式获取部分数据

    运行结果如下:

     1  year   state  pop name
     2 1  2000   Ohino  1.5  NaN
     3 2  2001   Ohino  1.7  NaN
     4 3  2002   Ohino  3.6  NaN
     5 4  2001  Nevada  2.4  NaN
     6 5  2002  Nevada  2.9  NaN
     7 
     8  ---------------
     9 [2002, 'Ohino', 3.6000000000000001, nan]
    10 
    11  ---------------
    12 [2000, 2001, 2002, 2001, 2002]
    13 
    14  ---------------
    15   char   fruit num
    16 a   aa   lemon   1
    17 b   bb   apple   2
    18 c   cb  orange   3
    19 d   dd  banana   4
    20 
    21  ---------------
    22 char        bb
    23 fruit    apple
    24 num          2
    25 Name: b, dtype: object
    26 
    27  ---------------
    28    fruit num
    29 a  lemon   1
    30 b  apple   2

    example 3 (数据选择-DataFrame.loc()和DataFrame.iloc()) :

     1 # Code based on Python 3.x
     2 # _*_ coding: utf-8 _*_
     3 # __Author: "LEMON"
     4 
     5 from matplotlib.finance import quotes_historical_yahoo_ochl
     6 from datetime import date
     7 import pandas as pd
     8 
     9 today = date.today()
    10 
    11 start =(today.year-4, today.month+11, today.day-1)
    12 end = (today.year-4, today.month+11, today.day+3)
    13 quotes = quotes_historical_yahoo_ochl('AMX', start, end)
    14 # each items in quotes is type of "tuple"
    15 
    16 fields = ['date', 'open', 'close', 'high', 'low', 'volume']
    17 
    18 quotes1 = []
    19 for t in quotes:
    20     t1 = list(t)
    21     quotes1.append(t1)
    22 # each items in quotes1 is type of "list"
    23 
    24 for i in range(0, len(quotes1)):
    25     quotes1[i][0] = date.fromordinal(int(quotes1[i][0]))
    26     # date format is changed
    27 
    28 df = pd.DataFrame(quotes1, index=range(1, len(quotes1)+1), columns=fields)
    29 # df = pd.DataFrame(quotes1, index=['a','b','c','d','e'], columns=fields)
    30 # df = df.drop(['date'], axis=1)
    31 
    32 print(df)
    33 
    34 print(df['close'].mean())  #计算某列的mean值
    35 # print(dict(df.mean())['close'])  #计算某列的mean值
    36 
    37 print(df.sort_values(['open'],ascending = True)) #进行排序,默认(True)是升序
    38 print(df[df.open>=21].date)
    39 
    40 
    41 
    42 # index是整数
    43 print(df.loc[2:5, 'date':'close'])
    44 print(df.loc[[2,5],['open','close']])  
    45 # loc方法在行和列的选择上是标签形式,可以是连续的选择,或者单个行或列的选择
    46 print(df.iloc[1:6,0:4])  #iloc方法以切片形式选取数据
    47 
    48       
    49 # index是标签形式
    50 # print(df.loc['a':'d', 'date':'close'])
    51 # print(df.loc[['b','e'],['open','close']])  
    52 # loc方法在行和列的选择上是标签形式,可以是连续的选择,或者单个行或列的选择
    53 
    54 # 根据判断条件来选择数据
    55 print(df[(df.index>=4) & (df.open>=21)])
    56 
    57 
    58 # DataFrame 的均值
    59 print(df.mean())  # 默认计算每列的均值
    60 print(df.mean(axis=1))  # axis=1是计算每行的均值
    61 
    62 
    63 '''
    64 # 获取多只股票的信息
    65 d1 = (today.year-1, today.month+11, today.day)
    66 
    67 aList = ['BABA', 'KO', 'AMX'] # List of the stock code of companys
    68 
    69 
    70 for i in aList:
    71     q1 = quotes_historical_yahoo_ochl(i, d1, today)
    72     df1 = pd.DataFrame(q1)
    73     print(df1)
    74 '''

    运行结果如下:

     1          date       open      close       high        low     volume
     2 1  2013-12-03  20.999551  21.156955  21.184731  20.795851  5152600.0
     3 2  2013-12-04  20.971773  20.934738  21.064364  20.703261  5174400.0
     4 3  2013-12-05  20.518079  20.545857  21.231027  20.379193  7225600.0
     5 4  2013-12-06  21.166215  20.601411  21.295841  20.536598  9989500.0
     6 20.80974025
     7 20.80974025
     8          date       open      close       high        low     volume
     9 3  2013-12-05  20.518079  20.545857  21.231027  20.379193  7225600.0
    10 2  2013-12-04  20.971773  20.934738  21.064364  20.703261  5174400.0
    11 1  2013-12-03  20.999551  21.156955  21.184731  20.795851  5152600.0
    12 4  2013-12-06  21.166215  20.601411  21.295841  20.536598  9989500.0
    13 4    2013-12-06
    14 Name: date, dtype: object
    15 
    16 runfile('E:/Python/Anaco/test_yahoo.py', wdir='E:/Python/Anaco')
    17          date       open      close       high        low     volume
    18 1  2013-12-03  20.999551  21.156955  21.184731  20.795851  5152600.0
    19 2  2013-12-04  20.971773  20.934738  21.064364  20.703261  5174400.0
    20 3  2013-12-05  20.518079  20.545857  21.231027  20.379193  7225600.0
    21 4  2013-12-06  21.166215  20.601411  21.295841  20.536598  9989500.0
    22 20.80974025
    23          date       open      close       high        low     volume
    24 3  2013-12-05  20.518079  20.545857  21.231027  20.379193  7225600.0
    25 2  2013-12-04  20.971773  20.934738  21.064364  20.703261  5174400.0
    26 1  2013-12-03  20.999551  21.156955  21.184731  20.795851  5152600.0
    27 4  2013-12-06  21.166215  20.601411  21.295841  20.536598  9989500.0
    28 4    2013-12-06
    29 Name: date, dtype: object
    30          date       open      close
    31 2  2013-12-04  20.971773  20.934738
    32 3  2013-12-05  20.518079  20.545857
    33 4  2013-12-06  21.166215  20.601411
    34         open      close
    35 2  20.971773  20.934738
    36 5        NaN        NaN
    37          date       open      close       high
    38 2  2013-12-04  20.971773  20.934738  21.064364
    39 3  2013-12-05  20.518079  20.545857  21.231027
    40 4  2013-12-06  21.166215  20.601411  21.295841
    41          date       open      close       high        low     volume
    42 4  2013-12-06  21.166215  20.601411  21.295841  20.536598  9989500.0
    43 open      2.091390e+01
    44 close     2.080974e+01
    45 high      2.119399e+01
    46 low       2.060373e+01
    47 volume    6.885525e+06
    48 dtype: float64
    49 1    1.030537e+06
    50 2    1.034897e+06
    51 3    1.445137e+06
    52 4    1.997917e+06
    53 dtype: float64
    View Code

    example 4: 求微软公司(MSFT)2015年每月股票收盘价的平均值。

      1 # Code based on Python 3.x
      2 # _*_ coding: utf-8 _*_
      3 # __Author: "LEMON"
      4 
      5 # 求微软公司(MSFT)2015年每月股票收盘价的平均值。
      6 
      7 
      8 #Method 1 (update)
      9 
     10 from matplotlib.finance import quotes_historical_yahoo_ochl
     11 from datetime import date
     12 import pandas as pd
     13 from datetime import datetime
     14 
     15 today = date.today()
     16 fields = ['date', 'open', 'close', 'high', 'low', 'volume']
     17 
     18 start = (today.year - 3, today.month, today.day)
     19 end = today
     20 quotes = quotes_historical_yahoo_ochl('MSFT', start, end)
     21 # each items in quotes is type of "tuple"
     22 
     23 df = pd.DataFrame(quotes, index=range(1, len(quotes) + 1), columns=fields)
     24 
     25 list = df.date.tolist()
     26 list1 = []
     27 for x in list:
     28     x = date.fromordinal(int(x))
     29     y = date.strftime(x, '%Y/%m')
     30     list1.append(y)
     31 
     32 # print(list1)
     33 df1 = df.set_index([list1]).drop('date',axis=1)
     34 # 把日期设置成索引,并删除“date”列
     35 
     36 df2 = df1['2015/01':'2015/12']  #选取2015年的数据
     37 print(df2.groupby(df2.index).close.mean())
     38 # 将数据按index进行分组,并计算每组收盘价“close”的均值
     39 
     40 
     41 # -----------------------------------------------------
     42 # #Method 1 (old)
     43 #
     44 # from matplotlib.finance import quotes_historical_yahoo_ochl
     45 # from datetime import date
     46 # import pandas as pd
     47 # from datetime import datetime
     48 #
     49 #
     50 # today = date.today()
     51 # fields = ['date', 'open', 'close', 'high', 'low', 'volume']
     52 #
     53 # start2 = (today.year - 3, today.month, today.day)
     54 # end2 = today
     55 # quotes2 = quotes_historical_yahoo_ochl('MSFT', start2, end2)
     56 # # each items in quotes is type of "tuple"
     57 #
     58 # quotes3 = []
     59 # for t in quotes2:
     60 #     t1 = list(t)
     61 #     quotes3.append(t1)
     62 # # each items in quotes1 is type of "list"
     63 #
     64 # for i in range(0, len(quotes3)):
     65 #     quotes3[i][0] = date.fromordinal(int(quotes3[i][0]))
     66 #     # date format is changed
     67 #
     68 # df2 = pd.DataFrame(quotes3, index=range(1, len(quotes3) + 1), columns=fields)
     69 #
     70 # df2['date'] = pd.to_datetime(df2['date'], format='%Y-%m-%d')  # 转化成pandas的日期格式
     71 # # print(df2)
     72 #
     73 # start2015 = datetime(2015,1,1)
     74 # end2015 = datetime(2015,12,31)
     75 # # start2015 = datetime.strptime('2015-1-1', '%Y-%m-%d')
     76 # # # 将'2015-1-1'字符串设置为时间格式
     77 # # end2015 = datetime.strptime('2015-12-31', '%Y-%m-%d')
     78 # # # 将'2015-12-31'字符串设置为时间格式
     79 #
     80 # df1 = df2[(start2015 <= df2.date) & (df2.date <= end2015)]
     81 # # 通过时间条件来选择2015年的记录
     82 #
     83 # permonth1 = df1.date.dt.to_period('M')  #data per month
     84 # g_month1 = df1.groupby(permonth1)
     85 # g_closequotes = g_month1['close']
     86 #
     87 # s_month = g_closequotes.mean()  # s_month is Series class
     88 # s_month.index.name = 'date_index'
     89 #
     90 # print(s_month)
     91 # -----------------------------------------------------
     92 
     93 # =================================================================
     94 # Method 2
     95 
     96 # from matplotlib.finance import quotes_historical_yahoo_ochl
     97 # from datetime import date
     98 #
     99 # import pandas as pd
    100 # today = date.today()
    101 # start = (today.year-3, today.month, today.day)
    102 # quotesMS = quotes_historical_yahoo_ochl('MSFT', start, today)
    103 # attributes=['date','open','close','high','low','volume']
    104 # quotesdfMS = pd.DataFrame(quotesMS, columns= attributes)
    105 #
    106 #
    107 #
    108 # list = []
    109 # for i in range(0, len(quotesMS)):
    110 #     x = date.fromordinal(int(quotesMS[i][0]))
    111 #     y = date.strftime(x, '%y/%m/%d')
    112 #     list.append(y)
    113 # quotesdfMS.index = list
    114 # quotesdfMS = quotesdfMS.drop(['date'], axis = 1)
    115 # list = []
    116 # quotesdfMS15 = quotesdfMS['15/01/01':'15/12/31']
    117 #
    118 # print(quotesdfMS15)
    119 #
    120 # for i in range(0, len(quotesdfMS15)):
    121 #     list.append(int(quotesdfMS15.index[i][3:5])) #get month just like '02'
    122 # quotesdfMS15['month'] = list
    123 # print(quotesdfMS15.groupby('month').mean().close)
    124 # =================================================================

    输出结果如下:

     1 2015/01    43.124433
     2 2015/02    40.956772
     3 2015/03    40.203918
     4 2015/04    41.477685
     5 2015/05    45.472291
     6 2015/06    44.145879
     7 2015/07    43.807541
     8 2015/08    43.838895
     9 2015/09    42.114155
    10 2015/10    47.082882
    11 2015/11    52.252878
    12 2015/12    53.916431
    13 Name: close, dtype: float64
    作者:Lemon
    出处:个人微信公众号:“Python数据之道”(ID:PyDataRoad)和博客园:http://www.cnblogs.com/lemonbit/
    本文版权归作者所有,欢迎转载,但未经作者同意必须保留此段声明,且在文章页面明显位置给出原文出处,否则保留追究法律责任的权利。
  • 相关阅读:
    Python编程 从入门到实践-2变量上
    NX二次开发-基于Winform界面对话框与NXOPEN C#交互的开发(对话框嵌套)
    NX二次开发-UFUN获取投影曲线里的曲线UF_CURVE_ask_proj_curves
    NX二次开发-UFUN获取投影曲线里的曲线UF_MODL_ask_proj_curves
    NX二次开发-UFUN创建投影曲线UF_MODL_create_proj_curves
    NX二次开发-NXOPEN C#项目如何设断点调试代码
    NX二次开发-外部开发模式exe(不打开NX进行后台操作)以及封装exe传参调用
    NX二次开发-工程图模板,标题栏,页码,日期,比例,单位,部件名,等自动更新【转载】
    QTreeWidget 遍历所有子节点(QTreeWidgetItem)【转载】
    NX二次开发-使用NXOPEN C#手工搭建开发环境配置
  • 原文地址:https://www.cnblogs.com/lemonbit/p/6243513.html
Copyright © 2011-2022 走看看