zoukankan      html  css  js  c++  java
  • 使用TuShare下载历史逐笔成交数据并生成1分钟线

    使用如下代码从TuShare下载股票的历史逐笔成交记录,并按股票、日期保存到本地,主要是为了以后查询方便快速。代码中的股票列表读取自本地的 stock_basics.csv;如果只需要沪深300成份股,可以改用 ts.get_hs300s() 获取列表。

    #-*- coding: utf-8 -*-
    import numpy as np
    import pandas as pd
    import tushare as ts
    import datetime
    import time
    import tushare as ts
    import os
     
    data_dir = '/home/vnpy/share/'  # Directory where downloaded data is stored.

    # ts.get_sz50s()  # Fetches the SSE 50 constituents; returns a DataFrame with code (stock code) and name (stock name).

    # cal_dates = ts.trade_cal()  # Returns the exchange calendar as a DataFrame with columns calendarDate and isOpen.
    # The trading calendar is read from a CSV file under data_dir instead of being fetched.
    cal_dates = pd.read_csv(data_dir+'trade_cal.csv')
    
    def is_open_day(date):
        """Tell whether the market is open on ``date`` according to ``cal_dates``.

        Args:
            date: Date as a string in the calendar's format, e.g. '2017-11-23'.

        Returns:
            bool: True when the calendar has a row for ``date`` whose
            ``isOpen`` value equals 1; False otherwise, including when the
            date is not present in the calendar at all.
        """
        # Select the flag by column name instead of by position so the result
        # does not depend on how many columns (e.g. a saved index column) the
        # CSV happens to carry, and scan the calendar only once.
        flags = cal_dates.loc[cal_dates['calendarDate'] == date, 'isOpen']
        if flags.empty:
            return False
        return bool(flags.iat[0] == 1)
     
     
    # Seconds the caller should pause after a download attempt. It is adapted by
    # get_save_tick_data: doubled after a failure, halved after a success.
    sleep_time = 2


    def get_save_tick_data(symbol, date):
        """Fetch one day of tick data from TuShare and save it as a local CSV.

        The file is written to
        ``data_dir/<symbol>/<year>/<month>/<symbol>_<date>.csv``. Nothing is
        requested when ``date`` is not a trading day or the file already exists.

        Args:
            symbol: Stock code as a string, e.g. '600030'.
            date: Date object; its ``year`` and ``month`` are used for the path
                and ``str(date)`` is passed to TuShare.

        Returns:
            bool: True when a download was attempted (successful or not), so
            the caller should sleep for ``sleep_time`` seconds before the next
            request; False when no request was needed.
        """
        global sleep_time
        str_date = str(date)
        if not is_open_day(str_date):
            return False

        out_dir = data_dir + symbol + '/' + str(date.year) + '/' + str(date.month)
        out_file = out_dir + '/' + symbol + '_' + str_date + '.csv'
        if not os.path.exists(out_dir):
            os.makedirs(out_dir)
        if os.path.exists(out_file):
            print("Data already downloaded before, skip " + out_file)
            return False

        try:
            ticks = ts.get_tick_data(symbol, str_date, pause=0.1)
        except IOError as err:
            print(str(err))
            # Back off: double the pause after every failure, capped at 128 s.
            # The value persists across calls because it is no longer reset at
            # the top of the function.
            sleep_time = min(sleep_time * 2, 128)
            print('Get tick data error: symbol: ' + symbol + ', date: ' + str_date
                  + ', sleep time is: ' + str(sleep_time))
            return True

        if ticks is None:
            # NOTE(review): TuShare appears to return None instead of raising
            # for some inputs - confirm. Skip writing rather than crash.
            print('Get tick data error: symbol: ' + symbol + ', date: ' + str_date
                  + ', sleep time is: ' + str(sleep_time))
            return True

        ticks.to_csv(out_file)
        # Recover: halve the pause after every success, but never below 2 s.
        # Floor division keeps the value an integer on Python 3 as well.
        sleep_time = max(sleep_time // 2, 2)
        print("Successfully download and save file: " + out_file
              + ', sleep time is: ' + str(sleep_time))
        return True
     
    def get_date_list(begin_date, end_date):
        """List every day from ``begin_date`` to ``end_date``, both inclusive.

        Args:
            begin_date: First date of the range.
            end_date: Last date of the range.

        Returns:
            list: Consecutive dates one day apart, starting at ``begin_date``;
            empty when ``begin_date`` is later than ``end_date``.
        """
        whole_days = (end_date - begin_date).days
        one_day = datetime.timedelta(days=1)
        return [begin_date + one_day * offset for offset in range(whole_days + 1)]
     
    def get_all_stock_id():
        """Return the codes of all stocks of interest.

        The codes are taken from the ``code`` column of ``stock_basics.csv``
        under ``data_dir``.

        Returns:
            The ``code`` column as an array of values.
        """
        # stock_info = ts.get_hs300s()
        basics_path = data_dir + 'stock_basics.csv'
        return pd.read_csv(basics_path)['code'].values
    
    def getSixDigitalStockCode(code):
        """Left-pad a stock code with zeros to six characters.

        Args:
            code: Stock code as an int or a string.

        Returns:
            str: ``str(code)`` padded on the left with '0' up to a width of
            six; inputs already six characters or longer are returned as-is.
        """
        # str.rjust replaces the manual padding loop and pads exactly as it did.
        return str(code).rjust(6, '0')
    
    # Download the historical tick data of every stock of interest from TuShare
    # and save it locally, one CSV file per stock per day.
    # dates = get_date_list(datetime.date(2017, 11, 6), datetime.date(2017, 11, 12))
    dates = get_date_list(datetime.date(2018, 1, 1), datetime.date(2018, 7, 9))
    stocks = get_all_stock_id()
    for stock in stocks:
        # The padded code is the same for every date, so compute it once.
        symbol = getSixDigitalStockCode(stock)
        for date in dates:
            attempted = get_save_tick_data(symbol, date)
            if attempted:
                # Pause between requests; sleep_time is kept up to date by
                # get_save_tick_data.
                time.sleep(sleep_time)
    

      

    因为TuShare并没有提供1分钟线的信息,所以需要根据下载到的每日成交信息生成1分钟线信息。

    代码如下。其实不需要用 for 循环逐列处理,直接对 df 调用 resample(newdf = df.resample ...)即可,只要保证输出文件的列头保持一致就好了。

    #-*- coding: utf-8 -*-
    import pandas as pd
    import datetime
    import os
    
    def gen_min_line(symbol, date):
        """Build a 1-minute bar file from a previously saved tick-data CSV.

        Reads ``data_dir/<symbol>/<year>/<month>/<symbol>_<date>.csv`` and
        writes ``<symbol>_<date>_1m.csv`` next to it with the columns open,
        high, low, close, volume and amount, indexed by minute. Nothing is done
        when the tick file is missing or the minute file already exists.

        Args:
            symbol: Stock code as a string; it is concatenated into the path.
            date: Date object; its ``year`` and ``month`` select the directory.

        Returns:
            0 when the tick file has fewer than 10 rows and is skipped;
            None otherwise.
        """
        global data_dir
        data_dir = '/home/vnpy/share/'
        str_date = str(date)
        day_dir = data_dir + symbol + '/' + str(date.year) + '/' + str(date.month)
        tickfile = day_dir + '/' + symbol + '_' + str_date + '.csv'
        minfile = day_dir + '/' + symbol + '_' + str_date + '_1m.csv'
        print(tickfile + ' ' + minfile)
        if not os.path.exists(tickfile) or os.path.exists(minfile):
            return None

        df = pd.read_csv(tickfile)
        print("Successfully read tick file: " + tickfile)
        if df.shape[0] < 10:
            # TuShare returns tick data even while a stock is suspended, but
            # only three rows of bogus data; the row-count threshold filters
            # out such unexpected files.
            print("No tick data read from tick file, skip generating 1min line")
            return 0

        # The tick file only stores the time of day; prepend the date to get
        # full timestamps for resampling.
        df['time'] = pd.to_datetime(str_date + ' ' + df['time'])
        df = df.set_index('time')

        # OHLC of the trade price per minute; minutes without trades drop out.
        bars = df['price'].resample('1min').ohlc().dropna()
        # Traded volume and amount summed per minute.
        totals = df[['volume', 'amount']].resample('1min').sum().dropna()
        newdf = bars.merge(totals, left_index=True, right_index=True)
        newdf.to_csv(minfile)
        print("Successfully write to minute file: " + minfile)


    # Generate the 1-minute files for every stock and date.
    # NOTE(review): get_date_list and get_all_stock_id are not defined in this
    # snippet; they come from the download script and must be in scope.
    dates = get_date_list(datetime.date(2018, 1, 1), datetime.date(2018, 7, 9))
    stocks = get_all_stock_id()
    for stock in stocks:
        # Codes read from CSV may be integers without leading zeros, while
        # gen_min_line needs a string for its path; pad to six characters.
        symbol = str(stock).rjust(6, '0')
        for date in dates:
            gen_min_line(symbol, date)

      

      refer to:https://blog.csdn.net/wqfhenanxc/article/details/78525730

  • 相关阅读:
    在IIS7.5中ASP.NET调用cmd程序拒绝访问决绝方法小记
    WindowsCE project missing Microsoft.CompactFramework.CSharp.targets in Visual Studio 2008
    Windows 10预览版14316开启Bash命令支持
    批量文件重命名工具
    多说使用ua-parser-js显示浏览器和系统信息
    Hexo主题实现多级分类显示
    MS SQL Server 数据库分离-SQL语句
    Windows应用程序快捷方式创建工具
    第三方登录插件.NET版XY.OAuth-CSharp
    Microsoft Visual Studio 2008 未能正确加载包“Visual Web Developer HTML Source Editor Package” | “Visual Studio HTM Editor Package”
  • 原文地址:https://www.cnblogs.com/tewuapple/p/9266660.html
Copyright © 2011-2022 走看看