zoukankan      html  css  js  c++  java
  • 【爬取新浪股票实时数据--tushare】

    # -*- coding: utf-8 -*-
    """
    Created on Sat Apr  7 03:15:00 2018
    
    @author: Administrator
    """
    
    import pandas as pd
    import numpy as np 
    import tushare as ts 
    import time
    import threading
    
    class sina_real_data:
        """Poll real-time quotes through tushare and aggregate them into 1-minute bars.

        All collected state lives in the ``self.sina`` dict:

        * ``'volume_np'`` - the rounded volume array from the most recent poll,
          kept so the next poll can be turned into a per-poll delta.
        * ``'price'`` / ``'volume'`` - rolling lists of snapshots; each snapshot
          holds one value per quote row followed by the sample timestamp.
        * ``'kdata'`` - dict with ``open``/``high``/``low``/``close``/``volume``
          DataFrames resampled to 1 minute (written by ``sina_1_min``).
        """

        def __init__(self):
            print('sina_real_data 类')
            self.sina={}

        def sina_(self,code,max_num=891):
            """Fetch real-time quotes for ``code`` in batches.

            Args:
                code: sequence of symbols passed on to
                    ``ts.get_realtime_quotes``.
                max_num: maximum number of symbols per request (default 891,
                    the batch size this file has always used).

            Returns:
                The quotes as one DataFrame. A single batch is returned as-is;
                several batches are concatenated with a fresh integer index.
                An empty DataFrame is returned when there is nothing to fetch.

            Raises:
                ValueError: if ``max_num`` is smaller than 1.
            """
            if max_num<1:
                raise ValueError('max_num must be >= 1')
            frames=[]
            # Stepping by max_num never produces an empty trailing slice, so no
            # request is sent for an empty symbol list (this used to happen for
            # an empty ``code`` or a length that is a multiple of max_num).
            for start in range(0,len(code),max_num):
                chunk=ts.get_realtime_quotes(code[start:start+max_num])
                # Skip a missing/empty batch instead of failing on it later.
                if chunk is not None and len(chunk)>0:
                    frames.append(chunk)
            if not frames:
                return pd.DataFrame()
            if len(frames)==1:
                return frames[0]
            return pd.concat(frames,ignore_index=True)

        def sina_hand(self,code):
            """Take one quote snapshot and append it to the rolling history.

            Stores the rounded prices and the per-poll volume delta (each row
            followed by the sample timestamp) in ``self.sina['price']`` and
            ``self.sina['volume']``, trims the history, then refreshes the
            1-minute bars. Does nothing when no quotes were returned.
            """
            df=self.sina_(code)
            if df is None or len(df)==0:
                return

            # NOTE(review): the sample is stamped with the local wall clock,
            # not with the 'date'/'time' columns of the quotes; the original
            # code marked this as a testing aid. Behaviour kept as-is.
            t=time.localtime(time.time())
            date=pd.to_datetime(time.strftime("%Y-%m-%d %H:%M:%S",t))

            price=list(np.round(df['price'].apply(float).values,2))
            volume=np.round(df['volume'].apply(float).values,0)

            previous=self.sina.get('volume_np')
            if previous is None:
                # First poll: there is no earlier reading to subtract. The
                # reported volume is kept only when the poll happens at 09:30;
                # otherwise the first delta is recorded as zero.
                if t.tm_hour==9 and t.tm_min==30:
                    delta=volume
                else:
                    delta=np.zeros(len(volume))
            else:
                delta=volume-previous
            # Stored only after the subtraction succeeded, as before.
            self.sina['volume_np']=volume

            self.sina.setdefault('price',[]).append(price+[date])
            self.sina.setdefault('volume',[]).append(list(delta)+[date])

            # Bound memory: once more than 200 snapshots exist keep the last 150.
            if len(self.sina['price'])>200:
                self.sina['price']=self.sina['price'][-150:]
                self.sina['volume']=self.sina['volume'][-150:]
            self.sina_1_min()

        def sina_1_min(self):
            """Resample the snapshot history into 1-minute bars.

            Needs at least 60 snapshots; with fewer it returns without touching
            ``self.sina['kdata']``. Otherwise ``self.sina['kdata']`` is replaced
            by a dict of DataFrames keyed ``open``, ``high``, ``low``, ``close``
            (from the price history) and ``volume`` (sum of the volume deltas).
            """
            if len(self.sina.get('price',()))<60:
                return
            # The last element of every snapshot is its timestamp; use it as
            # the index so the frames can be resampled by time.
            dfA=pd.DataFrame(self.sina['price'])
            dfA.set_index(dfA.columns[-1], inplace=True)
            dfB=pd.DataFrame(self.sina['volume'])
            dfB.set_index(dfB.columns[-1], inplace=True)

            periodS = '1min'
            kdata={}
            kdata['open'] = dfA.resample(periodS).first()
            kdata['high'] = dfA.resample(periodS).max()
            kdata['low'] = dfA.resample(periodS).min()
            kdata['close'] = dfA.resample(periodS).last()
            kdata['volume'] = dfB.resample(periodS).sum()
            self.sina['kdata']=kdata

        def sina_run(self,code):
            """Collect a snapshot every 3 seconds, forever.

            A failed poll is reported and the loop carries on with the next
            one, so a single bad response does not stop the collection.
            """
            while True:
                time.sleep(3)
                try:
                    self.sina_hand(code)
                except Exception as exc:
                    # Best-effort polling: report instead of hiding the error.
                    print('sina_run: poll failed: '+repr(exc))

        def sina_run_start(self,code):
            """Start ``sina_run`` for ``code`` in a background thread."""
            tt = threading.Thread(target=self.sina_run, args=(code,))
            tt.start()
            
    if __name__=='__main__':
        # Ad-hoc driver: load the stock list, start background collection and
        # time one full quote download.
        collector=sina_real_data()

        # Build the quote symbols from the 'code' column: characters 7-8
        # lower-cased, followed by the first six characters
        # (presumably '600000.SH' -> 'sh600000'; verify against the HDF file).
        listing=pd.read_hdf('Z:/data/stock_data_py/list/stocklist.h5')
        listing['codenum']=[raw[7:9].lower()+raw[:6] for raw in listing['code']]
        symbols=listing['codenum'].values.tolist()

        # One download before the poller starts; the result is not used below.
        snapshot=collector.sina_(symbols)

        collector.sina_run_start(symbols)

        # Time a complete download of all symbols.
        started=time.time()
        quotes=collector.sina_(symbols)
        elapsed=round(time.time()-started,3)
        print('耗时:'+str(elapsed))

        # Rows whose bid and ask are both (effectively) zero.
        for side in ('bid','ask'):
            quotes[side]=quotes[side].apply(float)
        no_book=quotes[(quotes['bid']<0.001)&(quotes['ask']<0.001)]

        # Handle on the collector's shared state dict, for inspection.
        state=collector.sina
        
  • 相关阅读:
    [ACM训练] 数据结构----树、二叉树----c++ && python
    [机器学习] ——KNN K-最邻近算法
    [Python学习] Linux环境下的Python配置,必备库的安装配置
    [Python学习] python 科学计算库NumPy—矩阵运算
    [Python学习] python 科学计算库NumPy—tile函数
    [机器学习] ——初识及实践选择
    [机器学习] 虚拟机VMware中使用Ubuntu的联网问题
    使用ScribeFire,Windows Live Writer 2012和Office Word 2013 发布文章到博客园全面总结
    工作常用 随笔记录
    JavaScript取子串方法slice,substr,substring对比表
  • 原文地址:https://www.cnblogs.com/fyandy/p/9613437.html
Copyright © 2011-2022 走看看