zoukankan      html  css  js  c++  java
  • 研究 -- 很多时候,就是证伪

    有个想法:
    用期货交易数据中连续 10 个 tick(代码中写作 tik)的信息,去检验它与 3 分钟后的价格波动是否有关系,或者说神经网络能否识别出这种关系。

    结果:
    花了两天时间,写代码,提取数据, 写网络模型, 训练。
    证明结果:
    1. 网络模型不能有效识别 10 个 tik 中的潜在特征并据此作出有效判断(见下方训练结果:验证集准确率约 51%,接近随机猜测)。
    2. 或者说,3 分钟后的结果与这 10 个 tik 中的特征是无关的。


    代码(数据提取):

    # encoding: UTF-8
    
    import pandas as pd
    import numpy as np
    import  time
    import os
    import sys
    import copy
    
    # Sample single-file input. It is only referenced by the commented-out
    # `model.dofile( file )` call in the script entry point.
    file = '/home/hylas/dev/data/ru05_20171208.csv'
    
    
    # Alternative sample path (Windows):
    # file ='d:/test/ru09_20171219.csv'
    
    # 本程序的主要功能是生成 tik10x3min 的 X、y 数据用于网络预测: 从每天的数据中提取出 X、y, 保存到 .h5 文件
    #  设计上有两组数据, 一组是  5tik , 以 3分钟后的 dim 作为 Y , dim >=10 记为 +1, <=-10 记为 -1, 其它过滤
    #                   一组是 10tik , 以 3分钟后的 dim 作为 Y , 同上 (当前代码只生成这一组)
    
    # tik 定义  [index, price, vol, amount, bidprice, bvol, askprice, askvol, openvol, closevol, type1, type2 ]
    # X 定义   tik x5   or  tik x10
    # Y 定义   dim >=10 记为 +1, <=-10 记为 -1, 其它过滤 (训练脚本读取时再把 -1 映射为 0, +1 映射为 1)
    
    
    class tik10x3min():
        """Turn futures tick CSV files into (X, y) training samples.

        For every tick i (after the first WINDOW ticks) the feature row is the
        WINDOW preceding ticks, flattened, with the price columns expressed
        relative to the price of tick i. The label is the sign of "dim", the
        price change HORIZON rows later divided by PRICE_STEP. Samples whose
        |dim| is below DIM_THRESHOLD are dropped.

        Tick layout (12 fields):
            0 index, 1 price, 2 vol, 3 amount, 4 bidprice, 5 bvol,
            6 askprice, 7 askvol, 8 openvol, 9 closevol, 10 type1, 11 type2

        The code is written to run on both Python 2 and Python 3.
        """

        # Number of consecutive ticks in one feature window.
        WINDOW = 10
        # Rows between a tick and the row its label price is read from.
        # NOTE(review): the header comment of the script calls this
        # "3 minutes", which would mean 2 rows per second - confirm.
        HORIZON = 360
        # Price differences are divided by this value to obtain "dim".
        PRICE_STEP = 5
        # Samples with |dim| strictly below this value are discarded.
        DIM_THRESHOLD = 10
        # Number of fields in one tick record.
        TIK_FIELDS = 12

        srcpath = None
        resultpath = './temp/'
        tikcount = 0
        # Mutable state is created per instance in __init__/initData; the
        # original kept shared lists here at class level.
        tiklist = None
        yDimList = None
        Xy_file = None
        curFile = None
        Xdata = None
        ydata = None

        def __init__(self):
            # List of .h5 files written so far; it must survive across
            # dofile() calls so that dopath() can write a complete index.
            self.Xy_file = []
            self.initData()

        def initData(self):
            """Reset the per-file working state.

            Xy_file is intentionally NOT reset here: dofile() calls this
            method for every input file, and clearing the list there made the
            index written by dopath() contain only the last file.
            """
            self.tikcount = 0
            self.tiklist = []
            self.yDimList = []
            self.Xdata = None
            self.ydata = None
            self.curFile = None

        #   0     1       2    3        4        5     6         7      8         9        10     11
        # [index, price, vol, amount, bidprice, bvol, askprice, askvol, openvol, closevol, type1, type2]
        def satype(self, lasttik, curtik):
            """Fill in openvol, closevol, type1 and type2 of curtik in place.

            Args:
                lasttik: the previous tick record (only its bid/ask prices,
                    fields 4 and 6, are read).
                curtik: the current tick record; fields 8-11 are overwritten.

            Returns:
                The same curtik object, after modification.
            """
            # type1: 1 when the trade printed at/above the previous ask,
            # -1 when at/below the previous bid (the bid test wins if both
            # hold), otherwise left unchanged.
            if curtik[1] >= lasttik[6]:
                curtik[10] = 1
            if curtik[1] <= lasttik[4]:
                curtik[10] = -1

            # NOTE: `/` is true division on Python 3 but floors ints on
            # Python 2; kept as in the original.
            curtik[8] = (curtik[2] + curtik[3]) / 2
            curtik[9] = (curtik[2] - curtik[3]) / 2

            openvol = curtik[8]
            closevol = curtik[9]
            side = curtik[10]

            # type2 codes; later matches deliberately override earlier ones,
            # exactly as the original sequence of independent `if`s did.
            if openvol == 0:      # both sides closing
                curtik[11] = 1
            if closevol == 0:     # both sides opening
                curtik[11] = 2

            if side == 1:
                if openvol > closevol:       # long opening
                    curtik[11] = 3
                elif openvol < closevol:     # short closing
                    curtik[11] = 4
                elif openvol == closevol:    # long turnover
                    curtik[11] = 5
            elif side == -1:
                if openvol > closevol:       # short opening
                    curtik[11] = 6
                elif openvol < closevol:     # long closing
                    curtik[11] = 7
                elif openvol == closevol:    # short turnover
                    curtik[11] = 8

            return curtik

        def dotik(self, tik):
            """Placeholder for per-tick processing; currently does nothing."""
            pass

        def dotiklist(self):
            """Build self.Xdata / self.ydata from self.tiklist and self.yDimList.

            Xdata becomes a 2-D array with one flattened window per row and
            ydata a 2-D column of +1 / -1 labels. Both stay untouched (None
            after initData) when no tick passes the threshold filter.
            """
            data = np.array(self.tiklist)
            print(data.shape)

            print('for tiklist: ')

            window = self.WINDOW
            threshold = self.DIM_THRESHOLD
            x_rows = []
            y_rows = []

            for i in range(window, len(self.tiklist)):
                y = self.yDimList[i]
                # Keep only clear moves. Written this way so that NaN labels
                # are dropped as well instead of slipping through.
                if not (y <= -threshold or y >= threshold):
                    continue
                label = 1 if y > 0 else -1

                x_data_rc = np.array(self.tiklist[i - window:i])
                lastprice = self.tiklist[i][1]
                # Express price, bid and ask relative to the current price so
                # the features do not depend on the absolute price level.
                # dtype=int is kept from the original; it truncates a
                # fractional lastprice.
                opNumber = np.array(
                    [0, lastprice, 0, 0, lastprice, 0, lastprice, 0, 0, 0, 0, 0],
                    dtype=int)
                x_rows.append((x_data_rc - opNumber).reshape(-1))
                y_rows.append([label])

            if not x_rows:
                return

            # Stack once instead of re-copying the whole array per sample.
            # This also keeps Xdata 2-D when there is exactly one sample.
            self.Xdata = np.vstack(x_rows)
            self.ydata = np.array(y_rows)

            print(self.Xdata.shape)
            print(self.ydata.shape)
            print(self.Xdata[0:10])
            print(self.ydata[0:20])

        def _rowtotik(self, data, i):
            """Return the 12-field tick record for row i of the raw CSV array."""
            # CSV columns 3, 5, 7, 8, 9, 10, 11 feed tick fields 1-7; the
            # last four fields are filled in later by satype().
            return [i, data[i, 3], data[i, 5], data[i, 7], data[i, 8],
                    data[i, 9], data[i, 10], data[i, 11], 0, 0, 0, 0]

        def dofile(self, file):
            """Convert one tick CSV file into an .h5 file with X and y.

            The output is written to
            `<resultpath>/tik10x3min_X_<input name without extension>.h5`
            under the keys 'data' (X) and 'label' (y), and its path is
            appended to self.Xy_file. Files that cannot be read, have too few
            columns, or yield no sample are skipped.

            Args:
                file: path of the CSV file (read with gbk encoding).
            """
            self.initData()
            try:
                df = pd.read_csv(file, header=0, encoding='gbk')
            except Exception as e:
                # Best effort: report and skip instead of failing silently.
                print('skip %s: %s' % (file, e))
                return
            self.curFile = file
            self.tiklist_df = df

            data = np.array(df)
            print(data.shape)

            print('for file: ')

            if data.shape[1] < self.TIK_FIELDS:
                print('skip %s: expected at least %d columns' % (file, self.TIK_FIELDS))
                return

            horizon = self.HORIZON
            for i in range(1, data.shape[0] - horizon):
                lasttik = self._rowtotik(data, i - 1)
                curtik = self.satype(lasttik, self._rowtotik(data, i))
                self.tiklist.append(curtik)
                self.yDimList.append(
                    (data[i + horizon, 3] - data[i, 3]) / self.PRICE_STEP)

            self.dotiklist()
            if self.Xdata is None:
                return

            # Save to file.
            fileflag = os.path.splitext(os.path.basename(file))[0]
            if self.resultpath and not os.path.isdir(self.resultpath):
                os.makedirs(self.resultpath)
            destFile = os.path.join(
                self.resultpath, 'tik10x3min_X_' + fileflag + '.h5')

            print(destFile)
            pd.DataFrame(self.Xdata).to_hdf(destFile, key='data')
            pd.DataFrame(self.ydata).to_hdf(destFile, key='label')
            self.Xy_file.append(destFile)

        def setPath(self, srcpath, resultpath):
            """Remember the input directory and the output directory."""
            self.srcpath = srcpath
            self.resultpath = resultpath

        def dopath(self, srcpath, resultpath):
            """Run dofile() on every regular file directly inside srcpath.

            Afterwards an index of all written .h5 files is saved as
            `data.txt` (CSV format) in resultpath.

            Args:
                srcpath: directory containing the tick CSV files.
                resultpath: directory receiving the .h5 files and the index.
            """
            self.setPath(srcpath, resultpath)
            self.Xy_file = []

            # Sorted so that the processing order and the index are stable.
            names = sorted(os.listdir(srcpath))
            total = len(names)

            for i, name in enumerate(names):
                print('%d / %d' % (i, total))
                path = os.path.join(srcpath, name)
                if not os.path.isfile(path):
                    continue
                print(path)
                self.dofile(path)

            xy_df = pd.DataFrame(self.Xy_file)
            xy_df.to_csv(os.path.join(resultpath, 'data.txt'))
    
    def test():
        """Print a small demo of numpy row-wise broadcasting subtraction.

        A 3x3 matrix is printed, then printed again after the vector
        [1, 1, 3] has been subtracted from each of its rows. This is the same
        operation dotiklist() uses to make prices relative.
        """
        data = [[1, 2, 3], [4, 5, 6], [7, 8, 9]]
        xdata = np.array(data)
        # Single-argument print(...) gives the same output on Python 2 and 3.
        print(xdata)

        a2 = np.array([1, 1, 3], dtype=int)
        xdata = xdata - a2
        print(xdata)
    
    
    if __name__ == "__main__":
        # Kept from the original script; the value is not used below.
        sys_code_type = sys.getfilesystemencoding()

        # Run the small numpy broadcasting demo first.
        test()

        # Convert every tick file of the 2015 "ru" directory into per-file
        # .h5 sample files. Other invocations tried previously:
        #   model.dofile( file )
        #   model.dopath('/home/hylas/dev/data/ru/20171205/', './temp/')
        #   model.dopath('/home/hylas/dev/data2/futuretik/ru/ru2015/', '/home/hylas/dev/data2/futuretik/ru/goodmin/ru2015X_goodmin_tik/')
        #   model.dopath('D:/test/rufile/','D:/test/result/')
        model = tik10x3min()
        model.dopath(
            '/home/hylas/dev/data2/futuretik/ru/ru2015/',
            '/home/hylas/dev/data2/futuretik/ru/tik10x3min/'
        )



    代码(网络训练):

    # encoding: UTF-8
    
    from sklearn.datasets import fetch_mldata
    import pandas as pd
    import numpy as np
    import  time
    
    import sys
    sys.path.append("/home/hylas/dev/py/project/lib/hyNN/")
    #sys.path.append("../../lib/hyNN/")
    #sys.path.append("..")
    import tool
    from tool.dataxdo import *
    from tool.dfdo import *
    from tool.datadraw import *
    from tool.imgdo import *
    
    from ML.CNN import *
    from ML.MLP import *
    from ML.LSTMer import *
    
    #sys.path.append("/home/hylas/dev/py/project/lib/")
    #import common
    #from common.bsMonitor import *
    
    import os
    import sys
    from sklearn import datasets
    from keras.utils import np_utils
    
    
    
    
    #读取文件, 形成X,y
    #把X,y 放到 MLP / LSTM 里面识别
    
    
    class runTik10x3miny():
        """Load the per-file tick samples and train a network on them.

        X and y accumulate the samples of every .h5 file read by dofile().
        The code is written to run on both Python 2 and Python 3.
        """

        X = None
        y = None

        def __init__(self):
            print('runTik10x3miny init')

        def dofile(self, path):
            """Append the samples stored in one .h5 file to self.X / self.y.

            The file must hold the keys 'data' (features) and 'label'. Labels
            are cast to int and remapped -1 -> 0, 1 -> 1; any other value
            becomes NaN through Series.map. A file whose feature width
            differs from the data already loaded is reported and skipped.

            Args:
                path: path of the .h5 file.
            """
            df_X = pd.read_hdf(path, key='data')
            df_y = pd.read_hdf(path, key='label')

            df_y[[0]] = df_y[[0]].astype(int)
            df_y[0] = df_y[0].map({-1: 0, 1: 1})

            npX = np.array(df_X)
            npY = np.array(df_y)

            if self.X is None:
                self.X = npX
                self.y = npY
                return

            if self.X.shape[1] != npX.shape[1]:
                print(' self.X.shape[1] != npX.shape[1]  ')
                print('%d %d' % (self.X.shape[1], npX.shape[1]))
                return

            self.X = np.vstack((self.X, npX))
            self.y = np.vstack((self.y, npY))

        def loadXyFromDiskPath(self, datapath):
            """Load every '.h5' file directly inside datapath.

            Args:
                datapath: directory to scan (not recursive).

            Returns:
                The tuple (self.X, self.y); both are None when no file was
                loaded and nothing had been loaded before.
            """
            # Sorted so that the sample order is reproducible between runs;
            # os.listdir() returns entries in arbitrary order.
            names = sorted(os.listdir(datapath))
            total = len(names)

            for i, name in enumerate(names):
                print('%d / %d' % (i, total))
                path = os.path.join(datapath, name)
                if not os.path.isfile(path):
                    continue
                if not path.endswith('.h5'):
                    continue
                print(path)
                self.dofile(path)

            return self.X, self.y

        def makedata(self,
                     destFile='/home/hylas/dev/data2/futuretik/ru/modeh5data/tik10x3miny.h5',
                     srcpath='/home/hylas/dev/data2/futuretik/ru/tik10x3min/'):
            """Return the training set (X, y), using destFile as a cache.

            When destFile exists, X and y are read from its 'data' and
            'label' keys. Otherwise all files under srcpath are loaded, y is
            one-hot encoded into 2 classes, the pair is passed through
            dfdo.datadengfen, and the result is written to destFile.

            Args:
                destFile: path of the cache file.
                srcpath: directory holding the per-file .h5 samples.

            Returns:
                The tuple (X, y) of numpy arrays.
            """
            if os.path.exists(destFile):
                X = np.array(pd.read_hdf(destFile, key='data'))
                y = np.array(pd.read_hdf(destFile, key='label'))
                return X, y

            dd = dfdo()
            X, y = self.loadXyFromDiskPath(srcpath)
            print(y[0:100])
            y = np_utils.to_categorical(y, num_classes=2)
            # NOTE(review): datadengfen is a project helper; presumably it
            # balances the two classes - confirm against tool.dfdo.
            X, y = dd.datadengfen(X, y)

            pd.DataFrame(X).to_hdf(destFile, key='data')
            pd.DataFrame(y).to_hdf(destFile, key='label')

            return X, y

        def _showdata(self, X, y):
            """Print the shapes and the first ten rows of X and y."""
            print(X.shape)
            print(y.shape)
            print(X[0:10])
            print(y[0:10])

        def do(self):
            """Train and evaluate the project's MLP model on the data set."""
            model = MLP()

            X, y = self.makedata()
            self._showdata(X, y)

            model.simple_result(X, y)

        def do2(self):
            """Train and evaluate the project's LSTMer model on the data set."""
            model = LSTMer()

            X, y = self.makedata()
            self._showdata(X, y)

            # X is passed as loaded, without an explicit reshape here.
            # NOTE(review): presumably simple_result reshapes it using
            # timesteps/data_dim - confirm against ML.LSTMer.
            model.simple_result(X, y, timesteps=10, data_dim=12)
    
    if __name__ == "__main__":
        # Kept from the original script; the value is not used below.
        sys_code_type = sys.getfilesystemencoding()

        # Build (or load the cached) data set and train the MLP model.
        run = runTik10x3miny()
        run.do()



    训练结果:

    1263074/1263074 [==============================] - 22s - loss: 0.6923 - acc: 0.5092 - val_loss: 0.6924 - val_acc: 0.5133
    Epoch 67/68
    1263074/1263074 [==============================] - 22s - loss: 0.6924 - acc: 0.5097 - val_loss: 0.6924 - val_acc: 0.5117
    Epoch 68/68
    1263074/1263074 [==============================] - 22s - loss: 0.6923 - acc: 0.5093 - val_loss: 0.6921 - val_acc: 0.5102
    evaluate acc: 
    150144/155936 [===========================>..] - ETA: 0s[0.6920651392649998, 0.50743253642520003]


    欢迎讨论(QQ群):   375129936

  • 相关阅读:
    lnmp分离及其迁移数之一---数据库迁移
    lnmp wordpress...
    LNMP安装
    rpm 强制卸载
    ss ifconfig工具
    nginx--日志
    nginx--模块2--基于用户
    python-网络编程
    基本数据之-字典
    Python【day 9】函数入门1
  • 原文地址:https://www.cnblogs.com/xiaoxuebiye/p/8315622.html
Copyright © 2011-2022 走看看