zoukankan      html  css  js  c++  java
  • 科学经得起实践检验-python3.6通过决策树实战精准准确预测今日大盘走势(含代码)

    科学经得起实践检验-python3.6通过决策树实战精准准确预测今日大盘走势(含代码)




    春有百花秋有月,夏有凉风冬有雪;

    若无闲事挂心头,便是人间好时节。

      

      --宋.无门慧开


    不废话了,以下训练模型数据,采用本人发明的极致800实时指数近期的一些实际数据,

    预测采用今日的真实数据

    #coding=utf-8
    __author__ = 'huangzhi'
    
    import math
    import operator
    
    
    def calcShannonEnt(dataset):
        numEntries = len(dataset)
        labelCounts = {}
        for featVec in dataset:
            currentLabel = featVec[-1]
            if currentLabel not in labelCounts.keys():
                labelCounts[currentLabel] = 0
            labelCounts[currentLabel] += 1
    
        shannonEnt = 0.0
        for key in labelCounts:
            prob = float(labelCounts[key]) / numEntries
            shannonEnt -= prob * math.log(prob, 2)
        return shannonEnt
    
    
    def CreateDataSet():
        # dataset = [[1, 1, 'yes'],
        #            [1, 1, 'yes'],
        #            [1, 0, 'no'],
        #            [0, 1, 'no'],
        #            [0, 1, 'no']]
    
        dataset = [[3, 4, 100, 85, 4, 6, 110, 120, 4, 6, 151, 122, 8, 12, 110, 185, ''],
                   [5, 7, 88, 85, 6, 8, 100, 130, 6, 9, 131, 132, 8, 14, 100, 195, ''],
                   [6, 2, 60, 20, 9, 3, 80, 22, 16, 4, 131, 32, 33, 5, 160, 45, ''],
                   [3, 4, 100, 105, 4, 6, 110, 120, 4, 6, 151, 122, 8, 12, 110, 185, ''],
                   [5, 3, 50, 30, 8, 4, 70, 28, 12, 6, 101, 42, 28, 7, 120, 35, ''],
                   [2, 6, 60, 95, 4, 8, 90, 130, 6, 11, 101, 142, 9, 15, 99, 145, ''],
                   [5, 3, 70, 30, 8, 4, 90, 32, 22, 6, 141, 42, 43, 8, 150, 65, ''],
                   [2, 8, 30, 60, 9, 8, 80, 90, 9, 20, 140, 160, 12, 32, 101, 205, '']]
        labels = ['l1', 'l2', 'l3', 'l4', 'l5', 'l6', 'l7', 'l8', 'l9', 'l11', 'l12', 'l13', 'l14', 'l15', 'l16', 'l17']
        return dataset, labels
    
    
    def splitDataSet(dataSet, axis, value):
        retDataSet = []
        for featVec in dataSet:
            if featVec[axis] == value:
                reducedFeatVec = featVec[:axis]
                reducedFeatVec.extend(featVec[axis + 1:])
                retDataSet.append(reducedFeatVec)
    
        return retDataSet
    
    
    def chooseBestFeatureToSplit(dataSet):
        numberFeatures = len(dataSet[0]) - 1
        baseEntropy = calcShannonEnt(dataSet)
        bestInfoGain = 0.0;
        bestFeature = -1;
        for i in range(numberFeatures):
            featList = [example[i] for example in dataSet]
            # print(featList)
            uniqueVals = set(featList)
            # print(uniqueVals)
            newEntropy = 0.0
            for value in uniqueVals:
                subDataSet = splitDataSet(dataSet, i, value)
                prob = len(subDataSet) / float(len(dataSet))
                newEntropy += prob * calcShannonEnt(subDataSet)
            infoGain = baseEntropy - newEntropy
            if (infoGain > bestInfoGain):
                bestInfoGain = infoGain
                bestFeature = i
        return bestFeature
    
    
    def majorityCnt(classList):
        classCount = {}
        for vote in classList:
            if vote not in classCount.keys():
                classCount[vote] = 0
            classCount[vote] = 1
        sortedClassCount = sorted(classCount.iteritems(), key=operator.itemgetter(1), reverse=True)
        return sortedClassCount[0][0]
    
    
    def createTree(dataSet, inputlabels):
        labels = inputlabels[:]
        classList = [example[-1] for example in dataSet]
        if classList.count(classList[0]) == len(classList):
            return classList[0]
        if len(dataSet[0]) == 1:
            return majorityCnt(classList)
        bestFeat = chooseBestFeatureToSplit(dataSet)
        bestFeatLabel = labels[bestFeat]
        myTree = {bestFeatLabel: {}}
        del (labels[bestFeat])
        featValues = [example[bestFeat] for example in dataSet]
        uniqueVals = set(featValues)
        for value in uniqueVals:
            subLabels = labels[:]
            myTree[bestFeatLabel][value] = createTree(splitDataSet(dataSet, bestFeat, value), subLabels)
        return myTree
    
    
    def classify(inputTree, featLabels, testVec):
        firstStr = list(inputTree.keys())[0]
        secondDict = inputTree[firstStr]
        featIndex = featLabels.index(firstStr)
        for key in secondDict.keys():
            if testVec[featIndex] == key:
                if type(secondDict[key]).__name__ == 'dict':
                    classLabel = classify(secondDict[key], featLabels, testVec)
                else:
                    classLabel = secondDict[key]
        return classLabel
    
    
    myDat, labels = CreateDataSet()
    # print(calcShannonEnt(myDat))
    
    # print(splitDataSet(myDat, 1, 1))
    
    # print(chooseBestFeatureToSplit(myDat))
    
    myTree = createTree(myDat, labels)
    
    #通过早上9:41分的实际数据进行预测
    print(classify(myTree, labels, [1, 6, 156, 169, 1, 6, 156, 169, 1, 6, 156, 169, 1, 6, 156, 169]))
    #通过早上10:41分的实际数据进行预测
    print(classify(myTree, labels, [1, 6, 156, 169, 4, 9, 129, 263, 4, 9, 129, 263, 4, 9, 129, 263]))
    #通过下午13:41分的实际数据进行预测
    print(classify(myTree, labels, [1, 6, 156, 169, 4, 9, 129, 263, 5, 12, 123, 306, 5, 12, 123, 306]))
    #通过下午14:41分的实际数据进行预测
    print(classify(myTree, labels, [1, 6, 156, 169, 4, 9, 129, 263, 5, 12, 123, 306, 6, 13, 99, 397]))
    
    
    
    
    运行结果如下:
    D:ProgramsPythonPython36-64python.exe D:/pyfenlei/决策树/jcs4.py
    跌
    跌
    跌
    跌
    

  • 相关阅读:
    (转) Linux下Setuid命令!
    Linux SWAP 交换分区配置说明(转)
    linux中ctime,mtime,atime的区别
    无法访问win8默认共享(如C$)解决办法
    Daemon进程
    autofs文件自动挂载系统
    Selinux相关
    解读linux中用户密码规则及忘记root口令的破解(思路)
    windows共享连接显示无法打开
    DOS口令启用停用的管理员密码
  • 原文地址:https://www.cnblogs.com/bdccloudy/p/7665206.html
Copyright © 2011-2022 走看看