zoukankan      html  css  js  c++  java
  • Intel DAAL AI加速 ——传统决策树和随机森林

    # file: dt_cls_dense_batch.py
    #===============================================================================
    # Copyright 2014-2018 Intel Corporation.
    #
    # This software and the related documents are Intel copyrighted  materials,  and
    # your use of  them is  governed by the  express license  under which  they were
    # provided to you (License).  Unless the License provides otherwise, you may not
    # use, modify, copy, publish, distribute,  disclose or transmit this software or
    # the related documents without Intel's prior written permission.
    #
    # This software and the related documents  are provided as  is,  with no express
    # or implied  warranties,  other  than those  that are  expressly stated  in the
    # License.
    #===============================================================================
    
    ## <a name="DAAL-EXAMPLE-PY-DT_CLS_DENSE_BATCH"></a>
    ## example dt_cls_dense_batch.py
    
    import os
    import sys
    
    from daal.algorithms.decision_tree.classification import prediction, training
    from daal.algorithms import classifier
    from daal.data_management import (
        FileDataSource, DataSourceIface, NumericTableIface, HomogenNumericTable, MergedNumericTable
    )
    utils_folder = os.path.realpath(os.path.abspath(os.path.dirname(os.path.dirname(__file__))))
    if utils_folder not in sys.path:
        sys.path.insert(0, utils_folder)
    from utils import printNumericTables
    
    # Root folder of the sample data sets, relative to the working directory
    DAAL_PREFIX = os.path.join('..', 'data')

    # Training, pruning and test sets: CSV files stored in the 'batch' sub-folder
    trainDatasetFileName, pruneDatasetFileName, testDatasetFileName = (
        os.path.join(DAAL_PREFIX, 'batch', csvName)
        for csvName in (
            'decision_tree_train.csv',
            'decision_tree_prune.csv',
            'decision_tree_test.csv',
        )
    )

    # Width of the feature tables and the class count passed to training.Batch
    nFeatures = 5
    nClasses = 5

    # Shared state: trainModel() fills `model`; testModel() fills
    # `predictionResult` and `testGroundTruth`; printResults() reads them
    model = predictionResult = testGroundTruth = None
    
    
    def trainModel():
        """Train the decision tree classifier and keep it in the global ``model``.

        The training and the pruning CSV files are each read into a feature
        table (``nFeatures`` columns) and a one-column label table. All four
        tables are handed to ``training.Batch(nClasses)`` and the model found
        in the training result is stored in ``model``.
        """
        global model

        def loadFeaturesAndLabels(csvFileName):
            # Read one CSV file; the merged table lets a single loadDataBlock()
            # call fill the feature table and the label table together
            source = FileDataSource(
                csvFileName,
                DataSourceIface.notAllocateNumericTable,
                DataSourceIface.doDictionaryFromContext
            )
            featureTable = HomogenNumericTable(nFeatures, 0, NumericTableIface.notAllocate)
            labelTable = HomogenNumericTable(1, 0, NumericTableIface.notAllocate)
            combined = MergedNumericTable(featureTable, labelTable)
            source.loadDataBlock(combined)
            return featureTable, labelTable

        # Training set first, then the set used for pruning
        trainData, trainGroundTruth = loadFeaturesAndLabels(trainDatasetFileName)
        pruneData, pruneGroundTruth = loadFeaturesAndLabels(pruneDatasetFileName)

        # Decision tree training algorithm for nClasses classes
        trainer = training.Batch(nClasses)

        # Inputs: training data/labels plus the data/labels used for pruning
        trainerInput = trainer.input
        trainerInput.set(classifier.training.data, trainData)
        trainerInput.set(classifier.training.labels, trainGroundTruth)
        trainerInput.setTable(training.dataForPruning, pruneData)
        trainerInput.setTable(training.labelsForPruning, pruneGroundTruth)

        # Run the training and keep the resulting model for testModel()
        model = trainer.compute().get(classifier.training.model)
    
    def testModel():
        """Run the trained decision tree on the test set.

        Reads the test CSV file into a feature table and a one-column label
        table, then predicts with the global ``model``. The label table is
        stored in ``testGroundTruth`` and the algorithm result in
        ``predictionResult``.
        """
        global testGroundTruth, predictionResult

        # Destination tables: nFeatures feature columns and one label column
        testFeatures = HomogenNumericTable(nFeatures, 0, NumericTableIface.notAllocate)
        testGroundTruth = HomogenNumericTable(1, 0, NumericTableIface.notAllocate)
        combined = MergedNumericTable(testFeatures, testGroundTruth)

        # Data source reading the test data from the .csv file
        csvSource = FileDataSource(
            testDatasetFileName,
            DataSourceIface.notAllocateNumericTable,
            DataSourceIface.doDictionaryFromContext
        )

        # One load fills both tables through the merged view
        csvSource.loadDataBlock(combined)

        # Prediction algorithm with the default method
        predictor = prediction.Batch()

        # Inputs: the test features and the model produced by trainModel()
        predictorInput = predictor.input
        predictorInput.setTable(classifier.prediction.data, testFeatures)
        predictorInput.setModel(classifier.prediction.model, model)

        # Keep the whole result object; printResults() extracts the labels
        predictionResult = predictor.compute()
    
    
    def printResults():
        """Print the ground truth labels next to the predicted labels.

        Reads the globals ``testGroundTruth`` and ``predictionResult`` set by
        testModel() and passes them to ``printNumericTables`` with a row limit
        of 20.
        """
        predictedLabels = predictionResult.get(classifier.prediction.prediction)

        printNumericTables(
            testGroundTruth, predictedLabels,
            "Ground truth", "Classification results",
            "Decision tree classification results (first 20 observations):",
            20, flt64=False
        )
    
    if __name__ == "__main__":
        # Run the example end to end: train, predict, then report
        for step in (trainModel, testModel, printResults):
            step()
    

      

    随机森林的示例代码如下:

    # file: df_cls_dense_batch.py
    #===============================================================================
    # Copyright 2014-2018 Intel Corporation.
    #
    # This software and the related documents are Intel copyrighted  materials,  and
    # your use of  them is  governed by the  express license  under which  they were
    # provided to you (License).  Unless the License provides otherwise, you may not
    # use, modify, copy, publish, distribute,  disclose or transmit this software or
    # the related documents without Intel's prior written permission.
    #
    # This software and the related documents  are provided as  is,  with no express
    # or implied  warranties,  other  than those  that are  expressly stated  in the
    # License.
    #===============================================================================
    
    ## <a name="DAAL-EXAMPLE-PY-DF_CLS_DENSE_BATCH"></a>
    ## example df_cls_dense_batch.py
    
    import os
    import sys
    
    from daal.algorithms import decision_forest
    from daal.algorithms.decision_forest.classification import prediction, training
    from daal.algorithms import classifier
    from daal.data_management import (
        FileDataSource, DataSourceIface, NumericTableIface, HomogenNumericTable,
        MergedNumericTable, features
    )
    
    utils_folder = os.path.realpath(os.path.abspath(os.path.dirname(os.path.dirname(__file__))))
    if utils_folder not in sys.path:
        sys.path.insert(0, utils_folder)
    from utils import printNumericTable, printNumericTables
    
    # Root folder of the sample data sets, relative to the working directory
    DAAL_PREFIX = os.path.join('..', 'data')

    # Training and test sets: CSV files stored in the 'batch' sub-folder
    trainDatasetFileName, testDatasetFileName = (
        os.path.join(DAAL_PREFIX, 'batch', csvName)
        for csvName in (
            'df_classification_train.csv',
            'df_classification_test.csv',
        )
    )

    # Width of the feature tables and the class count passed to the algorithms
    nFeatures = 3
    nClasses = 5

    # Decision forest parameters copied onto algorithm.parameter in trainModel()
    nTrees = 10
    minObservationsInLeafNode = 8

    # Shared state: trainModel() fills `model`; testModel() fills
    # `predictionResult` and `testGroundTruth`; printResults() reads them
    model = predictionResult = testGroundTruth = None
    
    
    def trainModel():
        """Train the decision forest classifier and keep it in the global ``model``.

        Reads the training CSV file into a feature table (``nFeatures`` columns)
        and a one-column label table, marks the first two features as continuous
        and the third as categorical, trains ``training.Batch(nClasses)`` with
        the module-level forest parameters, stores the model in ``model`` and
        prints the variable importance and out-of-bag error tables.
        """
        global model

        # Initialize FileDataSource<CSVFeatureManager> to retrieve the input data from a .csv file
        trainDataSource = FileDataSource(
            trainDatasetFileName,
            DataSourceIface.notAllocateNumericTable,
            DataSourceIface.doDictionaryFromContext
        )

        # Create Numeric Tables for training data and labels
        trainData = HomogenNumericTable(nFeatures, 0, NumericTableIface.notAllocate)
        trainGroundTruth = HomogenNumericTable(1, 0, NumericTableIface.notAllocate)
        mergedData = MergedNumericTable(trainData, trainGroundTruth)

        # Retrieve the data from the input file
        trainDataSource.loadDataBlock(mergedData)

        # Get the data dictionary and update it with additional information
        # about the data (named so that it does not shadow the builtin `dict`)
        featureDictionary = trainData.getDictionary()

        # Add a feature type to the dictionary
        featureDictionary[0].featureType = features.DAAL_CONTINUOUS
        featureDictionary[1].featureType = features.DAAL_CONTINUOUS
        featureDictionary[2].featureType = features.DAAL_CATEGORICAL

        # Create an algorithm object to train the decision forest classification model
        algorithm = training.Batch(nClasses)
        algorithm.parameter.nTrees = nTrees
        algorithm.parameter.minObservationsInLeafNode = minObservationsInLeafNode
        algorithm.parameter.featuresPerNode = nFeatures
        # Request MDI variable importance and the out-of-bag error; both are
        # printed from the training result below
        algorithm.parameter.varImportance = decision_forest.training.MDI
        algorithm.parameter.resultsToCompute = decision_forest.training.computeOutOfBagError

        # Pass the training data set and dependent values to the algorithm
        algorithm.input.set(classifier.training.data, trainData)
        algorithm.input.set(classifier.training.labels, trainGroundTruth)

        # Train the decision forest classification model and retrieve the results of the training algorithm
        trainingResult = algorithm.compute()
        model = trainingResult.get(classifier.training.model)
        printNumericTable(trainingResult.getTable(training.variableImportance), "Variable importance results: ")
        printNumericTable(trainingResult.getTable(training.outOfBagError), "OOB error: ")
    
    def testModel():
        """Run the trained decision forest on the test set.

        Reads the test CSV file into a feature table and a one-column label
        table, applies the same feature types as used for training, and
        predicts with the global ``model``. The label table is stored in
        ``testGroundTruth`` and the algorithm result in ``predictionResult``.
        """
        global testGroundTruth, predictionResult

        # Initialize FileDataSource<CSVFeatureManager> to retrieve the test data from a .csv file
        testDataSource = FileDataSource(
            testDatasetFileName,
            DataSourceIface.notAllocateNumericTable,
            DataSourceIface.doDictionaryFromContext
        )

        # Create Numeric Tables for testing data and labels
        testData = HomogenNumericTable(nFeatures, 0, NumericTableIface.notAllocate)
        testGroundTruth = HomogenNumericTable(1, 0, NumericTableIface.notAllocate)
        mergedData = MergedNumericTable(testData, testGroundTruth)

        # Retrieve the data from input file
        testDataSource.loadDataBlock(mergedData)

        # Get the data dictionary and update it with additional information
        # about the data (named so that it does not shadow the builtin `dict`)
        featureDictionary = testData.getDictionary()

        # Add a feature type to the dictionary
        featureDictionary[0].featureType = features.DAAL_CONTINUOUS
        featureDictionary[1].featureType = features.DAAL_CONTINUOUS
        featureDictionary[2].featureType = features.DAAL_CATEGORICAL

        # Create algorithm objects for decision forest classification prediction with the default method
        algorithm = prediction.Batch(nClasses)

        # Pass the testing data set and trained model to the algorithm
        algorithm.input.setTable(classifier.prediction.data, testData)
        algorithm.input.setModel(classifier.prediction.model, model)

        # Compute prediction results and retrieve algorithm results
        # (Result class from classifier.prediction)
        predictionResult = algorithm.compute()
    
    
    def printResults():
        """Print the predicted labels, then the ground truth labels.

        Reads the globals ``predictionResult`` and ``testGroundTruth`` set by
        testModel(); each table is passed to ``printNumericTable`` with a row
        limit of 10.
        """
        predictedLabels = predictionResult.get(classifier.prediction.prediction)
        printNumericTable(predictedLabels, "Decision forest prediction results (first 10 rows):", 10)
        printNumericTable(testGroundTruth, "Ground truth (first 10 rows):", 10)
    
    if __name__ == "__main__":
        # Run the example end to end: train, predict, then report
        for step in (trainModel, testModel, printResults):
            step()
    

      

  • 相关阅读:
    自定义View的ToolBar布局报错Error:(2) No resource identifier found for attribute 'context' in package 'c
    在学git之主分支 branch
    获取发布版SHA1
    关于开启线程与UI的操作
    播放音频和视频(VideoView控件)
    通知栏Notification的应用
    Android 真机调试 Installation failed with message 远程主机强迫关闭了一个现有的连接。. It is possible that this issue is resolved by uninstalling an existing version of the apk if it is present, and then re-installing. WA
    运行程序申请危险权限
    mysql乐观锁总结和实践
    Nginx配置文件nginx.conf中文详解
  • 原文地址:https://www.cnblogs.com/bonelee/p/9703150.html
Copyright © 2011-2022 走看看