zoukankan      html  css  js  c++  java
  • Intel daal4py demo运行过程

    daal4py 安装(记得先安装 Anaconda):

    # Fetch the daal4py sources and enter the checkout.
    git clone https://github.com/IntelPython/daal4py.git
    cd daal4py
    # Create a conda environment named DAAL4PY (Python 3.6) with the packages listed on the command line.
    conda create -n DAAL4PY -c intel -c intel/label/test -c conda-forge python=3.6 mpich cnc tbb-devel daal daal-include cython jinja2 numpy
    # Activate the new environment.
    # NOTE(review): newer conda releases use `conda activate` / `conda deactivate` instead — confirm for your conda version.
    source activate DAAL4PY
    # Point CNCROOT, TBBROOT and DAALROOT at $CONDA_PREFIX
    # (presumably consumed by setup.py during the build — verify).
    export CNCROOT=$CONDA_PREFIX
    export TBBROOT=$CONDA_PREFIX
    export DAALROOT=$CONDA_PREFIX
    # Build the extension modules, then install the package.
    python setup.py build_ext
    python setup.py install
    # Run the demos shown later in this post.
    
    source deactivate DAAL4PY # Leave the environment.
    

     注意:安装过程较慢,请耐心等待。

    随机森林:

    #*******************************************************************************
    # Copyright 2014-2018 Intel Corporation
    # All Rights Reserved.
    #
    # This software is licensed under the Apache License, Version 2.0 (the
    # "License"), the following terms apply:
    #
    # You may not use this file except in compliance with the License.  You may
    # obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0
    #
    # Unless required by applicable law or agreed to in writing, software
    # distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
    # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    #
    # See the License for the specific language governing permissions and
    # limitations under the License.
    #*******************************************************************************
    
    # daal4py Decision Forest Classification example for shared memory systems
    
    import daal4py as d4p
    import numpy as np
    
    # Prefer pandas' fast CSV reader; fall back to numpy when pandas is missing.
    try:
        import pandas

        def read_csv(f, c):
            """Read columns ``c`` of the comma-separated file ``f`` (no header row).

            Returns the selected columns as a float32 numpy array.
            """
            return pandas.read_csv(f, usecols=c, delimiter=',', header=None, dtype=np.float32).values
    except ImportError:
        # Only a missing pandas triggers the fallback; any other error propagates
        # instead of being silently swallowed.
        def read_csv(f, c):
            """Read columns ``c`` of the comma-separated file ``f`` with numpy.

            Returns the selected columns as a float32 array with at least 2 dimensions.
            """
            return np.loadtxt(f, usecols=c, delimiter=',', ndmin=2, dtype=np.float32)
    
    
    def main():
        """Train a decision forest classifier and run prediction on the test file.

        Returns:
            A tuple ``(train_result, predict_result, plabels)``: the training
            result, the prediction result, and the labels read from the test file.
        """
        train_path = "./data/batch/df_classification_train.csv"
        test_path = "./data/batch/df_classification_test.csv"

        # Columns 0-2 are used as features, column 3 as the label.
        feature_cols = range(3)
        label_cols = range(3, 4)

        # Training algorithm configured for 5 classes.
        trainer = d4p.decision_forest_classification_training(
            5,
            nTrees=10,
            minObservationsInLeafNode=8,
            featuresPerNode=3,
            engine=d4p.engines_mt19937(seed=777),
            varImportance='MDI',
            bootstrap=True,
            resultsToCompute='computeOutOfBagError',
        )

        train_features = read_csv(train_path, feature_cols)
        train_labels = read_csv(train_path, label_cols)
        # Training result provides (depending on parameters) model, outOfBagError,
        # outOfBagErrorPerObservation and/or variableImportance.
        fitted = trainer.compute(train_features, train_labels)

        # Prediction uses the same number of classes and the same feature columns.
        predictor = d4p.decision_forest_classification_prediction(5)
        test_features = read_csv(test_path, feature_cols)
        test_labels = read_csv(test_path, label_cols)
        predicted = predictor.compute(test_features, fitted.model)

        # Sanity check: one prediction per test row.
        n_rows = test_features.shape[0]
        assert predicted.prediction.shape == (n_rows, 1)

        return (fitted, predicted, test_labels)
    
    
    if __name__ == "__main__":
        # Run the example, then print the training diagnostics and the first ten
        # predictions next to the first ten ground-truth labels.
        (train_result, predict_result, plabels) = main()
        # The "\n" escapes must stay inside the literals: a string split across
        # physical lines is a SyntaxError.
        print("\nVariable importance results:\n", train_result.variableImportance)
        print("\nOOB error:\n", train_result.outOfBagError)
        print("\nDecision forest prediction results (first 10 rows):\n", predict_result.prediction[0:10])
        print("\nGround truth (first 10 rows):\n", plabels[0:10])
        print('All looks good!')
    

     demo示例数据:

    0.00125126,0.563585,8,2,
    0.193304,0.808741,12,1,
    0.585009,0.479873,6,1,
    0.350291,0.895962,13,4,
    0.82284,0.746605,11,2,
    0.174108,0.858943,12,0,
    0.710501,0.513535,10,2,
    0.303995,0.0149846,1,2,
    0.0914029,0.364452,4,0,
    0.147313,0.165899,0,4,
    0.988525,0.445692,7,2,
    0.119083,0.00466933,0,2,
    0.0089114,0.37788,4,2,
    0.531663,0.571184,10,3,
    0.601764,0.607166,10,4,
    0.166234,0.663045,8,4,
    0.450789,0.352123,5,3,
    0.0570391,0.607685,8,4,
    0.783319,0.802606,15,3,
    0.519883,0.30195,6,2,
    0.875973,0.726676,11,1,
    0.955901,0.925718,15,3,
    0.539354,0.142338,2,3,
    0.462081,0.235328,1,2,
    0.862239,0.209601,3,1,
    0.779656,0.843654,15,3,
    0.996796,0.999695,15,2,
    0.611499,0.392438,6,0,
    0.266213,0.297281,5,2,
    0.840144,0.0237434,3,1,
    0.375866,0.0926237,1,0,
    0.677206,0.0562151,2,3,
    0.00878933,0.91879,12,2,
    0.275887,0.272897,5,2,
    0.587909,0.691183,10,4,
    0.837611,0.726493,11,1,
    0.484939,0.205359,1,2,
    0.743736,0.468459,6,2,
    0.457961,0.949156,13,3,
    0.744438,0.10828,2,2,
    0.599048,0.385235,6,0,
    0.735008,0.608966,10,2,
    0.572405,0.361339,6,0,
    0.151555,0.225105,0,3,
    0.425153,0.802881,13,3,
    

    计算均值、方差等统计特征:

    #*******************************************************************************
    
    # Copyright 2014-2018 Intel Corporation
    
    # All Rights Reserved.
    
    #
    
    # This software is licensed under the Apache License, Version 2.0 (the
    
    # "License"), the following terms apply:
    
    #
    
    # You may not use this file except in compliance with the License.  You may
    
    # obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0
    
    #
    
    # Unless required by applicable law or agreed to in writing, software
    
    # distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
    
    # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    
    #
    
    # See the License for the specific language governing permissions and
    
    # limitations under the License.
    
    #*******************************************************************************
    
    
    
    # daal4py low order moments example for shared memory systems
    
    
    
    import daal4py as d4p
    
    import numpy as np
    
    
    
    # Prefer pandas' fast CSV reader; fall back to numpy when pandas is missing.
    try:
        import pandas

        def read_csv(f, c):
            """Read columns ``c`` of the comma-separated file ``f`` (no header row).

            Returns the selected columns as a float64 numpy array.
            """
            return pandas.read_csv(f, usecols=c, delimiter=',', header=None, dtype=np.float64).values
    except ImportError:
        # Only a missing pandas triggers the fallback; any other error propagates
        # instead of being silently swallowed.
        def read_csv(f, c):
            """Read columns ``c`` of the comma-separated file ``f`` with numpy.

            Returns the selected columns as an array with at least 2 dimensions.
            """
            return np.loadtxt(f, usecols=c, delimiter=',', ndmin=2)
    
    
    
    
    
    def main():
        """Compute low order moments for the first ten columns of the sample file.

        Returns:
            The result object produced by ``d4p.low_order_moments().compute``.
        """
        data = read_csv("./data/batch/covcormoments_dense.csv", range(10))
        res = d4p.low_order_moments().compute(data)

        # Every statistic is expected to be a single row with one entry per
        # input column.
        expected_shape = (1, data.shape[1])
        statistics = (
            "minimum",
            "maximum",
            "sum",
            "sumSquares",
            "sumSquaresCentered",
            "mean",
            "secondOrderRawMoment",
            "variance",
            "standardDeviation",
            "variation",
        )
        for stat in statistics:
            assert getattr(res, stat).shape == expected_shape

        return res
    
    
    
    
    
    if __name__ == "__main__":
        res = main()
        # Print each computed statistic. The "\n" escapes must stay inside the
        # literals: a string split across physical lines is a SyntaxError.
        print("\nMinimum:\n", res.minimum)
        print("\nMaximum:\n", res.maximum)
        print("\nSum:\n", res.sum)
        print("\nSum of squares:\n", res.sumSquares)
        print("\nSum of squared difference from the means:\n", res.sumSquaresCentered)
        print("\nMean:\n", res.mean)
        print("\nSecond order raw moment:\n", res.secondOrderRawMoment)
        print("\nVariance:\n", res.variance)
        print("\nStandard deviation:\n", res.standardDeviation)
        print("\nVariation:\n", res.variation)
        print('All looks good!')
    
  • 相关阅读:
    数据访问技术系列课程 笔记(2) ADO.NET 连接方式进行数据访问
    Modern C# 系列课程笔记 第11节 深入委托和事件
    idea 将项目托管到 Git 报错:Can't finish Gitee sharing process
    ADO.Net
    WebService
    2013年了
    201301杂谈
    流程图
    出错列表
    杂谈4 2012年8月15日开
  • 原文地址:https://www.cnblogs.com/bonelee/p/9881478.html
Copyright © 2011-2022 走看看