zoukankan      html  css  js  c++  java
  • 假期学习14

    今天原来是情人节,但想到情侣不能约会,就超级开心,也成功地做完了全部的实验项目,收获很大,对新的语言有了一定的了解。

    下面是实验的部分代码:

    scala> val labelIndexer = new StringIndexer().setInputCol("label").setOutputCol("indexedLabel").fit(result)
    labelIndexer: org.apache.spark.ml.feature.StringIndexerModel = strIdx_6721796011c5
    scala> labelIndexer.labels.foreach(println)
    <=50K
    >50K
     
    scala> val featureIndexer = new VectorIndexer().setInputCol("pcaFeatures").setOutputCol("indexedFeatures").fit(result)
    featureIndexer: org.apache.spark.ml.feature.VectorIndexerModel = vecIdx_7b6672933fc3
    scala> println(featureIndexer.numFeatures)
    3
     
    scala> val labelConverter = new IndexToString().setInputCol("prediction").setOutputCol("predictedLabel").setLabels(labelIndexer.labels)
    labelConverter: org.apache.spark.ml.feature.IndexToString = idxToStr_d0c9321aaaa9
    scala> val lr = new LogisticRegression().setLabelCol("indexedLabel").setFeaturesCol("indexedFeatures").setMaxIter(100)
    lr: org.apache.spark.ml.classification.LogisticRegression = logreg_06812b41b118
     
    scala> val lrPipeline = new Pipeline().setStages(Array(labelIndexer, featureIndexer, lr, labelConverter))
    lrPipeline: org.apache.spark.ml.Pipeline = pipeline_b6b87b6e8cd5
    scala> val lrPipelineModel = lrPipeline.fit(result)
    lrPipelineModel: org.apache.spark.ml.PipelineModel = pipeline_b6b87b6e8cd5
    scala> val lrModel = lrPipelineModel.stages(2).asInstanceOf[LogisticRegressionModel]
    lrModel: org.apache.spark.ml.classification.LogisticRegressionModel = logreg_06812b41b118
    scala> println("Coefficients: " + lrModel.coefficientMatrix+"Intercept: "+lrModel.interceptVector+"numClasses: "+lrModel.numClasses+"numFeatures: "+lrModel.numFeatures)
    Coefficients: -1.9828586428133616E-7 -3.5090924715811705E-4 -8.451506276498941E-4 
    Intercept: [-1.4525982557843347]numClasses: 2numFeatures: 3
     
    scala> val lrPredictions = lrPipelineModel.transform(testdata)
    lrPredictions: org.apache.spark.sql.DataFrame = [features: vector, label: string ... 7 more fields]
    scala> val evaluator = new MulticlassClassificationEvaluator().setLabelCol("indexedLabel").setPredictionCol("prediction")
    evaluator: org.apache.spark.ml.evaluation.MulticlassClassificationEvaluator = mcEval_38ac5c14fa2a
    scala> val lrAccuracy = evaluator.evaluate(lrPredictions)
    lrAccuracy: Double = 0.7764235163053484
    scala> println("Test Error = " + (1.0 - lrAccuracy))
    Test Error = 0.22357648369465155
  • 相关阅读:
    C# 7.2 通过 in 和 readonly struct 减少方法值复制提高性能
    .net remoting 使用事件
    .net remoting 使用事件
    WPF 使用 SharpDX 在 D3DImage 显示
    PHP readlink() 函数
    PHP readfile() 函数
    PHP popen() 函数
    PHP pclose() 函数
    PHP pathinfo() 函数
    latin1字符集的数据转换为utf8字符集
  • 原文地址:https://www.cnblogs.com/Excusezuo/p/12315320.html
Copyright © 2011-2022 走看看