zoukankan      html  css  js  c++  java
  • 假期学习14

    今天原来是情人节,但想到情侣不能约会,就超级开心,也成功地做完了全部的实验项目,收获很大,对新的语言有了一定的了解。

    下面是实验的部分代码:

    scala> val labelIndexer = new 
    StringIndexer().setInputCol("label").setOutputCol("indexedLabel").fit(result)
    labelIndexer: org.apache.spark.ml.feature.StringIndexerModel = strIdx_6721796011c5
    scala> labelIndexer.labels.foreach(println)
    <=50K
    >50K
     
    scala> val featureIndexer = new 
    VectorIndexer().setInputCol("pcaFeatures").setOutputCol("indexedFeatures").fit(result)
    featureIndexer: org.apache.spark.ml.feature.VectorIndexerModel = vecIdx_7b6672933fc3
    scala> println(featureIndexer.numFeatures)
    3
     
    scala> val labelConverter = new IndexToString().setInputCol("prediction").setOutputCol("predictedLabel").setLabels(labelIndexer.labels)
    labelConverter: org.apache.spark.ml.feature.IndexToString = idxToStr_d0c9321aaaa9
    scala> val lr = new 
    LogisticRegression().setLabelCol("indexedLabel").setFeaturesCol("indexedFeatures").setMaxIter(
    100)
    lr: org.apache.spark.ml.classification.LogisticRegression = logreg_06812b41b118
     
    scala> val lrPipeline = new Pipeline().setStages(Array(labelIndexer, featureIndexer, lr, 
    labelConverter))
    lrPipeline: org.apache.spark.ml.Pipeline = pipeline_b6b87b6e8cd5
    scala> val lrPipelineModel = lrPipeline.fit(result)
    lrPipelineModel: org.apache.spark.ml.PipelineModel = pipeline_b6b87b6e8cd5
    scala> val lrModel = lrPipelineModel.stages(2).asInstanceOf[LogisticRegressionModel]
    lrModel: org.apache.spark.ml.classification.LogisticRegressionModel = logreg_06812b41b118
    scala> println("Coefficients: " + lrModel.coefficientMatrix+"Intercept: "+lrModel.interceptVector+"numClasses: "+lrModel.numClasses+"numFeatures: "+lrModel.numFeatures)
    Coefficients: -1.9828586428133616E-7 -3.5090924715811705E-4 -8.451506276498941E-4 
    Intercept: [-1.4525982557843347]numClasses: 2numFeatures: 3
     
    scala> val lrPredictions = lrPipelineModel.transform(testdata)
    lrPredictions: org.apache.spark.sql.DataFrame = [features: vector, label: string ... 7 more fields]
    scala> val evaluator = new 
    MulticlassClassificationEvaluator().setLabelCol("indexedLabel").setPredictionCol("prediction")
    evaluator: org.apache.spark.ml.evaluation.MulticlassClassificationEvaluator = 
    mcEval_38ac5c14fa2a
    scala> val lrAccuracy = evaluator.evaluate(lrPredictions)
    lrAccuracy: Double = 0.7764235163053484
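    scala> println("Test Error = " + (1.0 - lrAccuracy))
    Test Error = 0.22357648369465155
    (注:MulticlassClassificationEvaluator 默认的评估指标是 f1,因此上面的 lrAccuracy 实际上是加权 F1 值而不是准确率;若要得到准确率和真正的测试错误率,需要在创建 evaluator 时调用 .setMetricName("accuracy")。)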
  • 相关阅读:
    mysql用查询结果当删除的判断条件进行删除报错1093 You can't specify target table解决方法
    centos通过yum快速安装JDK1.8
    crontab运行python不生效,但是手动执行正常的问题和解决方案
    SyntaxError: '' string literal contains an unescaped line break
    Enable Audit log
    checkbox横向选择
    动态分列显示
    重置参数值为缺省值
    Reset running number
    查看是谁在使用SL(SyteLine)
  • 原文地址:https://www.cnblogs.com/Excusezuo/p/12315320.html
Copyright © 2011-2022 走看看