zoukankan      html  css  js  c++  java
  • 假期学习13

    今天做的是最后一个实验“Spark 机器学习库 MLlib 编程实践”的前一部分。

    以下是部分代码:

    import org.apache.spark.ml.feature.PCA
    import org.apache.spark.sql.Row
    import org.apache.spark.ml.linalg.{Vector,Vectors}
    import org.apache.spark.ml.evaluation.MulticlassClassificationEvaluator
    import org.apache.spark.ml.{Pipeline,PipelineModel}
    import org.apache.spark.ml.feature.{IndexToString, StringIndexer, VectorIndexer,HashingTF, 
    Tokenizer}
    import org.apache.spark.ml.classification.LogisticRegression
    import org.apache.spark.ml.classification.LogisticRegressionModel
    import org.apache.spark.ml.classification.{BinaryLogisticRegressionSummary, 
    LogisticRegression}
    import org.apache.spark.sql.functions;
    scala> import spark.implicits._
    import spark.implicits._
    // Row type for one parsed record: a numeric feature vector plus its class label kept as a string.
    // Declared as a case class so an RDD[Adult] can be turned into a DataFrame with toDF().
    scala> case class Adult(features: org.apache.spark.ml.linalg.Vector, label: String)
    defined class Adult
    // Load the training split: parse each comma-separated line into an Adult row.
    // Columns 0, 2, 4, 10, 11 and 12 are read as Doubles to form the feature vector;
    // column 14 is kept unchanged as the string label.
    // Lines with fewer than 15 fields (e.g. blank or header lines) are skipped so that
    // indexing p(14) cannot throw ArrayIndexOutOfBoundsException when the job runs.
    // Each line ends with '.' so the expression stays open when pasted into the REPL.
    scala> val df = sc.textFile("adult.data.txt").
    map(_.split(",")).
    filter(_.length > 14).
    map(p => Adult(Vectors.dense(Array(0, 2, 4, 10, 11, 12).map(i => p(i).toDouble)), p(14))).
    toDF()
    df: org.apache.spark.sql.DataFrame = [features: vector, label: string]
    // Load the test split with the same parsing as the training data:
    // columns 0, 2, 4, 10, 11 and 12 become the Double feature vector and
    // column 14 is kept unchanged as the string label.
    // Lines with fewer than 15 fields (e.g. blank or header lines) are skipped so that
    // indexing p(14) cannot throw ArrayIndexOutOfBoundsException when the job runs.
    // Each line ends with '.' so the expression stays open when pasted into the REPL.
    scala> val test = sc.textFile("adult.test.txt").
    map(_.split(",")).
    filter(_.length > 14).
    map(p => Adult(Vectors.dense(Array(0, 2, 4, 10, 11, 12).map(i => p(i).toDouble)), p(14))).
    toDF()
    test: org.apache.spark.sql.DataFrame = [features: vector, label: string]
  • 相关阅读:
    机房收费系统重构(三)—工厂+反射+DAL
    机房收费系统重构(二)—菜鸟入门
    机房收费系统重构(一)—小试牛刀
    vb.net机房收费登录功能
    设计模式总结之结构型模式
    设计模式总结之创建型模式
    大话设计之桥接模式
    大话设计之单例模式
    大话设计之适配器模式
    大话设计之抽象工厂模式
  • 原文地址:https://www.cnblogs.com/Excusezuo/p/12315306.html
Copyright © 2011-2022 走看看