zoukankan      html  css  js  c++  java
  • scala spark 聚类

    import org.apache.spark.ml.clustering.KMeans
    import org.apache.spark.ml.evaluation.ClusteringEvaluator
    import org.apache.spark.sql.Row
    import org.apache.spark.sql.types._
    import org.apache.spark._
    import org.apache.spark.ml.feature.VectorAssembler

    // Build a small in-memory sample: each inner list is one (length, wide) point.
    val dataset = sc.parallelize(List(
      List(1.0, 8.0),
      List(8.0, 2.0),
      List(2.0, 10.0),
      List(5.0, 15.0),
      List(9.0, 1.0),
      List(9.0, 7.0),
      List(1.0, 3.0)
    ))
    // Alternative input source, kept for reference:
    //val rdd= sc.textFile("input/textdata.txt")

    // Row schema for the DataFrame: one point with two numeric columns.
    case class data1(length: Double, wide: Double)

    // Turn every two-element list into a typed row, then into a DataFrame
    // (relies on the SQL implicits that spark-shell brings into scope).
    val df = dataset.map(point => data1(length = point(0), wide = point(1))).toDF()

    // Pack the two numeric columns into the single vector column "features".
    val featureColumns = Array("length", "wide")
    val assembler = new VectorAssembler().setInputCols(featureColumns).setOutputCol("features")

    val df2 = assembler.transform(df)

    // Fit k-means with three clusters; the seed is fixed at 1L.
    val kmeans = new KMeans().setK(3).setSeed(1L)
    val model = kmeans.fit(df2)

    // Append the assigned cluster id ("prediction" column) to every row.
    val predictions = model.transform(df2)

    // Per-cluster mean of each original column.
    val ret1 = predictions.groupBy("prediction").agg("length" -> "avg", "wide" -> "avg")


    // Save the data frame to a file.
    // `data1` is the case class defined above, not a DataFrame, and the columns
    // "gender"/"age"/"education" do not exist in this data set, so the aggregated
    // result `ret1` is written instead. `predictions` is not used here because it
    // carries the vector-typed "features" column.
    // Note: Spark writes a directory of part files at this path, not a single file.

    ret1.write.format("csv").save("hdfs://ns1/datafile/wangxiao/data123.csv")

  • 相关阅读:
    并列显示
    vertical-align,text-align 和 align的区别
    实现水平垂直居中
    overflow属性
    float属性
    table 标签
    idea中修改默认maven
    使用host的方式来破解idea
    mysql分区
    mysql数据库设计规范
  • 原文地址:https://www.cnblogs.com/zhangbojiangfeng/p/8870301.html
Copyright © 2011-2022 走看看