zoukankan      html  css  js  c++  java
  • 聚类-----KMeans

    package Spark_MLlib
    
    import org.apache.spark.ml.clustering.KMeans
    import org.apache.spark.sql.SparkSession
    import org.apache.spark.ml.linalg.{Vector, Vectors}
    
    /**
      * Row schema for the K-means input DataFrame.
      *
      * Each record carries a single `features` column holding one Spark ML
      * vector (`org.apache.spark.ml.linalg.Vector`, not the Scala collection).
      *
      * @param features the numeric feature vector of one sample
      */
    case class features_schema(features: Vector)
    /**
      * K-means clustering demo.
      *
      * Reads a comma-separated text file whose first four columns are numeric,
      * wraps every row in a `features_schema`, fits a KMeans model on the
      * resulting DataFrame and prints the input, the per-row predictions,
      * the cluster centers and the within-set sum of squared errors.
      */
    object 聚类__KMeans {
      // Local two-thread session; kept as an object member so that
      // `spark.implicits._` (needed for `.toDF()`) can be imported once.
      val spark = SparkSession.builder().master("local[2]").getOrCreate()
      import spark.implicits._

      /**
        * Program entry point: runs the whole load / fit / report pipeline.
        *
        * @param args command-line arguments (unused)
        */
      def main(args: Array[String]): Unit = {
        try {
          val data = spark.sparkContext
            .textFile("file:///home/soyo/桌面/spark编程测试数据/soyo2.txt")
            .map(_.trim)
            // A blank line (e.g. a trailing newline at the end of the file)
            // would otherwise fail with NumberFormatException on toDouble.
            .filter(_.nonEmpty)
            .map(_.split(","))
            .map { x =>
              features_schema(
                Vectors.dense(x(0).toDouble, x(1).toDouble, x(2).toDouble, x(3).toDouble))
            }
            .toDF()
          data.show()

          val model = new KMeans()
            .setK(7)
            .setFeaturesCol("features")
            .setPredictionCol("prediction")
            .fit(data)

          val results = model.transform(data)
          results.show(150)

          // Final cluster centers of the fitted model, printed one per line.
          model.clusterCenters.foreach(println)

          // Within-set sum of squared errors; together with the use case it can
          // serve as a reference when choosing K.
          // NOTE(review): computeCost is deprecated since Spark 2.4 and removed in
          // 3.0 - on newer versions use model.summary.trainingCost or
          // ClusteringEvaluator instead; confirm the Spark version of this project.
          val cost = model.computeCost(data)
          println(cost)
        } finally {
          // Release the local Spark resources even if the job fails.
          spark.stop()
        }
      }
    }

    结果(注意:下面的输出只有 3 个聚类中心,且表格只显示前 20 行,看起来对应的是使用 setK(3) 和默认 show() 的一次运行,而不是上面代码中的 setK(7) 和 show(150)):

    +-----------------+
    |         features|
    +-----------------+
    |[5.1,3.5,1.4,0.2]|
    |[4.9,3.0,1.4,0.2]|
    |[4.7,3.2,1.3,0.2]|
    |[4.6,3.1,1.5,0.2]|
    |[5.0,3.6,1.4,0.2]|
    |[5.4,3.9,1.7,0.4]|
    |[4.6,3.4,1.4,0.3]|
    |[5.0,3.4,1.5,0.2]|
    |[4.4,2.9,1.4,0.2]|
    |[4.9,3.1,1.5,0.1]|
    |[5.4,3.7,1.5,0.2]|
    |[4.8,3.4,1.6,0.2]|
    |[4.8,3.0,1.4,0.1]|
    |[4.3,3.0,1.1,0.1]|
    |[5.8,4.0,1.2,0.2]|
    |[5.7,4.4,1.5,0.4]|
    |[5.4,3.9,1.3,0.4]|
    |[5.1,3.5,1.4,0.3]|
    |[5.7,3.8,1.7,0.3]|
    |[5.1,3.8,1.5,0.3]|
    +-----------------+
    only showing top 20 rows

    +-----------------+----------+
    |         features|prediction|
    +-----------------+----------+
    |[5.1,3.5,1.4,0.2]|         0|
    |[4.9,3.0,1.4,0.2]|         0|
    |[4.7,3.2,1.3,0.2]|         0|
    |[4.6,3.1,1.5,0.2]|         0|
    |[5.0,3.6,1.4,0.2]|         0|
    |[5.4,3.9,1.7,0.4]|         0|
    |[4.6,3.4,1.4,0.3]|         0|
    |[5.0,3.4,1.5,0.2]|         0|
    |[4.4,2.9,1.4,0.2]|         0|
    |[4.9,3.1,1.5,0.1]|         0|
    |[5.4,3.7,1.5,0.2]|         0|
    |[4.8,3.4,1.6,0.2]|         0|
    |[4.8,3.0,1.4,0.1]|         0|
    |[4.3,3.0,1.1,0.1]|         0|
    |[5.8,4.0,1.2,0.2]|         0|
    |[5.7,4.4,1.5,0.4]|         0|
    |[5.4,3.9,1.3,0.4]|         0|
    |[5.1,3.5,1.4,0.3]|         0|
    |[5.7,3.8,1.7,0.3]|         0|
    |[5.1,3.8,1.5,0.3]|         0|
    +-----------------+----------+
    only showing top 20 rows

    [5.005999999999999,3.4180000000000006,1.4640000000000002,0.2439999999999999]
    [6.8538461538461535,3.076923076923076,5.715384615384614,2.0538461538461537]
    [5.883606557377049,2.740983606557377,4.388524590163936,1.4344262295081966]
    78.94506582597859

  • 相关阅读:
    文件的序列化和反序列化
    三个小功能,游戏倒计时,文件的序列化和反序列化,txt文档的读取和写入
    Unity 中Debug打印的全局注释方式和重写
    导航制作的几个步骤
    Unity中删除文件目录下的所有文件和查看文件里面的内容
    VS2017一些小技巧
    在Unity中图标进行鼠标图标更换
    Electron-Vue 使用 oss 实现上传、下载
    Electron-Vue 调用本地数据库
    构建 Electron-Vue 脚手架项目
  • 原文地址:https://www.cnblogs.com/soyo/p/7799422.html
Copyright © 2011-2022 走看看