zoukankan      html  css  js  c++  java
  • K 聚类分析实现类源码

    数据文件来自 :http://archive.ics.uci.edu/ml/datasets/Wholesale+customers?cm_mc_uid=21918109261714715776095&cm_mc_sid_50200000=1476090999


    import org.apache.spark.{SparkContext, SparkConf}
    import org.apache.spark.mllib.clustering.{KMeans, KMeansModel}
    import org.apache.spark.mllib.linalg.Vectors
    object KMeansClustering {
     def main (args: Array[String]) {
     if (args.length < 5) {
    
        println("Usage:KMeansClustering trainingDataFilePath testDataFilePath numClusters
        numIterations runTimes")
     sys.exit(1)
     }
    
     val conf = new
        SparkConf().setAppName("Spark MLlib Exercise:K-Means Clustering")
     val sc = new SparkContext(conf)
    
     
     val rawTrainingData = sc.textFile(args(0))
     val parsedTrainingData =
        rawTrainingData.filter(!isColumnNameLine(_)).map(line => {
    
        Vectors.dense(line.split("	").map(_.trim).filter(!"".equals(_)).map(_.toDouble))
     }).cache()
    
        // Cluster the data into two classes using KMeans
    
     val numClusters = args(2).toInt
     val numIterations = args(3).toInt
     val runTimes = args(4).toInt
     var clusterIndex:Int = 0
     val clusters:KMeansModel =
        KMeans.train(parsedTrainingData, numClusters, numIterations,runTimes)
    
        println("Cluster Number:" + clusters.clusterCenters.length)
    
        println("Cluster Centers Information Overview:")
     clusters.clusterCenters.foreach(
        x => {
    
        println("Center Point of Cluster " + clusterIndex + ":")
    
        println(x)
     clusterIndex += 1
     })
    
        //begin to check which cluster each test data belongs to based on the clustering result
    
     val rawTestData = sc.textFile(args(1))
     val parsedTestData = rawTestData.map(line =>
        {
    
        Vectors.dense(line.split("	").map(_.trim).filter(!"".equals(_)).map(_.toDouble))
    
        })
     parsedTestData.collect().foreach(testDataLine => {
     val predictedClusterIndex:
        Int = clusters.predict(testDataLine)
    
        println("The data " + testDataLine.toString + " belongs to cluster " +
        predictedClusterIndex)
     })
    
        println("Spark MLlib K-means clustering test finished.")
     }
    
     private def isColumnNameLine(line:String):Boolean = {
     if (line != null &&
        line.contains("Channel")) true
     else false
     }
     


  • 相关阅读:
    火车头采集标题如何伪原创(附教程)
    神经网络的模型分为哪几类,神经网络训练模型描述
    CSS background-color 详细说明 ccs修改背景颜色
    三大神经网络模型,神经网络简单实例
    golang查看数据类型
    git根据某个commit创建分支
    RocketMQ在在CentOS7中使用systemctl方式启动
    CentOS7使用firewall-cmd打开关闭防火墙与端口
    1,6,18,30,75的规律php版本
    发现自己
  • 原文地址:https://www.cnblogs.com/TendToBigData/p/10501345.html
Copyright © 2011-2022 走看看