zoukankan      html  css  js  c++  java
  • 201707舆情分析系统代码

    import breeze.linalg
    import org.apache.spark.ml.Pipeline
    import org.apache.spark.ml.classification.MultilayerPerceptronClassifier
    import org.apache.spark.ml.evaluation.MulticlassClassificationEvaluator
    import org.apache.spark.ml.feature.{IndexToString, StringIndexer, Word2Vec}
    import org.apache.spark.sql.SQLContext
    import org.apache.spark.{SparkConf, SparkContext}
    import org.apache.spark.rdd.RDD
    //http://qkxue.net/info/28517/SparkML
    //spark-shell --driver-class-path /home/hadoop/test/mysqljdbc.jar
    /**
      * Spark ML job that trains and evaluates a text classifier.
      *
      * Pipeline: StringIndexer (label -> index) -> Word2Vec (tokens -> vector)
      * -> MultilayerPerceptronClassifier -> IndexToString (index -> label).
      * The input is a tab-separated text file whose first field is the label and
      * whose second field is a space-separated list of tokens. 80% of the rows
      * are used for training and 20% for evaluation.
      */
    object WbClassifier {

      /** Word2Vec embedding size; also the width of the MLP input layer. */
      private val VectorSize = 500

      // NOTE(review): credentials are embedded in the URL; consider moving them
      // to configuration that is not checked into source control.
      private val JdbcUrl =
        "jdbc:mysql://192.168.0.37:3306/emotional?user=root&password=123456"

      /** Tab-separated training file: `label<TAB>token token token ...`. */
      private val TrainingFile =
        "hdfs://192.168.0.211:9000/user/hadoop/emotion/SMSSpamCollection.txt"

      /**
        * Entry point. Creates the SparkContext, runs the training/evaluation job
        * and always stops the context, even when the job fails.
        *
        * @param args command-line arguments (not used)
        */
      def main(args: Array[String]): Unit = {
        val conf = new SparkConf().setAppName("WEIBO MLPC Classification")
        val sc = new SparkContext(conf)
        try {
          run(sc)
        } finally {
          // Release cluster resources regardless of how the job ended.
          sc.stop()
        }
      }

      /** Loads the data, fits the pipeline on the training split and reports the test metric. */
      private def run(sc: SparkContext): Unit = {
        val sqlCtx = new SQLContext(sc)

        // Register the MySQL table `mltest` as a temp table. `read.jdbc` replaces the
        // deprecated `SQLContext.jdbc(url, table)`.
        sqlCtx.read.jdbc(JdbcUrl, "mltest", new java.util.Properties()).registerTempTable("mltest")
        // NOTE(review): this query result is not consumed by the training path below.
        // It is only the input of the DB-based alternative:
        //   val parsedRDD = mlTestRows.map(p => (p.get(0).toString, p.getString(1).split(",")))
        val mlTestRows = sqlCtx.sql("SELECT mltest.svalue,mltest.words  FROM mltest")

        // Training data: (label, tokens). Lines with fewer than two tab-separated fields
        // (blank or malformed lines) are skipped instead of failing the job with an
        // ArrayIndexOutOfBoundsException.
        // Other inputs tried during development (same HDFS directory):
        //   20170725.txt - space-separated; label in field 3, comma-separated tokens in field 2
        //   20170726.txt - same tab-separated layout as the current file
        val parsedRDD = sc.textFile(TrainingFile)
          .map(_.split("\t"))
          .collect { case Array(label, text, _*) => (label, text.split(" ")) }

        // http://doc.okbase.net/u013719780/archive/239004.html
        val msgDF = sqlCtx.createDataFrame(parsedRDD).toDF("label", "message")
        // (placeholder in the original for principal component analysis; none is performed)

        // Fit the indexer on the full data set so every label present gets an index.
        val labelIndexer = new StringIndexer()
          .setInputCol("label")
          .setOutputCol("indexedLabel")
          .fit(msgDF)

        val word2Vec = new Word2Vec()
          .setInputCol("message")
          .setOutputCol("features")
          .setVectorSize(VectorSize)
          .setMinCount(1)

        // The output layer needs one neuron per class, so derive its size from the labels
        // actually found instead of hard-coding it.
        val numClasses = labelIndexer.labels.length
        val layers = Array[Int](VectorSize, 6, 5, numClasses)

        val multilayerPerceptronClassifier = new MultilayerPerceptronClassifier()
          .setLayers(layers)
          .setBlockSize(1024)
          .setSeed(1234L)
          .setMaxIter(456)
          .setFeaturesCol("features")
          .setLabelCol("indexedLabel")
          .setPredictionCol("prediction")

        // Map predicted indices back to the original label strings.
        val labelConverter = new IndexToString()
          .setInputCol("prediction")
          .setOutputCol("predictedLabel")
          .setLabels(labelIndexer.labels)

        // NOTE(review): the split is unseeded, so the reported metric varies between runs.
        val Array(trainingData, testData) = msgDF.randomSplit(Array(0.8, 0.2))

        val pipeline = new Pipeline()
          .setStages(Array(labelIndexer, word2Vec, multilayerPerceptronClassifier, labelConverter))
        val model = pipeline.fit(trainingData)

        val predictionResultDF = model.transform(testData)
        predictionResultDF.printSchema()
        predictionResultDF.select("message", "features", "label", "predictedLabel").show(30)
        // To persist the predictions instead:
        //   predictionResultDF.select("message","features","label","predictedLabel").write.save("file:///logs")

        // Show which labels the model actually predicts (the original computed this
        // and discarded the result).
        predictionResultDF.select("predictedLabel").distinct().show(5)

        // NOTE(review): "precision" is the Spark 1.x metric name; newer Spark releases
        // expose overall accuracy as "accuracy" - confirm against the deployed version.
        val evaluator = new MulticlassClassificationEvaluator()
          .setLabelCol("indexedLabel")
          .setPredictionCol("prediction")
          .setMetricName("precision")
        val predictionAccuracy = evaluator.evaluate(predictionResultDF)
        println("Testing Accuracy is %2.4f".format(predictionAccuracy * 100) + "%")
      }

    }
  • 相关阅读:
    分布式系统简介
    java.lang.Object 之 clone() 深拷贝,浅拷贝
    粉丝裂变活动bug
    遇到的bug
    移动端fixed定位在底部,出现键盘后消失
    窗口关闭,打开新页面,刷新等总结
    input 手机数字键盘
    正则重温(学习笔记)
    input的表单验证(不断更新中~~)
    css 不大常见的属性(不断更新中...)
  • 原文地址:https://www.cnblogs.com/canyangfeixue/p/7249143.html
Copyright © 2011-2022 走看看