  • Environment: Scala 2.11.8, Spark 2.3.1, MongoDB Spark Connector 2.3.0
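
    The code below records the connector's sbt coordinates in an inline comment; a minimal build.sbt sketch assembled from those versions might look like the following. The spark-sql and spark-hive entries are my assumption, added to match Spark 2.3.1 and the job's use of enableHiveSupport():

        // build.sbt — minimal sketch; connector coordinates from the post,
        // the two Spark dependencies are assumptions for Spark 2.3.1 with Hive support
        scalaVersion := "2.11.8"

        libraryDependencies ++= Seq(
          "org.apache.spark"  %% "spark-sql"              % "2.3.1" % "provided",
          "org.apache.spark"  %% "spark-hive"             % "2.3.1" % "provided",
          "org.mongodb.spark" %% "mongo-spark-connector"  % "2.3.0"
        )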

    import org.apache.spark.SparkConf
    import org.apache.spark.sql.SparkSession

    object mongospark20180830consume_amount {

      // On handling documents that repeat the same field name, see:
      // https://blog.csdn.net/qq_14950717/article/details/62425563
      // https://blog.csdn.net/qq_27234661/article/details/78344435?locationNum=3&fps=1
      def main(args: Array[String]): Unit = {

        // Submit with the connector pulled in via --packages, e.g.:
        // spark-submit --driver-class-path /usr/local/jdk/lib/mysql-connector-java-5.1.46.jar \
        //   --packages org.mongodb.spark:mongo-spark-connector_2.11:2.3.0 \
        //   --class "mongospark20180830consume_amount" /testdata/u3.jar

        val sparkConf = new SparkConf().setAppName("adver").setMaster("local[*]")
        val spark = SparkSession.builder().config(sparkConf).enableHiveSupport().getOrCreate()

        val inputUri = "mongodb://saas:saas2018yundinglixin@dds-m5e6e56a3b0cf7b42784-pub.mongodb.rds.aliyuncs.com:3717/saas.elemeterPowerHistory"

        // Read the MongoDB collection, paginated into ~32 MB partitions keyed on _id.
        val df = spark.read.format("com.mongodb.spark.sql").options(
          Map("spark.mongodb.input.uri" -> inputUri,
            "spark.mongodb.input.partitioner" -> "MongoPaginateBySizePartitioner",
            "spark.mongodb.input.partitionerOptions.partitionKey" -> "_id",
            "spark.mongodb.input.partitionerOptions.partitionSizeMB" -> "32"))
          .load()

        // Keep only the columns the downstream table needs.
        val df2 = df.select("time", "uuid", "consume_amount", "room_id")

        // Persist as a Hive table in the saas database.
        spark.sql("use saas")
        df2.write.mode("overwrite").saveAsTable("consume_amount20180831")

        // Possible follow-up queries (time is epoch milliseconds, hence the /1000):
        // spark.sql("select uuid, from_unixtime(cast(`time`/1000 as bigint), 'yyyyMMddHH') as time, consume_amount " +
        //   "from consume where time >= 1533115788000")
        // spark.sql("select s.sn, s.uuid, e.time, e.consume_amount from staonly2 s join elem e on s.uuid = e.uuid").take(10)

        spark.stop()
      }
    }
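
    After the job finishes, the write can be sanity-checked from a Hive-enabled spark-shell. A minimal sketch, assuming the shell is launched with the same --packages flag and that the saas database and consume_amount20180831 table were created as above:

        // Quick sanity check of the saved table
        spark.sql("use saas")
        spark.sql("select time, uuid, consume_amount, room_id from consume_amount20180831 limit 5").show()
        spark.sql("select count(*) from consume_amount20180831").show()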