zoukankan      html  css  js  c++  java
  • Spark连接MongoDB之Scala

    MongoDB Connector for Spark
      Spark Connector Scala Guide

    # Start spark-shell with the three listed jars supplied through --jars:
    # the Mongo Spark connector, mongo-hadoop core, and the Mongo Java driver.
    spark-shell --jars "mongo-spark-connector_2.11-2.0.0.jar,mongo-hadoop-core-2.0.2.jar,mongo-java-driver-3.4.2.jar"

    import org.apache.spark.sql.SparkSession
    import com.mongodb.spark._
    import com.mongodb.spark.config._
    import org.bson.Document
    
        // Configure a session builder: local master, a fixed application name,
        // and one sample config entry.
        val sessionBuilder = SparkSession.builder().
          master("local").
          appName("MongoSparkConnector").
          config("spark.some.config.option", "some-value")

        // Obtain the session from the configured builder.
        val spark = sessionBuilder.getOrCreate()

        // MongoDB connection string used by the read and write calls in this script.
        val uri = "mongodb://172.1.1.1:27017"
    
        // Query text, kept verbatim: selects uid, name and current_date() (aliased
        // as version) from test_table, limited to 100 rows.
        val userQuery = """
    select
           uid,
           name,
           current_date() version
      from test_table
      limit 100
          """

        // Run the query and repartition the result with an argument of 8.
        val userDF = spark.sql(userQuery).repartition(8)
    
    // Write to MongoDB: save userDF in overwrite mode through the
    // "com.mongodb.spark.sql" format, targeting database "test" and
    // collection "test_table" at the configured uri.
        val mongoWriteOptions = Map(
          "uri" -> uri,
          "database" -> "test",
          "collection" -> "test_table")
        val mongoWriter = userDF.write.format("com.mongodb.spark.sql").mode("overwrite")
        mongoWriter.options(mongoWriteOptions).save()
    
    // Read from MongoDB: load database "test", collection "test_table" at the
    // configured uri into df through the "com.mongodb.spark.sql" format.
        val mongoReadOptions = Map(
          "uri" -> uri,
          "database" -> "test",
          "collection" -> "test_table")
        val mongoReader = spark.read.format("com.mongodb.spark.sql")
        val df = mongoReader.options(mongoReadOptions).load()
    
    // Other ways
    // Variant: the same overwrite save, with the options given under their
    // full "spark.mongodb.*" key names.
    // NOTE(review): the "spark.mongodb.input.uri" entry is presumably not
    // needed for a write — confirm against the connector configuration docs.
        val prefixedWriteOptions = Map(
          "spark.mongodb.input.uri" -> uri,
          "spark.mongodb.output.uri" -> uri,
          "spark.mongodb.output.database" -> "test",
          "spark.mongodb.output.collection" -> "test_table")
        val prefixedWriter = userDF.write.format("com.mongodb.spark.sql").mode("overwrite")
        prefixedWriter.options(prefixedWriteOptions).save()
    
        // Variant: hand an overwrite-mode writer, configured with the full
        // "spark.mongodb.*" option keys, to MongoSpark.save.
        val prefixedSaveOptions = Map(
          "spark.mongodb.input.uri" -> uri,
          "spark.mongodb.output.uri" -> uri,
          "spark.mongodb.output.database" -> "test",
          "spark.mongodb.output.collection" -> "test_table")
        val prefixedSaveWriter = userDF.write.mode("overwrite").options(prefixedSaveOptions)
        MongoSpark.save(prefixedSaveWriter)
    
        // Variant: hand an overwrite-mode writer, configured with the short
        // "uri" / "database" / "collection" option keys, to MongoSpark.save.
        val shortKeySaveOptions = Map(
          "uri" -> uri,
          "database" -> "test",
          "collection" -> "test_table")
        val shortKeySaveWriter = userDF.write.mode("overwrite").options(shortKeySaveOptions)
        MongoSpark.save(shortKeySaveWriter)

        // Stop the Spark session once all the examples have run.
        spark.stop()
    
  • 相关阅读:
    【打印】windows打印控件,Lodop.js介绍
    【MySQL】MySQL查询数据库各表的行数
    【MySQL】MySQL中查询出数据表中存在重复的值list
    【php】php5.0以上,instanceof 用法
    日期转换:Cannot format given Object as a Date (SimpleDateFormat的parse和format)
    Groovy 正则表达式 匹配点号
    什么是开发框架
    SoapUI 增大使用内存
    Groovy API link
    Groovy 跳出each循环
  • 原文地址:https://www.cnblogs.com/wwxbi/p/7170679.html
Copyright © 2011-2022 走看看