zoukankan      html  css  js  c++  java
  • Spark 数据源

    一、mysql作为数据源

    import org.apache.spark.sql.{DataFrame, Dataset, Row, SparkSession}
    
    /**
      * mysql作为数据源
      *
      * schema信息
      * root
      * |-- uid: integer (nullable = false)
      * |-- xueyuan: string (nullable = true)
      * |-- number_one: string (nullable = true)
      */
    /**
      * Demo: reading a MySQL table as a Spark SQL data source.
      *
      * Schema of the loaded table:
      * root
      * |-- uid: integer (nullable = false)
      * |-- xueyuan: string (nullable = true)
      * |-- number_one: string (nullable = true)
      */
    object JdbcSource {
      def main(args: Array[String]): Unit = {
        // 1. Create the SparkSession entry point (local mode, 2 threads).
        val sparkSession: SparkSession = SparkSession.builder().appName("JdbcSource")
          .master("local[2]").getOrCreate()

        // Needed for the $"col" Column syntax below.
        import sparkSession.implicits._

        // 2. Load the source table over JDBC.
        // NOTE(review): credentials are hard-coded for the demo; externalize them in real code.
        val urlData: DataFrame = sparkSession.read.format("jdbc").options(Map(
          "url" -> "jdbc:mysql://localhost:3306/urlcount",
          "driver" -> "com.mysql.jdbc.Driver",
          "dbtable" -> "url_data",
          "user" -> "root",
          "password" -> "root"
        )).load()

        // Debugging helpers:
        //urlData.printSchema()
        //urlData.show()

        // 3. Keep rows where uid > 2.
        // Fix: the original used a typed lambda, x.getAs[Int](0) > 2, which
        // (a) depends on uid being the FIRST column (positional access), and
        // (b) forces Spark to deserialize every Row, preventing the predicate
        // from being pushed down to MySQL. A Column expression has neither problem.
        val fData: Dataset[Row] = urlData.filter($"uid" > 2)

        // 4. Trigger the action and release resources.
        fData.show()
        sparkSession.stop()
      }
    }

    mysql数据:

    二、Spark写出数据格式

    import org.apache.spark.sql.{DataFrame, Dataset, Row, SparkSession}
    
    /**
      * Demo: filtering a MySQL-backed DataFrame and writing it out
      * in several file formats (text / json / csv / parquet).
      */
    object JdbcSource1 {
      def main(args: Array[String]): Unit = {
        // 1. Create the SparkSession entry point.
        // Fix: appName was "JdbcSource" (copy-paste leftover); use this object's own name.
        val sparkSession: SparkSession = SparkSession.builder().appName("JdbcSource1")
          .master("local[2]").getOrCreate()

        // Needed for the $"col" Column syntax below.
        import sparkSession.implicits._

        // 2. Load the source table over JDBC.
        // NOTE(review): credentials are hard-coded for the demo; externalize them in real code.
        val urlData: DataFrame = sparkSession.read.format("jdbc").options(Map(
          "url" -> "jdbc:mysql://localhost:3306/urlcount",
          "driver" -> "com.mysql.jdbc.Driver",
          "dbtable" -> "url_data",
          "user" -> "root",
          "password" -> "root"
        )).load()

        // 3. Keep rows with uid > 2 and project the two columns we want to export.
        val r = urlData.filter($"uid" > 2)
        val rs: DataFrame = r.select($"xueyuan", $"number_one")

        // Single-column projection used when testing the text sink (which
        // only accepts exactly one string column):
        //val rs: DataFrame = r.select($"xueyuan")

        // Write as plain text (requires the single-column projection above):
        //rs.write.text("e:/saveText")

        // Write as JSON:
        //rs.write.json("e:/saveJson")

        // Write as CSV (the variant exercised in this demo):
        rs.write.csv("e:/saveCsv")

        // Write as Parquet:
        //rs.write.parquet("e:/savePar")

        // 4. Trigger the action and release resources.
        rs.show()
        sparkSession.stop()
      }
    }

    三、Json作为数据源

    import org.apache.spark.sql.{DataFrame, Dataset, Row, SparkSession}
    
    /**
      * Demo: reading JSON files as a Spark SQL data source and
      * filtering on a column value.
      */
    object JsonSource {
      def main(args: Array[String]): Unit = {
        // 1. Build the SparkSession entry point (local mode, 2 threads).
        val spark: SparkSession = SparkSession
          .builder()
          .appName("JsonSource")
          .master("local[2]")
          .getOrCreate()

        // Enables the $"col" Column syntax used in the filter.
        import spark.implicits._

        // 2-3. Load the JSON directory and keep only rows whose
        // xueyuan column equals "bigdata".
        val bigdataRows: Dataset[Row] = spark.read
          .json("e:/saveJson")
          .filter($"xueyuan" === "bigdata")

        // 4. Trigger the action.
        bigdataRows.show()

        // 5. Release resources.
        spark.stop()
      }
    }

    四、Csv作为数据源

    import org.apache.spark.sql.{DataFrame, Dataset, Row, SparkSession}
    
    /**
      * Demo: reading headerless CSV files as a Spark SQL data source,
      * renaming the auto-generated columns, and filtering.
      */
    object CsvSource {
      def main(args: Array[String]): Unit = {
        // 1. Build the SparkSession entry point (local mode, 2 threads).
        val spark: SparkSession = SparkSession
          .builder()
          .appName("CsvSource")
          .master("local[2]")
          .getOrCreate()

        // Enables the $"col" Column syntax used in the filter.
        import spark.implicits._

        // 2-3. Load the CSV directory (no header / no schema, so columns arrive
        // as string-typed _c0, _c1) and give them meaningful names.
        // NOTE(review): the names assume the file layout is (id, xueyuan) —
        // confirm against whatever produced e:/saveCsv.
        val renamed = spark.read.csv("e:/saveCsv").toDF("id", "xueyuan")

        // Keep rows with id <= 3. The id column is a string here; Spark
        // resolves the comparison against the integer literal via a cast.
        val result = renamed.filter($"id" <= 3)

        // 4. Trigger the action.
        result.show()

        // 5. Release resources.
        spark.stop()
      }
    }
  • 相关阅读:
    xgqfrms™, xgqfrms® : xgqfrms's official website of GitHub!
    xgqfrms™, xgqfrms® : xgqfrms's official website of GitHub!
    svn 启动项目报错,项目被lock
    BigDecimal 用法详解
    MySQL 规范
    Tomcat 详解URL请求
    Tomcat Servlet工作原理
    Tomcat Context容器和Wrapper容器
    Tomcat 核心组件 Container容器相关
    Tomcat 核心组件 Connector
  • 原文地址:https://www.cnblogs.com/areyouready/p/10296704.html
Copyright © 2011-2022 走看看