zoukankan      html  css  js  c++  java
  • SQL操作Spark SQL--BasicSQLTestt

    object BasicSQLTest {
      def main(args: Array[String]): Unit = {
        val spark = SparkSession
          .builder()
          .appName("BasicSQLTest")
          .getOrCreate()
    
        val sessionDf = spark.read.parquet(s"${BASE_PATH}/trackerSession")
        sessionDf.printSchema()
        sessionDf.createOrReplaceTempView("trackerSession")
    
        //查询所有记录
        val sessionRecords = spark.sql("select * from trackerSession")
        sessionRecords.show()
    
        //过滤
        val filterSession =
          spark.sql("select distinct(cookie) from trackerSession where cookie_label = '固执'")
        filterSession.show()
    
        //先join, 然后再分组SQL
        val logDf = spark.read.parquet(s"${BASE_PATH}/trackerLog")
        logDf.printSchema()
        logDf.createOrReplaceTempView("trackerLog")
    
        val sql =
          """select tl.url, count(*) from trackerLog tl join trackerSession ts on tl.cookie = ts.cookie
            | where ts.cookie_label = '固执' and tl.log_type='pageview'
            | group by tl.url
            | order by tl.url desc
          """.stripMargin
    
        spark.sql(sql).show(false)
    
        //函数(内置函数)
        //单行函数
        spark.sql("select session_server_time, " +
          "hour(session_server_time) as hour from trackerSession").show(false)
        spark.sql("select click_count, cookie_label, " +
          "concat(click_count, cookie_label) as c from trackerSession").show()
        //多行函数(聚合函数)
        val ccmDF = spark.sql("select max(click_count) as ccm from trackerSession")
        ccmDF.printSchema()
        ccmDF.show()
    
        //udf
        spark.udf.register("myUDF", (arg1: Int, arg2: String) => {
          if (arg1 > 1 && arg2.equals("固执")) {
            arg2 + arg1
          } else {
            arg2 + "less"
          }
        })
        spark.sql("select click_count, cookie_label, myUDF(click_count, cookie_label) " +
          "as c from trackerSession").show()
    
        spark.stop()
      }
    }
    

      

  • 相关阅读:
    c&c++中的宏
    cmake教程
    什么是Grunt
    npm-install camo
    在node.js中建立你的第一个HTTp服务器
    highChart数据动态更新
    css:before和after中的content属性
    清除float浮动三种方式
    中文标准web字体
    网站桌面端和手机端不同url的设置
  • 原文地址:https://www.cnblogs.com/tesla-turing/p/11489111.html
Copyright © 2011-2022 走看看