zoukankan      html  css  js  c++  java
  • SQL操作Spark SQL--BasicSQLTestt

    object BasicSQLTest {
      def main(args: Array[String]): Unit = {
        val spark = SparkSession
          .builder()
          .appName("BasicSQLTest")
          .getOrCreate()
    
        val sessionDf = spark.read.parquet(s"${BASE_PATH}/trackerSession")
        sessionDf.printSchema()
        sessionDf.createOrReplaceTempView("trackerSession")
    
        //查询所有记录
        val sessionRecords = spark.sql("select * from trackerSession")
        sessionRecords.show()
    
        //过滤
        val filterSession =
          spark.sql("select distinct(cookie) from trackerSession where cookie_label = '固执'")
        filterSession.show()
    
        //先join, 然后再分组SQL
        val logDf = spark.read.parquet(s"${BASE_PATH}/trackerLog")
        logDf.printSchema()
        logDf.createOrReplaceTempView("trackerLog")
    
        val sql =
          """select tl.url, count(*) from trackerLog tl join trackerSession ts on tl.cookie = ts.cookie
            | where ts.cookie_label = '固执' and tl.log_type='pageview'
            | group by tl.url
            | order by tl.url desc
          """.stripMargin
    
        spark.sql(sql).show(false)
    
        //函数(内置函数)
        //单行函数
        spark.sql("select session_server_time, " +
          "hour(session_server_time) as hour from trackerSession").show(false)
        spark.sql("select click_count, cookie_label, " +
          "concat(click_count, cookie_label) as c from trackerSession").show()
        //多行函数(聚合函数)
        val ccmDF = spark.sql("select max(click_count) as ccm from trackerSession")
        ccmDF.printSchema()
        ccmDF.show()
    
        //udf
        spark.udf.register("myUDF", (arg1: Int, arg2: String) => {
          if (arg1 > 1 && arg2.equals("固执")) {
            arg2 + arg1
          } else {
            arg2 + "less"
          }
        })
        spark.sql("select click_count, cookie_label, myUDF(click_count, cookie_label) " +
          "as c from trackerSession").show()
    
        spark.stop()
      }
    }
    

      

  • 相关阅读:
    关于sql json数据的处理
    时间函数strtotime的强大
    /usr/bin/install: cannot create regular file `/usr/local/jpeg6/include/jconfig.h'
    linux安装php7.2.7
    关于sql时间方面的处理
    关于centos防火墙的一些问题
    linux 安装ssl 失败原因
    linux安装php7.2.7
    拾取坐标和反查询接口api
    【转】通过点击获取地址等信息、可以传值
  • 原文地址:https://www.cnblogs.com/tesla-turing/p/11489111.html
Copyright © 2011-2022 走看看