zoukankan      html  css  js  c++  java
  • SQL操作Spark SQL--BasicSQLTestt

    object BasicSQLTest {
      def main(args: Array[String]): Unit = {
        val spark = SparkSession
          .builder()
          .appName("BasicSQLTest")
          .getOrCreate()
    
        val sessionDf = spark.read.parquet(s"${BASE_PATH}/trackerSession")
        sessionDf.printSchema()
        sessionDf.createOrReplaceTempView("trackerSession")
    
        //查询所有记录
        val sessionRecords = spark.sql("select * from trackerSession")
        sessionRecords.show()
    
        //过滤
        val filterSession =
          spark.sql("select distinct(cookie) from trackerSession where cookie_label = '固执'")
        filterSession.show()
    
        //先join, 然后再分组SQL
        val logDf = spark.read.parquet(s"${BASE_PATH}/trackerLog")
        logDf.printSchema()
        logDf.createOrReplaceTempView("trackerLog")
    
        val sql =
          """select tl.url, count(*) from trackerLog tl join trackerSession ts on tl.cookie = ts.cookie
            | where ts.cookie_label = '固执' and tl.log_type='pageview'
            | group by tl.url
            | order by tl.url desc
          """.stripMargin
    
        spark.sql(sql).show(false)
    
        //函数(内置函数)
        //单行函数
        spark.sql("select session_server_time, " +
          "hour(session_server_time) as hour from trackerSession").show(false)
        spark.sql("select click_count, cookie_label, " +
          "concat(click_count, cookie_label) as c from trackerSession").show()
        //多行函数(聚合函数)
        val ccmDF = spark.sql("select max(click_count) as ccm from trackerSession")
        ccmDF.printSchema()
        ccmDF.show()
    
        //udf
        spark.udf.register("myUDF", (arg1: Int, arg2: String) => {
          if (arg1 > 1 && arg2.equals("固执")) {
            arg2 + arg1
          } else {
            arg2 + "less"
          }
        })
        spark.sql("select click_count, cookie_label, myUDF(click_count, cookie_label) " +
          "as c from trackerSession").show()
    
        spark.stop()
      }
    }
    

      

  • 相关阅读:
    php字符串相加
    elementUI的input输入一个字符就失去焦点问题
    js鸡尾酒排序算法
    js快速排序算法
    js冒泡排序算法改进
    js实现队列
    EXIF.js 读取图片的方向
    new Image().src资源重复请求问题
    canvas绘制圆图输出图片格式
    去掉"You are running Vue in development mode"提示
  • 原文地址:https://www.cnblogs.com/tesla-turing/p/11489111.html
Copyright © 2011-2022 走看看