SparkSQL DSL: quick notes

@Test
def functionTest() = {
  Logger.getLogger("org").setLevel(Level.WARN)
  val spark = getSpark("functionTest")
  val sc = spark.sparkContext
  import spark.implicits._

  //------------- sample data -------------
  val stus = Seq(
    Student(1001, "jack", "M", 20),
    Student(1004, "mary", "F", 18),
    Student(1017, "alice", "F", 23),
    Student(1026, "tom", "M", 20),
    Student(1007, "leo", "M", 22),
    Student(1008, "wood", "M", 22)).toDS()

  /*

  //--------- select ----------
  stus.select("id", "name").show()
  stus.select($"id", $"name", $"age" + 10).show()
  import org.apache.spark.sql.functions._
  stus.select(col("id"), col("name")).show()
  stus.select(stus("id"), stus("gender")).show()
  stus.selectExpr("id", "name", "age/10").show()

  //--------- filter == where ---------
  stus.filter(stu => stu.age > 22).show()
  stus.filter("name in ('jack','alice')").show()
  stus.filter($"gender" === "M").show()

  // stus.where()  // delegates to filter under the hood

  //--------- group by ---------
  stus.groupBy("gender").count().show()
  stus.groupBy("gender").sum("age").show()
  // Map form: column name -> aggregate function name
  val map = Map(("age", "sum"), ("*", "count"))
  stus.groupBy("gender").agg(map).show()

  stus.groupBy("gender").agg(("age", "sum"), ("age", "count")).show()

  println("--------- neat trick ---------")
  stus.groupBy("gender").count().show()
  stus.groupBy("gender", "age").count().show()
  // pivot: takes the distinct values of a non-grouped column, turns them into
  // column names, and aggregates the data under each of those new columns
  stus.groupBy("gender").pivot("age").count().show()

  //--------- order by ---------
  stus.orderBy($"age".desc).show()

  //--------- join ---------
  val scos = Seq(
    Score(1001, "Chinese", 60.0),
    Score(1004, "Math", 90.0),
    Score(1019, "Physics", 70.0),
    Score(1099, "Chemistry", 80.0)).toDS()
  stus.join(scos, stus("id") === scos("id"), "inner").show()
  stus.join(scos, stus("id") === scos("id"), "left").show()
  stus.join(scos, stus("id") === scos("id"), "right").show()
  stus.join(scos, stus("id") === scos("id"), "full").show()
  */

  // UDF: returns true when the last character of a name is in the given set
  val s = Seq("y", "e", "k")
  val fun: String => Boolean = (name: String) => {
    val last = name.substring(name.length - 1)
    s.contains(last)
  }
  spark.udf.register("lastIsX", fun)

  stus.createTempView("student")

  spark.sql("select * from student where lastIsX(name)").show()


  spark.close()
}
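
The snippet relies on a few definitions that are not shown here: the Student and Score case classes, a getSpark helper, and the usual imports. Below is a minimal sketch of that surrounding context, assuming a local SparkSession and a JUnit test class; the class name FunctionTestSuite, the master setting, and the Score field names beyond id are my assumptions.

import org.apache.log4j.{Level, Logger}
import org.apache.spark.sql.SparkSession
import org.junit.Test

// Shapes inferred from how the datasets are built above
case class Student(id: Int, name: String, gender: String, age: Int)
case class Score(id: Int, subject: String, score: Double)

class FunctionTestSuite { // hypothetical enclosing test class
  // Assumed helper: builds a local SparkSession named after the test
  def getSpark(appName: String): SparkSession =
    SparkSession.builder().appName(appName).master("local[*]").getOrCreate()

  // functionTest() from above goes here
}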