zoukankan      html  css  js  c++  java
  • SparkSQL DSL 随便写写

    /**
     * Demonstrates registering a Scala function as a Spark SQL UDF and calling it from SQL.
     *
     * Builds a small in-memory Dataset of `Student` rows, registers the UDF `lastIsX`
     * (true when a name ends with "y", "e" or "k"), exposes the Dataset as the temporary
     * view `student`, and prints the rows whose name satisfies the UDF.
     *
     * The session obtained from `getSpark` is always closed, even if a step fails.
     * A block of disabled DSL examples (select / filter / groupBy / pivot / orderBy / join)
     * is kept below for reference.
     */
    @Test
    def functionTest(): Unit = {
      // Silence Spark's own INFO logging so the show() output stays readable.
      Logger.getLogger("org").setLevel(Level.WARN)
      val spark = getSpark("functionTest")
      try {
        import spark.implicits._

        //-------------------------
        val stus = Seq(
          Student(1001, "jack", "M", 20),
          Student(1004, "mary", "F", 18),
          Student(1017, "alice", "F", 23),
          Student(1026, "tom", "M", 20),
          Student(1007, "leo", "M", 22),
          Student(1008, "wood", "M", 22)
        ).toDS()

        /*

        //---------select----------
        stus.select("id","name").show()
        stus.select($"id",$"name",$"age"+10).show()
        import org.apache.spark.sql.functions._
        stus.select(col("id"),col("name")).show()
        stus.select(stus("id"),stus("gender")).show()
        stus.selectExpr("id","name","age/10").show()

        //-----filter == where------
        stus.filter(stu => stu.age >22).show()
        stus.filter("name in ('jack','alice')").show()
        stus.filter($"gender" === "M").show()

        // stus.where() // delegates to filter under the hood
        //--------group by---------
        stus.groupBy("gender").count().show()
        stus.groupBy("gender").sum("age").show()
        val map = Map(("age","sum"),("*","count"))
        stus.groupBy("gender").agg(map).show()

        stus.groupBy("gender").agg(("age","sum"),("age","count")).show()

        println("--------神奇操作---------")
        stus.groupBy("gender").count().show()
        stus.groupBy("gender","age").count().show()
        // pivot: groups the values of a non-grouping column, turns each distinct value
        // into a column name, then aggregates the data under each of those columns
        stus.groupBy("gender").pivot("age").count().show()

        //--------order by---------
        stus.orderBy($"age" desc).show()

        //-------- join ---------
        val scos = Seq(Score(1001,"语文",60.0),
        Score(1004,"数学",90.0),
        Score(1019,"物理",70.0),
        Score(1099,"化学",80.0)).toDS()
        stus.join(scos,stus("id") === scos("id"),"inner").show()
        stus.join(scos,stus("id") === scos("id"),"left").show()
        stus.join(scos,stus("id") === scos("id"),"right").show()
        stus.join(scos,stus("id") === scos("id"),"full").show()
        */

        // Suffixes accepted by the UDF.
        val suffixes = Seq("y", "e", "k")
        // Null-safe and empty-safe: the previous substring(length - 1) threw on "" and on null,
        // and Spark hands SQL NULL to a String parameter as a null reference.
        val endsWithSuffix: String => Boolean =
          (name: String) => name != null && suffixes.exists(suffix => name.endsWith(suffix))
        spark.udf.register("lastIsX", endsWithSuffix)

        // createTempView fails if the view already exists (e.g. a reused session);
        // replacing it keeps the test re-runnable.
        stus.createOrReplaceTempView("student")

        spark.sql("select * from student where lastIsX(name)").show()
      } finally {
        // Release the session even when one of the steps above throws.
        spark.close()
      }
    }
  • 相关阅读:
    sql 中 列转换成拼音首字母简写【邹建版】
    取一个任意数所有 和的等式
    sql 汉字转全拼音(非首字母)
    实现消息来时让网页标题闪动
    hdoj 1754 I Hate It 线段树(二)
    nyoj 247 虚拟城市之旅 路径压缩
    hdoj 1247 字典树分词 strncpy函数
    hdoj 1671字典树水题之三 静态数组节约内存法
    sort函数
    hdoj 1166 排兵布阵 线段树()
  • 原文地址:https://www.cnblogs.com/Diyo/p/11410895.html
Copyright © 2011-2022 走看看