zoukankan      html  css  js  c++  java
  • 实验5 Spark SQL 编程初级实践

    源文件内容如下(包含 id、name、age 三个字段)。将数据复制保存到 Ubuntu 系统的 /usr/local/spark 目录下,命名为 employee.txt,实现从 RDD 转换得到 DataFrame,并按 id:1,name:Ella,age:36 的格式打印出 DataFrame 的所有数据。请写出程序代码。(任选一种方法即可)

    1,Ella,36
    2,Bob,29
    3,Jack,29

    代码如下:

    import org.apache.spark.sql.types._
    import org.apache.spark.sql.Encoder
    import org.apache.spark.sql.Row
    import org.apache.spark.sql.SparkSession
    /**
     * Converts an RDD of text lines into a DataFrame by programmatically
     * specifying the schema, then prints every record as
     * `id:<id>,name:<name>,age:<age>` (one record per line).
     *
     * Each input line is expected to hold three comma-separated fields:
     * `id,name,age`. All three columns are kept as strings.
     */
    object RDDtoDF {

      /**
       * Program entry point.
       *
       * @param args command-line arguments (unused)
       */
      def main(args: Array[String]): Unit = {
        val spark = SparkSession.builder().appName("RddToDFrame").master("local").getOrCreate()
        try {
          import spark.implicits._

          val employeeFile = "file:///usr/local/spark/employee.txt"

          // Schema: three nullable string columns named id, name and age.
          val schemaString = "id name age"
          val fields = schemaString
            .split(" ")
            .map(fieldName => StructField(fieldName, StringType, nullable = true))
          val schema = StructType(fields)

          val rowRDD = spark.sparkContext
            .textFile(employeeFile)
            // Skip blank lines (e.g. a trailing newline); splitting them would yield
            // fewer than three fields and fail with ArrayIndexOutOfBoundsException.
            .filter(_.trim.nonEmpty)
            .map(_.split(","))
            // Trim every field so stray spaces around the commas do not leak into the output.
            .map(attributes => Row(attributes(0).trim, attributes(1).trim, attributes(2).trim))

          val employeeDF = spark.createDataFrame(rowRDD, schema)
          employeeDF.createOrReplaceTempView("employee")

          val results = spark.sql("SELECT id,name,age FROM employee")

          // Print with collect + println rather than show(): show() truncates cells
          // longer than 20 characters ("id:1,name:Ella,age:36" is 21), prints only the
          // first 20 rows, and wraps the output in an ASCII table.
          results
            .map(t => s"id:${t(0)},name:${t(1)},age:${t(2)}")
            .collect()
            .foreach(println)
        } finally {
          // Release the Spark context even when the job fails.
          spark.stop()
        }
      }
    }

    运行截图:

  • 相关阅读:
    STM32概述
    对xlslib库与libxls库的简易封装
    Makefile.am编写规则
    linux下使用autoconf制作Makefile
    python 登录三次禁止登录
    Linux -- gpasswd
    Linux -- userdel
    Linux -- groupmod
    Linux -- groupadd
    Linux -- passwd
  • 原文地址:https://www.cnblogs.com/z12568/p/10604400.html
Copyright © 2011-2022 走看看