zoukankan      html  css  js  c++  java
  • Spark RDD转换为DataFrame

     
    #构造case class,利用反射机制隐式转换
    scala> import spark.implicits._
    scala> val rdd= sc.textFile("input/textdata.txt")
    scala> case class Person(id:Int,name:String)
    scala> val df = rdd.map(_.split(",")).map(x=>Person(x(0).toInt,x(1))).toDF
    scala> df.show
    +---+--------+
    | id|    name|
    +---+--------+
    |  1|zhangsan|
    |  2|    lisi|
    |  3|  wangwu|
    |  4| zhaoliu|
    +---+--------+
    
    #通过schema,Row构造dataframe
    scala> import org.apache.spark.sql.types._
    scala> import org.apache.spark.sql.Row
    scala> val structFields = Array(StructField("id",IntegerType,true),StructField("name",StringType,true))
    scala> val structType = StructType(structFields) // 创建schema结构
    scala> val lines= sc.textFile("input/textdata.txt")
    scala> val rdd = lines.map(_.split(",")).map(x=>Row(x(0).toInt,x(1))) // 创建RDD[Row]
    scala> val df = spark.createDataFrame(rdd,structType) // 通过RDD[Row],schema构建DataFrame
    scala> df.show
    +---+--------+
    | id|    name|
    +---+--------+
    |  1|zhangsan|
    |  2|    lisi|
    |  3|  wangwu|
    |  4| zhaoliu|
    +---+--------+
    

      

    cat textdata.txt
    1,zhangsan
    2,lisi
    3,wangwu
    4,zhaoliu
    

      

  • 相关阅读:
    C#
    C#
    C#
    创建一个ROS包
    创建一个工作空间
    ROS的文件系统
    单一职责原因
    策略模式
    UML类图
    简单工厂模式
  • 原文地址:https://www.cnblogs.com/tibit/p/7998274.html
Copyright © 2011-2022 走看看