zoukankan      html  css  js  c++  java
  • RDD & java 类 (反射)构建 DataFrame

    import org.apache.spark.SparkConf
    import org.apache.spark.SparkContext
    import org.apache.spark.sql.SQLContext
    
    /**
     * Example: builds a DataFrame from a text-file RDD by mapping each line onto a case class
     * (schema inferred by reflection), registers it as a temporary table and queries it with SQL.
     */
    object RDD2DataFrameByReflectionScala {
      /** Record type for one input line; its fields `name` and `age` are the columns queried below. */
      case class Person(name: String, age: Int)

      /**
       * Runs the example against the file "Peoples.txt" using a local Spark master and prints
       * the name of every person whose age is between 6 and 19 (inclusive), twice: once read by
       * column position and once by column name.
       *
       * @param args command-line arguments (unused)
       */
      def main(args: Array[String]): Unit = {
        val conf = new SparkConf()
          .setAppName("My First Spark App") // application name; visible on the job monitoring page
          .setMaster("local")
        val sc = new SparkContext(conf)

        // Always release the SparkContext, even if reading or parsing the input fails.
        try {
          val sqlContext = new SQLContext(sc)
          import sqlContext.implicits._

          // The data read from the text file must match the attributes of Person (String, Int).
          // Each line is split on ","; field 1 is used as the name and field 2 as the age.
          // NOTE(review): field 0 is skipped — presumably an id column; confirm against Peoples.txt.
          val people = sc.textFile("Peoples.txt")
            .map(_.split(","))
            .map(p => Person(p(1), p(2).trim.toInt))
            .toDF()

          people.registerTempTable("people")

          val teenagers = sqlContext.sql("SELECT name, age FROM people WHERE age >= 6 AND age <= 19")

          // Each element `t` handed to map is one result row; in the Spark 1.x API this file
          // appears to target, map on a DataFrame yields an RDD of the lambda's result type.
          // Access the column by position:
          teenagers.map(t => "Name: " + t(0)).collect().foreach(println)

          // or by field name:
          teenagers.map(t => "Name: " + t.getAs[String]("name")).collect().foreach(println)
        } finally {
          sc.stop()
        }
      }
    }
    

  • 相关阅读:
    结构~函数~输入输出
    常用缀名
    结构
    枚举
    int argc char*argv[]
    字符串的操作
    字符串函数#include<string.h>
    指针的应用
    2019.1.25~2019.1.30学习总结
    v-for
  • 原文地址:https://www.cnblogs.com/TendToBigData/p/10501292.html
Copyright © 2011-2022 走看看