zoukankan      html  css  js  c++  java
  • RDD & Scala case class(反射)构建 DataFrame

    import org.apache.spark.SparkConf
    import org.apache.spark.SparkContext
    import org.apache.spark.sql.SQLContext
    
    /**
     * Example: turn an RDD of text lines into a DataFrame by mapping each line to a
     * case-class instance and calling `toDF()`, then query the result with SQL.
     */
    object RDD2DataFrameByReflectionScala {
      /** Record type for one parsed input line; its fields become the DataFrame columns. */
      case class Person(name: String, age: Int)

      /**
       * Reads "Peoples.txt", registers the parsed records as the temp table "people",
       * selects the rows with age between 6 and 19 (inclusive) and prints their names twice:
       * once by column position and once by column name.
       *
       * @param args command-line arguments (unused)
       */
      def main(args: Array[String]): Unit = {
        // The application name is what shows up on the Spark monitoring page.
        val conf = new SparkConf()
          .setAppName("My First Spark App")
          .setMaster("local")
        val sc = new SparkContext(conf)

        // Always release the SparkContext, even when the job fails part-way
        // (e.g. missing input file or a line whose age field is not an integer).
        try {
          val sqlContext = new SQLContext(sc)
          import sqlContext.implicits._

          // The fields read from the text file must match the types declared on Person
          // (String, Int). Columns 1 and 2 of each comma-separated line are used;
          // NOTE(review): presumably column 0 holds an id - confirm against Peoples.txt.
          val people = sc.textFile("Peoples.txt")
            .map(_.split(","))
            .map(p => Person(p(1), p(2).trim.toInt))
            .toDF()

          people.registerTempTable("people")

          val teenagers = sqlContext.sql("SELECT name, age FROM people WHERE age >= 6 AND age <= 19")

          // The lambda receives one result row at a time; position 0 is the first
          // selected column, i.e. `name`.
          teenagers.map(t => "Name: " + t(0)).collect().foreach(println)

          // or by field name:
          teenagers.map(t => "Name: " + t.getAs[String]("name")).collect().foreach(println)
        } finally {
          sc.stop()
        }
      }
    }
    

  • 相关阅读:
    open stack总结
    Nginx操作命令
    Nginx 配置详解
    Linux 常用命令-- top
    CEPH 使用SSD日志盘+SATA数据盘, 随OSD数目递增对性能影响的递增测试
    MyCat水平分库
    MyCat垂直分库
    MyCat基本知识
    utf8mb4复杂昵称问题
    Power安装linux-BIG ENDIAN mysql编译安装
  • 原文地址:https://www.cnblogs.com/TendToBigData/p/10501292.html
Copyright © 2011-2022 走看看