zoukankan      html  css  js  c++  java
  • RDD & Scala case class (反射) 构建 DataFrame

    import org.apache.spark.SparkConf
    import org.apache.spark.SparkContext
    import org.apache.spark.sql.SQLContext
    
    /**
     * Example: build a DataFrame from an RDD by reflection on a case class,
     * register it as a temporary table, and query it with SQL.
     */
    object RDD2DataFrameByReflectionScala {
      /** Record type whose fields (name, age) supply the DataFrame columns via `toDF()`. */
      case class Person(name: String, age: Int)

      /**
       * Reads "Peoples.txt", converts each line to a [[Person]], selects the rows with
       * age between 6 and 19 (inclusive) and prints their names twice: once by column
       * position and once by column name.
       *
       * @param args command-line arguments (unused)
       */
      def main(args: Array[String]): Unit = {
        val conf = new SparkConf() // create the SparkConf object
        conf.setAppName("My First Spark App") // application name, shown on the job monitoring page
        conf.setMaster("local")
        val sc = new SparkContext(conf)
        // Always stop the context, even when the job fails, so it is not left open.
        try {
          val sqlContext = new SQLContext(sc)
          import sqlContext.implicits._

          // The data read from the text file must match the field types of Person (String, Int).
          // NOTE(review): fields 1 and 2 are used, so each line presumably starts with an
          // extra leading field (e.g. an id) -- confirm against the layout of Peoples.txt.
          val people = sc.textFile("Peoples.txt")
            .map(_.split(","))
            .map(p => Person(p(1), p(2).trim.toInt))
            .toDF()

          people.registerTempTable("people")

          val teenagers = sqlContext.sql("SELECT name, age FROM people WHERE age >= 6 AND age <= 19")

          // Each element handed to the lambda is one result row; index 0 is the first
          // selected column (name).
          teenagers.map(t => "Name: " + t(0)).collect().foreach(println)

          // or by field name:
          teenagers.map(t => "Name: " + t.getAs[String]("name")).collect().foreach(println)
        } finally {
          sc.stop()
        }
      }
    }
    

  • 相关阅读:
    java快速排序代码
    java操作redis实现和mysql数据库的交互
    python 操作mysql数据库存
    JAVA 操作远程mysql数据库实现单表增删改查操作
    URI和URL及URN的区别
    day06_字符集设置
    day6_oracle手工建库
    day08_SGA后半部分
    day08_存储
    day05_sqlloader基础
  • 原文地址:https://www.cnblogs.com/TendToBigData/p/10501292.html
Copyright © 2011-2022 走看看