zoukankan      html  css  js  c++  java
  • Spark- Action实战

    Spark- Action实战

    package cn.rzlee.spark.core
    
    import org.apache.spark.rdd.RDD
    import org.apache.spark.{SparkConf, SparkContext}
    
    object ActionOperation {

      def main(args: Array[String]): Unit = {
        //reduce()
        //collect()
        //count()
        //take()
        //saveAsTextFile()
        countByKey()
      }

      /** Builds the local-mode SparkContext shared by every demo action.
        * Extracted because all six actions previously duplicated this setup.
        */
      private def createContext(): SparkContext = {
        val conf = new SparkConf().setAppName(this.getClass.getSimpleName).setMaster("local[1]")
        new SparkContext(conf)
      }

      /** reduce action: folds all elements into a single value on the driver (here: their sum). */
      def reduce(): Unit = {
        val sc = createContext()
        try {
          val numbersList = Array(1,2,3,4,5,6,7,8,9,10)
          val numbersRdd: RDD[Int] = sc.parallelize(numbersList, 1)
          val sum: Int = numbersRdd.reduce(_ + _)
          println(sum)
        } finally sc.stop() // release the local executor; the original leaked the context
      }

      /** collect action: pulls the whole RDD back to the driver before printing.
        * FIX: the original iterated the RDD directly (`for (num <- rdd)`), which
        * desugars to rdd.foreach and runs on the executors — output would never
        * appear on the driver in a real cluster and `.collect()` was never shown.
        */
      def collect(): Unit = {
        val sc = createContext()
        try {
          val numbersList = Array(1,2,3,4,5,6,7,8,9,10)
          val numbersRdd: RDD[Int] = sc.parallelize(numbersList, 1)
          val doubleNumbers: Array[Int] = numbersRdd.map(num => num * 2).collect()
          doubleNumbers.foreach(println)
        } finally sc.stop()
      }

      /** count action: returns the number of elements in the RDD as a Long. */
      def count(): Unit = {
        val sc = createContext()
        try {
          val numbersList = Array(1,2,3,4,5,6,7,8,9,10)
          val numbersRdd: RDD[Int] = sc.parallelize(numbersList, 1)
          val count: Long = numbersRdd.count()
          println(count)
        } finally sc.stop()
      }

      /** take action: fetches only the first n elements to the driver (cheaper than collect). */
      def take(): Unit = {
        val sc = createContext()
        try {
          val numbersList = Array(1,2,3,4,5,6,7,8,9,10)
          val numbersRdd: RDD[Int] = sc.parallelize(numbersList, 1)
          val top3Numbers = numbersRdd.take(3)
          top3Numbers.foreach(println)
        } finally sc.stop()
      }

      /** saveAsTextFile action: writes each partition of the RDD out as a text file.
        * FIX: the original literal "C:\Users\...\saveAsTest\" did not compile —
        * \U and \D are invalid Scala escape sequences and the trailing \" escaped
        * the closing quote. Forward slashes are valid on Windows and portable.
        */
      def saveAsTextFile(): Unit = {
        val sc = createContext()
        try {
          val numbersList = Array(1,2,3,4,5,6,7,8,9,10)
          val numbersRdd: RDD[Int] = sc.parallelize(numbersList, 1)
          numbersRdd.saveAsTextFile("C:/Users/txdyl/Desktop/log/out/saveAsTest")
        } finally sc.stop()
      }

      /** countByKey action: counts elements per key and returns a driver-side Map. */
      def countByKey(): Unit = {
        val sc = createContext()
        try {
          // (key, value) pairs: class name -> student name
          val studentList = Array(("class1", "tom"), ("class2", "leo"), ("class1", "jeo"), ("class2", "jime"))
          val students: RDD[(String, String)] = sc.parallelize(studentList, 1)
          val studentsCounts: collection.Map[String, Long] = students.countByKey()
          println(studentsCounts)
        } finally sc.stop()
      }

      // foreach executes on the remote executors instead of pulling the data back
      // to the driver record by record, so it performs much better than collect.

    }
  • 相关阅读:
    [JavaWeb基础] 001.简单的JavaWeb代码和Tomcat配置部署
    [程序员短壁纸]2015年05月
    [注]什么是用户?估计90%人不知道
    [注]排行榜相关知识
    [注]微信公众号的运营推广总结方案(持续更新)
    [注]6W运营法则教你盘活社区内容运营
    [注]一条牛B的游戏推送要具备哪些条件?
    [微信营销企划之路]001.环境搭建(XAMPP+WeiPHP)
    [Python基础]005.语法(4)
    Java多线程设计模式
  • 原文地址:https://www.cnblogs.com/RzCong/p/9893573.html
Copyright © 2011-2022 走看看