zoukankan      html  css  js  c++  java
  • Spark- Action实战

    Spark- Action实战

    package cn.rzlee.spark.core
    
    import org.apache.spark.rdd.RDD
    import org.apache.spark.{SparkConf, SparkContext}
    
    /**
      * Minimal, runnable demos of common Spark RDD actions
      * (`reduce`, `collect`, `count`, `take`, `saveAsTextFile`, `countByKey`).
      *
      * Each demo creates its own local, single-threaded `SparkContext`, runs one
      * action on a tiny in-memory dataset, prints or saves the result, and stops
      * the context again so that demos can be run back to back in the same JVM.
      */
    object ActionOperation {

      /** Default output directory used by [[saveAsTextFile]] (Windows path, backslashes escaped). */
      private val DefaultOutputDir: String =
        "C:\\Users\\txdyl\\Desktop\\log\\out\\saveAsTest\\"

      /**
        * Entry point. Uncomment the demo you want to run; only `countByKey`
        * is enabled by default.
        *
        * @param args command-line arguments (unused)
        */
      def main(args: Array[String]): Unit = {
        //reduce()
        //collect()
        //count()
        //take()
        //saveAsTextFile()
        countByKey()
      }

      /**
        * Runs `body` against a freshly created local `SparkContext` and always
        * stops the context afterwards, even when `body` throws.
        *
        * @param body the work to perform with the context
        * @tparam A   result type of `body`
        * @return whatever `body` returns
        */
      private def withSparkContext[A](body: SparkContext => A): A = {
        val conf = new SparkConf().setAppName(this.getClass.getSimpleName).setMaster("local[1]")
        val sc = new SparkContext(conf)
        // Without stop() the context (and its threads) would outlive the demo.
        try body(sc) finally sc.stop()
      }

      /** Builds the sample RDD holding the integers 1 to 10 in a single partition. */
      private def sampleNumbers(sc: SparkContext): RDD[Int] =
        sc.parallelize(1 to 10, 1)

      /** `reduce`: folds all elements with `+` and prints the sum. */
      def reduce(): Unit = withSparkContext { sc =>
        val sum: Int = sampleNumbers(sc).reduce(_ + _)
        println(sum)
      }

      /**
        * `collect`: doubles every element, pulls the whole result back to the
        * driver as a local array and prints it there.
        *
        * Note: `RDD.foreach` runs on the executors instead of shipping every
        * element to the driver, so for large datasets it is much cheaper than
        * `collect`; only collect results that are known to be small.
        */
      def collect(): Unit = withSparkContext { sc =>
        val doubledNumbers: Array[Int] = sampleNumbers(sc).map(_ * 2).collect()
        doubledNumbers.foreach(println)
      }

      /** `count`: prints the number of elements in the RDD. */
      def count(): Unit = withSparkContext { sc =>
        val total: Long = sampleNumbers(sc).count()
        println(total)
      }

      /** `take`: fetches the first three elements to the driver and prints them. */
      def take(): Unit = withSparkContext { sc =>
        // take(n) returns the FIRST n elements in partition order, not the largest n.
        val firstThree: Array[Int] = sampleNumbers(sc).take(3)
        firstThree.foreach(println)
      }

      /**
        * `saveAsTextFile`: writes the RDD as text part-files into `path`.
        *
        * @param path output directory; defaults to the original demo location.
        *             NOTE(review): the underlying Hadoop output format normally
        *             refuses to write into an existing directory - confirm and
        *             clean up between runs if needed.
        */
      def saveAsTextFile(path: String = DefaultOutputDir): Unit = withSparkContext { sc =>
        sampleNumbers(sc).saveAsTextFile(path)
      }

      /** `countByKey`: counts how many students belong to each class and prints the map. */
      def countByKey(): Unit = withSparkContext { sc =>
        val studentList = Array(
          "class1" -> "tom",
          "class2" -> "leo",
          "class1" -> "jeo",
          "class2" -> "jime"
        )
        val students: RDD[(String, String)] = sc.parallelize(studentList, 1)
        val studentsCounts: collection.Map[String, Long] = students.countByKey()
        println(studentsCounts)
      }

    }
  • 相关阅读:
    HDU——1596find the safest road(邻接矩阵+优先队列SPFA)
    POJ——3264Balanced Lineup(RMQ模版水题)
    周赛Problem 1025: Hkhv love spent money(RMQ)
    Problem 1004: 蛤玮打扫教室(区间覆盖端点记录)
    周赛Problem 1021: 分蛋糕(埃拉托斯特尼筛法)
    廖雪峰Java11多线程编程-1线程的概念-5中断线程
    廖雪峰Java11多线程编程-1线程的概念-3线程的状态
    廖雪峰Java11多线程编程-1线程的概念-2创建新线程
    廖雪峰Java11多线程编程-1线程的概念-1多线程简介
    廖雪峰Java10加密与安全-6数字证书-1数字证书
  • 原文地址:https://www.cnblogs.com/RzCong/p/9893573.html
Copyright © 2011-2022 走看看