zoukankan      html  css  js  c++  java
  • Spark 大数据平台 Introduction part 2 coding

    Basic Functions

    // Distribute the list, double each element, keep the doubled values greater
    // than 5, and collect what is left as an array.
    sc.parallelize(List(1,2,3,4,5,6)).
      map(n => n * 2).
      filter(doubled => doubled > 5).
      collect()
    *** res: Array[Int] = Array(6, 8, 10, 12) ***
    
    // Sum the integers 1 through 10 by combining the elements pairwise.
    val rdd = sc.parallelize((1 to 10).toList)
    rdd.reduce((acc, next) => acc + next)
    *** res: Int = 55 ***
    

    union & intersection & join & lookup

    // Two small pair RDDs that share the keys "a" and "b"; the pair (b,1) is the
    // only element that occurs in both.
    val rdd1 = sc.parallelize(List("a" -> 1, "a" -> 2, "b" -> 1, "b" -> 3))
    val rdd2 = sc.parallelize(List("a" -> 3, "a" -> 4, "b" -> 1, "b" -> 2))

    // union keeps every element of both inputs, duplicates included.
    val unionRDD = rdd1.union(rdd2)
    unionRDD.collect()
    *** res: Array((a,1), (a,2), (b,1), (b,3), (a,3), (a,4), (b,1), (b,2)) ***
    
    // intersection keeps only the pairs present in both RDDs; here that is (b,1).
    val intersectionRDD = rdd1.intersection(rdd2)
    intersectionRDD.collect() 
    *** res: Array[(String, Int)] = Array((b,1)) ***
    
    // join matches elements by key and emits every left/right value combination per key.
    val joinRDD = rdd1.join(rdd2)
    joinRDD.collect()
    *** res: Array[(String, (Int, Int))] = Array((a,(1,3)), (a,(1,4)), (a,(2,3)), (a,(2,4)), (b,(1,1)), (b,(1,2)), (b,(3,1)), (b,(3,2))) ***
    
    // lookup returns every value stored under the given key: 1 and 2 for "a" in rdd1.
    rdd1.lookup("a")
    *** res: Seq[Int] = WrappedArray(1, 2) *** 
    
    // The union holds the "a" entries of both inputs, so all four values come back.
    unionRDD.lookup("a")
    *** res: Seq[Int] = WrappedArray(1, 2, 3, 4) ***
    
    // On the joined RDD each value under "a" is a pair (value from rdd1, value from rdd2).
    joinRDD.lookup("a")
    *** res: Seq[(Int, Int)] = ArrayBuffer((1,3), (1,4), (2,3), (2,4)) ***
    

    Character count example

    // Load the input data file as an RDD of strings.
    val rdd = sc.textFile("/Users/tony/spark/spark-xiaoxiang-v1/chapter-01/char.data")

    // Count how often each space-separated token occurs, ignoring case.
    // The dots trail each line on purpose: with leading dots, the Scala REPL treats
    // every continuation line as a call on the most recent value when the snippet is
    // entered line by line, so charCount would be bound to the flatMap result
    // instead of the finished counts.
    val charCount = rdd.flatMap(_.split(" ")).
                        map(char => (char.toLowerCase, 1)).
                        reduceByKey(_+_)
    charCount.collect()

    // Save the (token, count) pairs as text under the result path.
    // NOTE(review): Spark normally refuses to write into an output directory that
    // already exists — confirm before re-running this snippet.
    charCount.saveAsTextFile("/Users/tony/spark/spark-xiaoxiang-v1/chapter-01/result")
    
    // Token counts (case-insensitive, split on single spaces) ordered from the most
    // to the least frequent token.
    // sortBy on the count replaces the swap -> sortByKey(false) -> swap round trip,
    // and the trailing dots keep the whole chain a single statement even when the
    // snippet is entered line by line in the REPL.
    val charCountSort = rdd.flatMap(_.split(" ")).
                            map(char => (char.toLowerCase, 1)).
                            reduceByKey(_+_).
                            sortBy(_._2, ascending = false)
    charCountSort.collect()
    
  • 相关阅读:
    [补]2019HDU杭电多校第一场A
    [补]2019nowcoder牛客第三场F(暂且)
    [补]2019nowcoder牛客第一场E、I
    [学]从零(多项式基础与FFT)开始BM学习笔记
    [补]2019nowcoder牛客第二场E、H(upd0730)
    从一个简单的例子对win 服务程序进行讲解
    HTTP协议学习记录及总结
    Windows身份验证与forms身份验证的结合
    关于Sql server 的 几道面试题
    PlaceHolder控件的使用
  • 原文地址:https://www.cnblogs.com/rainbow203/p/Spark-da-shu-ju-ping-tai-Introduction-part-2-codin.html
Copyright © 2011-2022 走看看