zoukankan      html  css  js  c++  java
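  • RDD的依赖关系

    下面的示例通过 RDD 的 dependencies 属性查看各 RDD 的依赖类型：textFile、map 得到的 RDD 对其父 RDD 是窄依赖（OneToOneDependency），而 groupByKey 得到的 RDD 对其父 RDD 是宽依赖（ShuffleDependency）。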


    scala> val personRDD=sc.textFile("/tmp/person.txt")
    personRDD: org.apache.spark.rdd.RDD[String] = /tmp/person.txt MapPartitionsRDD[39] at textFile at <console>:25

    scala> val ageRDD=personRDD.map(x=>{val arr=x.split(",");(arr(2),1)})
    ageRDD: org.apache.spark.rdd.RDD[(String, Int)] = MapPartitionsRDD[41] at map at <console>:27

    scala> val grouprdd=ageRDD.groupByKey()
    grouprdd: org.apache.spark.rdd.RDD[(String, Iterable[Int])] = ShuffledRDD[40] at groupByKey at <console>:29

    scala> grouprdd.dependencies.foreach(dep=>{println(dep.getClass);println(dep.rdd);println(dep.rdd.partitions);println(dep.rdd.partitions.size)})
    class org.apache.spark.ShuffleDependency
    MapPartitionsRDD[41] at map at <console>:27
    [Lorg.apache.spark.Partition;@2e33dd0d
    2

    scala> personRDD.dependencies.foreach(dep=>{println(dep.getClass);println(dep.rdd);println(dep.rdd.partitions);println(dep.rdd.partitions.size)})
    class org.apache.spark.OneToOneDependency
    /tmp/person.txt HadoopRDD[38] at textFile at <console>:25
    [Lorg.apache.spark.Partition;@5b0f052f
    2

    scala> ageRDD.dependencies.foreach(dep=>{println(dep.getClass);println(dep.rdd);println(dep.rdd.partitions);println(dep.rdd.partitions.size)})
    class org.apache.spark.OneToOneDependency
    /tmp/person.txt MapPartitionsRDD[39] at textFile at <console>:25
    [Lorg.apache.spark.Partition;@5b0f052f
    2

  • 相关阅读:
    CSS命名法
    CSS伪类
    CSS总结
    CSS应用给网页元素的几种方式总结
    CSS语法小记
    细谈WEB标准
    Spark 个人实战系列(1)--Spark 集群安装
    HBase 实战(1)--HBase的数据导入方式
    shell 脚本实战笔记(9)--linux自动批量添加用户
    shell 脚本实战笔记(8)--ssh免密码输入执行命令
  • 原文地址:https://www.cnblogs.com/playforever/p/9450010.html
Copyright © 2011-2022 走看看