zoukankan      html  css  js  c++  java
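  • RDD的依赖关系（窄依赖与宽依赖）

    下面在 spark-shell 中通过 rdd.dependencies 查看各个 RDD 对父 RDD 的依赖类型：textFile、map 得到的是窄依赖（OneToOneDependency），而 groupByKey 需要 shuffle，得到的是宽依赖（ShuffleDependency）。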

    scala> val personRDD=sc.textFile("/tmp/person.txt")
    personRDD: org.apache.spark.rdd.RDD[String] = /tmp/person.txt MapPartitionsRDD[39] at textFile at <console>:25

    scala> val ageRDD=personRDD.map(x=>{val arr=x.split(",");(arr(2),1)})
    ageRDD: org.apache.spark.rdd.RDD[(String, Int)] = MapPartitionsRDD[41] at map at <console>:27

    scala> val grouprdd=ageRDD.groupByKey()
    grouprdd: org.apache.spark.rdd.RDD[(String, Iterable[Int])] = ShuffledRDD[40] at groupByKey at <console>:29

    scala> grouprdd.dependencies.foreach(dep=>{println(dep.getClass);println(dep.rdd);println(dep.rdd.partitions);println(dep.rdd.partitions.size)})
    class org.apache.spark.ShuffleDependency
    MapPartitionsRDD[41] at map at <console>:27
    [Lorg.apache.spark.Partition;@2e33dd0d
    2

    scala> personRDD.dependencies.foreach(dep=>{println(dep.getClass);println(dep.rdd);println(dep.rdd.partitions);println(dep.rdd.partitions.size)})
    class org.apache.spark.OneToOneDependency
    /tmp/person.txt HadoopRDD[38] at textFile at <console>:25
    [Lorg.apache.spark.Partition;@5b0f052f
    2

    scala> ageRDD.dependencies.foreach(dep=>{println(dep.getClass);println(dep.rdd);println(dep.rdd.partitions);println(dep.rdd.partitions.size)})
    class org.apache.spark.OneToOneDependency
    /tmp/person.txt MapPartitionsRDD[39] at textFile at <console>:25
    [Lorg.apache.spark.Partition;@5b0f052f
    2

  • 相关阅读:
    PAT 甲级 1115 Counting Nodes in a BST (30 分)
    PAT 甲级 1114 Family Property (25 分)
    PAT 甲级 1114 Family Property (25 分)
    Python Ethical Hacking
    Python Ethical Hacking
    Python Ethical Hacking
    Python Ethical Hacking
    Python Ethical Hacking
    Python Ethical Hacking
    Python Ethical Hacking
  • 原文地址:https://www.cnblogs.com/playforever/p/9450010.html
Copyright © 2011-2022 走看看