scala> val personRDD=sc.textFile("/tmp/person.txt")
personRDD: org.apache.spark.rdd.RDD[String] = /tmp/person.txt MapPartitionsRDD[39] at textFile at <console>:25
scala> val ageRDD=personRDD.map(x=>{val arr=x.split(",");(arr(2),1)})
ageRDD: org.apache.spark.rdd.RDD[(String, Int)] = MapPartitionsRDD[41] at map at <console>:27
scala> val grouprdd=ageRDD.groupByKey()
grouprdd: org.apache.spark.rdd.RDD[(String, Iterable[Int])] = ShuffledRDD[42] at groupByKey at <console>:29
scala> grouprdd.dependencies.foreach(dep=>{println(dep.getClass);println(dep.rdd);println(dep.rdd.partitions);println(dep.rdd.partitions.size)})
class org.apache.spark.ShuffleDependency
MapPartitionsRDD[41] at map at <console>:27
[Lorg.apache.spark.Partition;@2e33dd0d
2
scala> personRDD.dependencies.foreach(dep=>{println(dep.getClass);println(dep.rdd);println(dep.rdd.partitions);println(dep.rdd.partitions.size)})
class org.apache.spark.OneToOneDependency
/tmp/person.txt HadoopRDD[38] at textFile at <console>:25
[Lorg.apache.spark.Partition;@5b0f052f
2
scala> ageRDD.dependencies.foreach(dep=>{println(dep.getClass);println(dep.rdd);println(dep.rdd.partitions);println(dep.rdd.partitions.size)})
class org.apache.spark.OneToOneDependency
/tmp/person.txt MapPartitionsRDD[39] at textFile at <console>:25
[Lorg.apache.spark.Partition;@5b0f052f
2