zoukankan      html  css  js  c++  java
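  • RDD的依赖关系

    下面的 spark-shell 会话通过 RDD 的 dependencies 属性打印每个 RDD 对其父 RDD 的依赖类型、父 RDD 以及父 RDD 的分区数:groupByKey 得到的 RDD 对父 RDD 是 ShuffleDependency(宽依赖),而 textFile、map 得到的 RDD 对父 RDD 是 OneToOneDependency(窄依赖)。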


    scala> val personRDD=sc.textFile("/tmp/person.txt")
    personRDD: org.apache.spark.rdd.RDD[String] = /tmp/person.txt MapPartitionsRDD[39] at textFile at <console>:25

    scala> val ageRDD=personRDD.map(x=>{val arr=x.split(",");(arr(2),1)})
    ageRDD: org.apache.spark.rdd.RDD[(String, Int)] = MapPartitionsRDD[41] at map at <console>:27

    scala> val grouprdd=ageRDD.groupByKey()
    grouprdd: org.apache.spark.rdd.RDD[(String, Iterable[Int])] = ShuffledRDD[40] at groupByKey at <console>:29

    scala> grouprdd.dependencies.foreach(dep=>{println(dep.getClass);println(dep.rdd);println(dep.rdd.partitions);println(dep.rdd.partitions.size)})
    class org.apache.spark.ShuffleDependency
    MapPartitionsRDD[34] at map at <console>:27
    [Lorg.apache.spark.Partition;@2e33dd0d
    2

    scala> personRDD.dependencies.foreach(dep=>{println(dep.getClass);println(dep.rdd);println(dep.rdd.partitions);println(dep.rdd.partitions.size)})
    class org.apache.spark.OneToOneDependency
    /tmp/person.txt HadoopRDD[38] at textFile at <console>:25
    [Lorg.apache.spark.Partition;@5b0f052f
    2

    scala> ageRDD.dependencies.foreach(dep=>{println(dep.getClass);println(dep.rdd);println(dep.rdd.partitions);println(dep.rdd.partitions.size)})
    class org.apache.spark.OneToOneDependency
    /tmp/person.txt MapPartitionsRDD[39] at textFile at <console>:25
    [Lorg.apache.spark.Partition;@5b0f052f
    2

  • 相关阅读:
    字符串数组和字符串的转换
    项目总结3
    解决几种中文乱码的问题
    ipms的sql语句
    ipms综合管理系统的总结2
    ipms综合管理系统的总结
    简答题汇总
    log4net根据日志类型写入到不同的文件中
    NUnit单元测试初试
    log4net
  • 原文地址:https://www.cnblogs.com/playforever/p/9450010.html
Copyright © 2011-2022 走看看