zoukankan      html  css  js  c++  java
  • RDD的依赖关系


    scala> val personRDD=sc.textFile("/tmp/person.txt")
    personRDD: org.apache.spark.rdd.RDD[String] = /tmp/person.txt MapPartitionsRDD[39] at textFile at <console>:25

    scala> val ageRDD=personRDD.map(x=>{val arr=x.split(",");(arr(2),1)})
    ageRDD: org.apache.spark.rdd.RDD[(String, Int)] = MapPartitionsRDD[41] at map at <console>:27

    scala> val grouprdd=ageRDD.groupByKey()
    grouprdd: org.apache.spark.rdd.RDD[(String, Iterable[Int])] = ShuffledRDD[40] at groupByKey at <console>:29

    scala> grouprdd.dependencies.foreach(dep=>{println(dep.getClass);println(dep.rdd);println(dep.rdd.partitions);println(dep.rdd.partitions.size)})
    class org.apache.spark.ShuffleDependency
    MapPartitionsRDD[34] at map at <console>:27
    [Lorg.apache.spark.Partition;@2e33dd0d
    2

    scala> personRDD.dependencies.foreach(dep=>{println(dep.getClass);println(dep.rdd);println(dep.rdd.partitions);println(dep.rdd.partitions.size)})
    class org.apache.spark.OneToOneDependency
    /tmp/person.txt HadoopRDD[38] at textFile at <console>:25
    [Lorg.apache.spark.Partition;@5b0f052f
    2

    scala> ageRDD.dependencies.foreach(dep=>{println(dep.getClass);println(dep.rdd);println(dep.rdd.partitions);println(dep.rdd.partitions.size)})
    class org.apache.spark.OneToOneDependency
    /tmp/person.txt MapPartitionsRDD[39] at textFile at <console>:25
    [Lorg.apache.spark.Partition;@5b0f052f
    2

  • 相关阅读:
    MySQL主从复制原理
    MySQL调优
    apache禁止php解析--安全
    apache禁止指定的user_agent访问
    python---日常练习
    字符、字节的概念和区别;编码概念
    Django模型初识
    git安装
    Django--Hello
    Fiddler---断言/打断点,更改提交数据
  • 原文地址:https://www.cnblogs.com/playforever/p/9450010.html
Copyright © 2011-2022 走看看