zoukankan      html  css  js  c++  java
  • Spark运行各个时间段的解释

    package org.apache.spark.ui


    /**
     * Help strings (tool tips) for the `org.apache.spark.ui` package.
     *
     * Each member is a plain `String` constant describing one metric or view.
     *
     * Note: the triple-quoted literals below are NOT passed through `stripMargin`, so they
     * retain their embedded line breaks and the leading indentation of continuation lines.
     * Presumably the consumers render them as HTML, where runs of whitespace collapse --
     * TODO confirm before reformatting any of these literals.
     */
    private[spark] object ToolTips {
      /** Explains the scheduler-delay metric and how to reduce it. */
      val SCHEDULER_DELAY: String =
        """Scheduler delay includes time to ship the task from the scheduler to
           the executor, and time to send the task result from the executor to the scheduler. If
           scheduler delay is large, consider decreasing the size of tasks or decreasing the size
           of task results."""

      /** Explains the task-deserialization-time metric. */
      val TASK_DESERIALIZATION_TIME: String =
        """Time spent deserializing the task closure on the executor, including the time to read the
           broadcasted task."""

      /** Explains the time a task spent blocked on remote shuffle reads. */
      val SHUFFLE_READ_BLOCKED_TIME: String =
        "Time that the task spent blocked waiting for shuffle data to be read from remote machines."

      /**
       * Misspelled legacy name for [[SHUFFLE_READ_BLOCKED_TIME]].
       *
       * Kept only so that existing references keep compiling; new code should use
       * [[SHUFFLE_READ_BLOCKED_TIME]].
       */
      val KSHUFFLE_READ_BLOCED_TIME: String = SHUFFLE_READ_BLOCKED_TIME

      /** Explains the input metric. */
      val INPUT: String = "Bytes and records read from Hadoop or from Spark storage."

      /** Explains the output metric. */
      val OUTPUT: String = "Bytes and records written to Hadoop."

      /** Explains the storage-memory metric (the trailing space is part of the original text). */
      val STORAGE_MEMORY: String =
        "Memory used / total available memory for storage of data " +
          "like RDD partitions cached in memory. "

      /** Explains the shuffle-write metric. */
      val SHUFFLE_WRITE: String =
        "Bytes and records written to disk in order to be read by a shuffle in a future stage."

      /** Explains the total shuffle-read metric (local plus remote). */
      val SHUFFLE_READ: String =
        """Total shuffle bytes and records read (includes both data read locally and data read from
           remote executors). """

      /** Explains the remote-only portion of the shuffle-read metric. */
      val SHUFFLE_READ_REMOTE_SIZE: String =
        """Total shuffle bytes read from remote executors. This is a subset of the shuffle
           read bytes; the remaining shuffle data is read locally. """

      /** Explains the getting-result-time metric and how to reduce it. */
      val GETTING_RESULT_TIME: String =
        """Time that the driver spends fetching task results from workers. If this is large, consider
           decreasing the amount of data returned from each task."""

      /** Explains the result-serialization-time metric. */
      val RESULT_SERIALIZATION_TIME: String =
        """Time spent serializing the task result on the executor before sending it back to the
           driver."""

      /** Explains the GC-time metric. */
      val GC_TIME: String =
        """Time that the executor spent paused for Java garbage collection while the task was
           running."""

      /** Describes the job timeline view and how to interact with it. */
      val JOB_TIMELINE: String =
        """Shows when jobs started and ended and when executors joined or left. Drag to scroll.
           Click Enable Zooming and use mouse wheel to zoom in/out."""

      /** Describes the stage timeline view and how to interact with it. */
      val STAGE_TIMELINE: String =
        """Shows when stages started and ended and when executors joined or left. Drag to scroll.
           Click Enable Zooming and use mouse wheel to zoom in/out."""

      /** Describes the per-job DAG visualization. */
      val JOB_DAG: String =
        """Shows a graph of stages executed for this job, each of which can contain
           multiple RDD operations (e.g. map() and filter()), and of RDDs inside each operation
           (shown as dots)."""

      /** Describes the per-stage DAG visualization (the literal ends with a line break). */
      val STAGE_DAG: String =
        """Shows a graph of RDD operations in this stage, and RDDs inside each one. A stage can run
           multiple operations (e.g. two map() functions) if they can be pipelined. Some operations
           also create multiple RDDs internally. Cached RDDs are shown in green.
        """
    }

  • 相关阅读:
    计算机网络知识技能水平的测评试题
    Socket与系统调用深度分析
    学习构建调试Linux内核网络代码的环境MenuOS系统
    深入学习socket网络编程,以java语言为例
    网络配置工具iproute2和net-tools的基本原理和基本使用方法
    Linux系统学习总结报告
    结合中断上下文切换和进程上下文切换分析Linux内核的一般执行过程
    深入理解系统调用-40号调用
    基于mykernel2.0 编写一个操作系统内核
    交互式多媒体图书平台的设计与实现
  • 原文地址:https://www.cnblogs.com/wzyxidian/p/4853619.html
Copyright © 2011-2022 走看看