zoukankan      html  css  js  c++  java
  • Yarn 配置多队列容量调度器

    首先配置 $HADOOP_HOME/etc/hadoop/capacity-scheduler.xml 文件

    <!--
      Licensed under the Apache License, Version 2.0 (the "License");
      you may not use this file except in compliance with the License.
      You may obtain a copy of the License at
    
        http://www.apache.org/licenses/LICENSE-2.0
    
      Unless required by applicable law or agreed to in writing, software
      distributed under the License is distributed on an "AS IS" BASIS,
      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
      See the License for the specific language governing permissions and
      limitations under the License. See accompanying LICENSE file.
    -->
    <configuration>
    
       <!-- Upper bound on the number of applications (jobs) the capacity scheduler will hold. -->
      <property>
        <name>yarn.scheduler.capacity.maximum-applications</name>
        <value>10000</value>
        <description>
          Maximum number of applications that can be pending and running.
        </description>
      </property>
    
      <!-- Fraction of resources that running application masters (e.g. MRAppMaster)
            may occupy; capping it limits how many applications run concurrently.
      -->
      <property>
        <name>yarn.scheduler.capacity.maximum-am-resource-percent</name>
        <value>0.1</value>
        <description>
          Maximum percent of resources in the cluster which can be used to run 
          application masters i.e. controls number of concurrent running
          applications.
        </description>
      </property>
    
      <!-- Strategy used to compare resources when allocating them to jobs.
      -->
      <property>
        <name>yarn.scheduler.capacity.resource-calculator</name>
        <value>org.apache.hadoop.yarn.util.resource.DefaultResourceCalculator</value>
        <description>
          The ResourceCalculator implementation to be used to compare 
          Resources in the scheduler.
          The default i.e. DefaultResourceCalculator only uses Memory while
          DominantResourceCalculator uses dominant-resource to compare 
          multi-dimensional resources such as Memory, CPU etc.
        </description>
      </property>
    
       <!-- Child queues of the root queue; queues a and b are added alongside default. -->
      <property>
        <name>yarn.scheduler.capacity.root.queues</name>
        <value>default,a,b</value>
        <description>
          The queues at this level (root is the root queue).
        </description>
      </property>
    
      <!-- Capacity, in percent, of each child queue of root.
           The capacities of all sibling queues must add up to 100
           (here 40 + 30 + 30).
      -->
      <property>
        <name>yarn.scheduler.capacity.root.default.capacity</name>
        <value>40</value>
        <description>Default queue target capacity.</description>
      </property>

      <property>
        <name>yarn.scheduler.capacity.root.a.capacity</name>
        <value>30</value>
        <description>Queue a target capacity.</description>
      </property>

      <property>
        <name>yarn.scheduler.capacity.root.b.capacity</name>
        <value>30</value>
        <description>Queue b target capacity.</description>
      </property>
    
        <!-- Limit on how much of a queue a single user may consume, expressed as a
             multiple of the queue's configured capacity (1 = at most that capacity).
        -->
      <property>
        <name>yarn.scheduler.capacity.root.default.user-limit-factor</name>
        <value>1</value>
        <description>
          Multiple of the default queue's capacity that a single user may acquire.
        </description>
      </property>

      <property>
        <name>yarn.scheduler.capacity.root.a.user-limit-factor</name>
        <value>1</value>
        <description>
          Multiple of queue a's capacity that a single user may acquire.
        </description>
      </property>

      <property>
        <name>yarn.scheduler.capacity.root.b.user-limit-factor</name>
        <value>1</value>
        <description>
          Multiple of queue b's capacity that a single user may acquire.
        </description>
      </property>
    
      <!-- Maximum capacity, in percent, that each child queue of root may grow to.
      -->
      <property>
        <name>yarn.scheduler.capacity.root.default.maximum-capacity</name>
        <value>100</value>
        <description>
          The maximum capacity of the default queue.
        </description>
      </property>

      <property>
        <name>yarn.scheduler.capacity.root.a.maximum-capacity</name>
        <value>100</value>
        <description>
          The maximum capacity of queue a.
        </description>
      </property>

      <property>
        <name>yarn.scheduler.capacity.root.b.maximum-capacity</name>
        <value>100</value>
        <description>
          The maximum capacity of queue b.
        </description>
      </property>
    
        <!-- Operational state of each child queue of root.
        -->
      <property>
        <name>yarn.scheduler.capacity.root.default.state</name>
        <value>RUNNING</value>
        <description>
          The state of the default queue. State can be one of RUNNING or STOPPED.
        </description>
      </property>

      <property>
        <name>yarn.scheduler.capacity.root.a.state</name>
        <value>RUNNING</value>
        <description>
          The state of queue a. State can be one of RUNNING or STOPPED.
        </description>
      </property>

      <property>
        <name>yarn.scheduler.capacity.root.b.state</name>
        <value>RUNNING</value>
        <description>
          The state of queue b. State can be one of RUNNING or STOPPED.
        </description>
      </property>
    
      <!-- Access control: which users may submit applications to each queue. -->
      <property>
        <name>yarn.scheduler.capacity.root.default.acl_submit_applications</name>
        <value>*</value>
        <description>
          The ACL of who can submit jobs to the default queue.
        </description>
      </property>

      <property>
        <name>yarn.scheduler.capacity.root.a.acl_submit_applications</name>
        <value>*</value>
        <description>
          The ACL of who can submit jobs to queue a.
        </description>
      </property>

      <property>
        <name>yarn.scheduler.capacity.root.b.acl_submit_applications</name>
        <value>*</value>
        <description>
          The ACL of who can submit jobs to queue b.
        </description>
      </property>
    <!-- Access control: which users may administer jobs on each queue. -->
      <property>
        <name>yarn.scheduler.capacity.root.default.acl_administer_queue</name>
        <value>*</value>
        <description>
          The ACL of who can administer jobs on the default queue.
        </description>
      </property>

      <property>
        <name>yarn.scheduler.capacity.root.a.acl_administer_queue</name>
        <value>*</value>
        <description>
          The ACL of who can administer jobs on queue a.
        </description>
      </property>

      <property>
        <name>yarn.scheduler.capacity.root.b.acl_administer_queue</name>
        <value>*</value>
        <description>
          The ACL of who can administer jobs on queue b.
        </description>
      </property>
    
      <!-- Missed scheduling opportunities tolerated before falling back to rack-local containers. -->
      <property>
        <name>yarn.scheduler.capacity.node-locality-delay</name>
        <value>40</value>
        <description>
          Number of missed scheduling opportunities after which the CapacityScheduler 
          attempts to schedule rack-local containers. 
          Typically this should be set to number of nodes in the cluster, By default is setting 
          approximately number of nodes in one rack which is 40.
        </description>
      </property>
    
      <!-- User/group-to-queue mappings; the value is left empty here, so no mappings are configured. -->
      <property>
        <name>yarn.scheduler.capacity.queue-mappings</name>
        <value></value>
        <description>
          A list of mappings that will be used to assign jobs to queues
          The syntax for this list is [u|g]:[name]:[queue_name][,next mapping]*
          Typically this list will be used to map users to queues,
          for example, u:%user:%user maps all users to queues with the same name
          as the user.
        </description>
      </property>
    
      <!-- Whether a configured queue mapping takes precedence over the queue the user asked for. -->
      <property>
        <name>yarn.scheduler.capacity.queue-mappings-override.enable</name>
        <value>false</value>
        <description>
          If a queue mapping is present, will it override the value specified
          by the user? This can be used by administrators to place jobs in queues
          that are different than the one specified by the user.
          The default is false.
        </description>
      </property>
    
    </configuration>

    配置完之后使用刷新命令

    yarn rmadmin -refreshQueues

    然后进入集群的yarn界面就可以看到队列变成了三个

     那么接下来就是怎么设置job在其他队列运行

    要知道,job 在哪个队列运行是由 mapred-default.xml 文件中的 mapreduce.job.queuename 参数决定的(默认值为 default)

    所以需要更改这个配置:

    1.如果用 IDEA 运行,可以在代码中设置:

    // Route the job to queue "a"; mapreduce.job.queuename replaces the deprecated mapred.job.queue.name key.
    conf.set("mapreduce.job.queuename", "a");

    这样就指定了在a队列运行job

    2.如果在Linux上运行jar包,则可以用

    hadoop jar hadoop-mapreduce-examples-2.7.2.jar  wordcount -D mapreduce.job.queuename=a /mapjoin /output3

     如图,job切换到了a队列

  • 相关阅读:
    MTputty设置字体 MTputty菜单栏隐藏显示
    Ubuntu安装SSH服务
    LeetCode--Text Justification
    海量数据处理--hash和bit-map
    海量数据处理--bloom filter
    SSH原理简介(转)
    Ubuntu12.04 安装android集成环境(xserver被卸载)
    内存对齐(转)
    Ubuntu12.04 安装(无法将 grub-efi 软件包安装到/target/中,如果没有 GRUB 启动引导期,所安装的系统无法启动)
    算法题--等概率产生0和1(有扩展)
  • 原文地址:https://www.cnblogs.com/yangxusun9/p/12404136.html
Copyright © 2011-2022 走看看