  • Single-Node Hadoop Deployment on CentOS 7

    Preparation

    Download Hadoop (hadoop-2.7.5):

    http://archive.apache.org/dist/hadoop/core/

    Disable the firewall:

    # Stop the firewall
    systemctl stop firewalld
    
    # Disable the firewall from starting at boot
    systemctl disable firewalld
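
    A quick check that the firewall really is down before continuing:

    # Should print "inactive" once firewalld is stopped
    systemctl is-active firewalld
    
    # Should print "disabled" once autostart is off
    systemctl is-enabled firewalld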

    Edit the hosts file so the Hadoop hostname resolves to the machine's real IP address (not 127.0.0.1):

    # vim /etc/hosts
    127.0.0.1   localhost localhost.localdomain localhost4 localhost4.localdomain4
    ::1         localhost localhost.localdomain localhost6 localhost6.localdomain6
    
    10.0.0.19 vsr119
    10.1.0.19 sr119
    10.1.0.31  sr131
    10.0.0.29 vsr129
    10.1.0.29 sr129
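
    To confirm the entry resolves as expected (this post uses sr131 as the Hadoop node):

    # Should print "10.1.0.31  sr131"
    getent hosts sr131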

    Install the JDK

    # Extract (needs JDK 1.8 or later; the archive unpacks to jdk1.8.0_202,
    # so rename or symlink it to match JAVA_HOME below)
    tar xf /opt/jdk-8u202-linux-x64.tar.gz
    
    # Configure environment variables (as your own user)
    $ vim .bashrc
    # JAVA_HOME
    export JAVA_HOME=/home/jiangchun/jdk1.8
    export CLASSPATH=.:$JAVA_HOME/lib/dt.jar:$JAVA_HOME/lib/tools.jar:$CLASSPATH
    export PATH=$JAVA_HOME/bin:$JAVA_HOME/jre/bin:$PATH
    
    # Reload the environment
    source .bashrc
    
    # Verify
    $ java -version
    java version "1.8.0_161"
    Java(TM) SE Runtime Environment (build 1.8.0_161-b12)
    Java HotSpot(TM) 64-Bit Server VM (build 25.161-b12, mixed mode)

    Install Hadoop

    # Extract
    tar xf hadoop-2.7.5.tar.gz
    
    # Configure environment variables
    vim .bashrc
    export HADOOP_HOME=/home/jiangchun/hadoop-2.7.5
    export PATH=$PATH:$HADOOP_HOME/bin
    export PATH=$PATH:$HADOOP_HOME/sbin
    
    # Reload the environment
    source .bashrc
    
    # Verify
    $ hadoop version
    Hadoop 2.7.5
    Subversion https://shv@git-wip-us.apache.org/repos/asf/hadoop.git -r 18065c2b6806ed4aa6a3187d77cbe21bb3dba075
    Compiled by kshvachk on 2017-12-16T01:06Z
    Compiled with protoc 2.5.0
    From source with checksum 9f118f95f47043332d51891e37f736e9
    This command was run using /home/jiangchun/hadoop-2.7.5/share/hadoop/common/hadoop-common-2.7.5.jar

    Configure Hadoop

    1. Configure HDFS

    hadoop-env.sh

    # vim /home/jiangchun/hadoop-2.7.5/etc/hadoop/hadoop-env.sh
    
    # Set the JDK path
    # The java implementation to use.
    export JAVA_HOME=/home/jiangchun/jdk1.8

    core-site.xml

    # fs.defaultFS: the default filesystem; the NameNode will run on this host (sr131)
    # vim /home/jiangchun/hadoop-2.7.5/etc/hadoop/core-site.xml
    
    <configuration>
       <property>
         <name>fs.defaultFS</name>
         <value>hdfs://sr131:9000</value>
      </property>
    
    </configuration>
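
    With the file saved, the value can be read back without starting any daemon (PATH as set up earlier):

    # Should print hdfs://sr131:9000
    hdfs getconf -confKey fs.defaultFS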

    hdfs-site.xml

    <configuration>
    
        <property>
          <name>dfs.namenode.name.dir</name>
          <value>file:/home/jiangchun/hadoop-2.7.5/dfs/name</value>
        </property>
        <property>
          <name>dfs.datanode.data.dir</name>
          <value>/mnt/DP_disk1/tpcds/dfs,/mnt/DP_disk2/tpcds/dfs,/mnt/DP_disk3/tpcds/dfs,/mnt/DP_disk4/tpcds/dfs,/mnt/DP_disk5/tpcds/dfs,/mnt/DP_disk6/tpcds/dfs,/mnt/DP_disk7/tpcds/dfs,/mnt/DP_disk8/tpcds/dfs</value>
        </property>
        <property>
          <name>dfs.permissions</name>
          <value>false</value>
        </property>
        <property>
          <name>dfs.replication</name>
          <value>1</value>
        </property>
    
        <property>
          <name>dfs.datanode.socket.write.timeout</name>
          <value>600000</value>
        </property>
        <!--
        <property>
          <name>dfs.socket.timeout</name>
          <value>0</value>
        </property>
        -->
        <property>
          <name>dfs.datanode.max.transfer.threads</name>
          <value>4096000</value>
        </property>
        <property>
          <name>dfs.datanode.directoryscan.throttle.limit.ms.per.sec</name>
          <value>1000</value>
        </property>
        <property>
          <name>dfs.datanode.handler.count</name>
          <value>40</value>
        </property>
        <property>
          <name>dfs.client.socket-timeout</name>
          <value>300000</value>
        </property>
        <!-- dfs.datanode.max.xcievers is the deprecated name of
             dfs.datanode.max.transfer.threads, which is already set above -->
        <property>
          <name>dfs.datanode.max.xcievers</name>
          <value>8192</value>
        </property>
    </configuration>

    slaves

    # vim slaves    # list this host under the name registered in /etc/hosts
    sr131

    Mount the disks

    # vim mount.sh
    mount -t ext4 -o noatime,nodiratime /dev/sdi1 /mnt/DP_disk1
    mount -t ext4 -o noatime,nodiratime /dev/sdb1 /mnt/DP_disk2
    mount -t ext4 -o noatime,nodiratime /dev/sdc1 /mnt/DP_disk3
    mount -t ext4 -o noatime,nodiratime /dev/sdd1 /mnt/DP_disk4
    mount -t ext4 -o noatime,nodiratime /dev/sde1 /mnt/DP_disk5
    mount -t ext4 -o noatime,nodiratime /dev/sdf1 /mnt/DP_disk6
    mount -t ext4 -o noatime,nodiratime /dev/sdg1 /mnt/DP_disk7
    mount -t ext4 -o noatime,nodiratime /dev/sdh1 /mnt/DP_disk8
    
    mount -o dax  /dev/pmem0    /mnt/pmem0
    mount -o dax  /dev/pmem1    /mnt/pmem1
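
    Run the script, then pre-create the directories that the HDFS and YARN configs in this post point at (Hadoop creates some of these on its own, but making them up front avoids permission surprises):

    # Mount everything, then verify
    sh mount.sh
    df -h | grep -E 'DP_disk|pmem'
    
    # NameNode metadata directory (from hdfs-site.xml)
    mkdir -p /home/jiangchun/hadoop-2.7.5/dfs/name
    
    # DataNode and NodeManager directories, one pair per disk
    for i in $(seq 1 8); do
        mkdir -p /mnt/DP_disk${i}/tpcds/dfs /mnt/DP_disk${i}/tpcds/yarn
    done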

    Start HDFS

    # Format the filesystem on first use. If it was formatted before, stop the
    # processes and delete the old files first, then reformat.
    hdfs namenode -format
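    
    # A sketch of the clean-up before any reformat (paths from hdfs-site.xml above):
    #   hadoop-daemon.sh stop datanode && hadoop-daemon.sh stop namenode
    #   rm -rf /home/jiangchun/hadoop-2.7.5/dfs/name/* /mnt/DP_disk*/tpcds/dfs/*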
    
    # Start the NameNode
    hadoop-daemon.sh start namenode
    
    # Start the DataNode
    hadoop-daemon.sh start datanode
    
    # Verify by listing the JVM processes
    jps
    
    # 84609 Jps
    # 84242 NameNode
    # 84471 DataNode

    In a browser, open the CentOS machine's IP address plus the port (50070 by default) to reach the HDFS web UI.
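
    The same check from the shell, if no browser is handy (host and port as configured above):

    # A 200 response means the NameNode web UI is up
    curl -s -o /dev/null -w "%{http_code}\n" http://sr131:50070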

    2. Configure YARN

    yarn-env.sh

    # vim /home/jiangchun/hadoop-2.7.5/etc/hadoop/yarn-env.sh
    Keep the defaults.

    yarn-site.xml

    # vim yarn-site.xml
    
    <configuration>
    
    <!-- Site specific YARN configuration properties -->
    
       <property>
          <name>yarn.resourcemanager.hostname</name>
          <value>sr131</value>
       </property>
       <property>
          <name>yarn.resourcemanager.scheduler.class</name>
          <value>org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler</value>
       </property>
       <property>
          <name>yarn.nodemanager.aux-services</name>
          <value>mapreduce_shuffle</value>
       </property>
       <property>
          <name>yarn.nodemanager.aux-services.mapreduce.shuffle.class</name>
          <value>org.apache.hadoop.mapred.ShuffleHandler</value>
       </property>
       <property>
          <name>yarn.nodemanager.disk-health-checker.max-disk-utilization-per-disk-percentage</name>
          <value>99</value>
       </property>
       <property>
          <name>yarn.nodemanager.resource.memory-mb</name>
          <!--<value>786432</value>-->
          <value>1715472</value>
       </property>
       <property>
          <name>yarn.scheduler.maximum-allocation-vcores</name>
          <!--<value>120</value>-->
          <value>96</value>
       </property>
       <property>
          <name>yarn.scheduler.minimum-allocation-vcores</name>
          <value>1</value>
       </property>
       <property>
          <name>yarn.nodemanager.local-dirs</name>
          <value>/mnt/DP_disk1/tpcds/yarn,/mnt/DP_disk2/tpcds/yarn,/mnt/DP_disk3/tpcds/yarn,/mnt/DP_disk4/tpcds/yarn,/mnt/DP_disk5/tpcds/yarn,/mnt/DP_disk6/tpcds/yarn,/mnt/DP_disk7/tpcds/yarn,/mnt/DP_disk8/tpcds/yarn</value>
       </property>
       <property>
          <name>yarn.log-aggregation-enable</name>
          <value>false</value>
       </property>
    
       <property>
          <name>yarn.nodemanager.log.retain-seconds</name>
          <value>25920000</value>
       </property>
    
       <property>
          <name>yarn.log.server.url</name>
          <value>http://sr131:19888/jobhistory/logs/</value>
       </property>
    
       <property>
          <name>yarn.nodemanager.pmem-check-enabled</name>
          <value>false</value>
       </property>
    
       <property>
          <name>yarn.nodemanager.vmem-check-enabled</name>
          <value>false</value>
       </property>
    
    </configuration>

    Start YARN (HDFS must already be running)

    # Start the ResourceManager
    yarn-daemon.sh start resourcemanager
    
    # Start the NodeManager
    yarn-daemon.sh start nodemanager
    
    # List the JVM processes
    jps
    
    # 1604 DataNode
    # 1877 ResourceManager
    # 3223 Jps
    # 1468 NameNode
    # 2172 NodeManager

    In a browser, open the CentOS machine's IP address plus the port (8088 by default) to reach the YARN web UI.
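
    Two quick shell-side checks (hostname and port as configured above):

    # Should list sr131 with a RUNNING state
    yarn node -list
    
    # A 200 response means the ResourceManager web UI is up
    curl -s -o /dev/null -w "%{http_code}\n" http://sr131:8088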

    3. Configure MapReduce

    mapred-env.sh

    # vim mapred-env.sh
    
    Keep the defaults.
    If they do not work, set the JDK path explicitly:
    export JAVA_HOME=/home/jiangchun/jdk1.8

    mapred-site.xml

    # Copy the template
    cp /home/jiangchun/hadoop-2.7.5/etc/hadoop/mapred-site.xml.template /home/jiangchun/hadoop-2.7.5/etc/hadoop/mapred-site.xml
    
    # Edit
    vim /home/jiangchun/hadoop-2.7.5/etc/hadoop/mapred-site.xml
    <configuration>
        <!-- Run MapReduce on YARN -->
        <property>
            <name>mapreduce.framework.name</name>
            <value>yarn</value>
        </property>
    </configuration>
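
    With HDFS and YARN running, the bundled pi example exercises the whole stack end to end (jar path follows the HADOOP_HOME used in this post):

    # Submit the example job to YARN: 2 map tasks, 10 samples each
    hadoop jar /home/jiangchun/hadoop-2.7.5/share/hadoop/mapreduce/hadoop-mapreduce-examples-2.7.5.jar pi 2 10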