CentOS 7 Hadoop Single-Node Deployment

    Preparation

    Download Hadoop (hadoop 2.7.5):

    http://archive.apache.org/dist/hadoop/core/

    Disable the firewall:

    # Stop the firewall
    systemctl stop firewalld
    
    # Keep the firewall from starting on boot
    systemctl disable firewalld
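
    To confirm the firewall is really off before continuing, a quick check (assuming systemd and firewalld, as above):

    # Should report the unit as inactive (dead) and disabled
    systemctl status firewalld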

    Edit the hosts file so the Hadoop hostname maps to the machine's real IP address (not 127.0.0.1):

    # vim /etc/hosts
    127.0.0.1   localhost localhost.localdomain localhost4 localhost4.localdomain4
    ::1         localhost localhost.localdomain localhost6 localhost6.localdomain6
    
    10.0.0.19 vsr119
    10.1.0.19 sr119
    10.1.0.31  sr131
    10.0.0.29 vsr129
    10.1.0.29 sr129
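
    This guide runs everything on sr131 (see the slaves file below), so name resolution can be sanity-checked against the LAN address:

    # Should print 10.1.0.31, not 127.0.0.1
    getent hosts sr131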

    Install the JDK

    # Extract (use JDK 1.8 or later)
    tar xf /opt/jdk-8u202-linux-x64.tar.gz
    # (the extracted directory is assumed to be moved/renamed to /home/jiangchun/jdk1.8 below)
    
    # Configure environment variables (as your own user)
    $ vim .bashrc
    # JAVA_HOME
    export JAVA_HOME=/home/jiangchun/jdk1.8
    export CLASSPATH=.:$JAVA_HOME/lib/dt.jar:$JAVA_HOME/lib/tools.jar:$CLASSPATH
    export PATH=$JAVA_HOME/bin:$JAVA_HOME/jre/bin:$PATH
    
    # Reload environment variables
    source .bashrc
    
    # Verify
    $ java -version
    java version "1.8.0_161"
    Java(TM) SE Runtime Environment (build 1.8.0_161-b12)
    Java HotSpot(TM) 64-Bit Server VM (build 25.161-b12, mixed mode)

    Install Hadoop

    # Extract
    tar xf hadoop-2.7.5.tar.gz
    
    # Configure environment variables
    vim .bashrc
    export HADOOP_HOME=/home/jiangchun/hadoop-2.7.5
    export PATH=$PATH:$HADOOP_HOME/bin
    export PATH=$PATH:$HADOOP_HOME/sbin
    
    # Reload environment variables
    source .bashrc
    
    # Verify
    $ hadoop version
    Hadoop 2.7.5
    Subversion https://shv@git-wip-us.apache.org/repos/asf/hadoop.git -r 18065c2b6806ed4aa6a3187d77cbe21bb3dba075
    Compiled by kshvachk on 2017-12-16T01:06Z
    Compiled with protoc 2.5.0
    From source with checksum 9f118f95f47043332d51891e37f736e9
    This command was run using /home/jiangchun/hadoop-2.7.5/share/hadoop/common/hadoop-common-2.7.5.jar

    Configure Hadoop

    1. Configure HDFS

    hadoop-env.sh

    # vim /home/jiangchun/hadoop-2.7.5/etc/hadoop/hadoop-env.sh
    
    # Set the JDK path
    # The java implementation to use.
    export JAVA_HOME=/home/jiangchun/jdk1.8

    core-site.xml

    # fs.defaultFS: the default filesystem; the NameNode (NN) will run on this host (sr131)
    # vim /home/jiangchun/hadoop-2.7.5/etc/hadoop/core-site.xml
    
    <configuration>
       <property>
         <name>fs.defaultFS</name>
         <value>hdfs://sr131:9000</value>
      </property>
    
    </configuration>
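
    After editing, the effective value can be read back through the getconf tool (HADOOP_HOME/bin is already on PATH from the step above):

    # Should print hdfs://sr131:9000
    hdfs getconf -confKey fs.defaultFS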

    hdfs-site.xml

    <configuration>
    
        <property>
          <name>dfs.namenode.name.dir</name>
          <value>file:/home/jiangchun/hadoop-2.7.5/dfs/name</value>
        </property>
        <property>
          <name>dfs.datanode.data.dir</name>
          <value>/mnt/DP_disk1/tpcds/dfs,/mnt/DP_disk2/tpcds/dfs,/mnt/DP_disk3/tpcds/dfs,/mnt/DP_disk4/tpcds/dfs,/mnt/DP_disk5/tpcds/dfs,/mnt/DP_disk6/tpcds/dfs,/mnt/DP_disk7/tpcds/dfs,/mnt/DP_disk8/tpcds/dfs</value>
        </property>
        <property>
          <name>dfs.permissions</name>
          <value>false</value>
        </property>
        <property>
          <name>dfs.replication</name>
          <value>1</value>
        </property>
    
        <property>
          <name>dfs.datanode.socket.write.timeout</name>
          <value>600000</value>
        </property>
        <!--
        <property>
          <name>dfs.socket.timeout</name>
          <value>0</value>
        </property>
        -->
        <property>
          <name>dfs.datanode.max.transfer.threads</name>
          <value>4096000</value>
        </property>
        <property>
          <name>dfs.datanode.directoryscan.throttle.limit.ms.per.sec</name>
          <value>1000</value>
        </property>
        <property>
          <name>dfs.datanode.handler.count</name>
          <value>40</value>
        </property>
        <property>
          <name>dfs.client.socket-timeout</name>
          <value>300000</value>
        </property>
        <!-- dfs.datanode.max.xcievers is the deprecated name of
             dfs.datanode.max.transfer.threads (already set above),
             so this entry is redundant -->
        <property>
          <name>dfs.datanode.max.xcievers</name>
          <value>8192</value>
        </property>
    </configuration>
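
    The directories referenced above must exist on writable disks before the daemons start; a minimal sketch (assuming the eight data disks are mounted as in the mount script below):

    mkdir -p /home/jiangchun/hadoop-2.7.5/dfs/name
    mkdir -p /mnt/DP_disk{1..8}/tpcds/dfs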

    slaves

    # vim slaves    # use the name registered for this host in /etc/hosts
    sr131    

    Mount the disks

    # vim mount.sh
    #!/bin/bash
    # Create the mount points if they do not already exist
    mkdir -p /mnt/DP_disk{1..8} /mnt/pmem0 /mnt/pmem1
    
    mount -t ext4 -o noatime,nodiratime /dev/sdi1 /mnt/DP_disk1
    mount -t ext4 -o noatime,nodiratime /dev/sdb1 /mnt/DP_disk2
    mount -t ext4 -o noatime,nodiratime /dev/sdc1 /mnt/DP_disk3
    mount -t ext4 -o noatime,nodiratime /dev/sdd1 /mnt/DP_disk4
    mount -t ext4 -o noatime,nodiratime /dev/sde1 /mnt/DP_disk5
    mount -t ext4 -o noatime,nodiratime /dev/sdf1 /mnt/DP_disk6
    mount -t ext4 -o noatime,nodiratime /dev/sdg1 /mnt/DP_disk7
    mount -t ext4 -o noatime,nodiratime /dev/sdh1 /mnt/DP_disk8
    
    mount -o dax  /dev/pmem0    /mnt/pmem0
    mount -o dax  /dev/pmem1    /mnt/pmem1
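
    These mounts are lost on reboot; to make them persistent you could add matching entries to /etc/fstab instead, e.g. (a sketch assuming the same devices as above, and that the pmem namespaces are formatted ext4):

    # /etc/fstab
    /dev/sdi1    /mnt/DP_disk1   ext4   noatime,nodiratime   0 0
    /dev/sdb1    /mnt/DP_disk2   ext4   noatime,nodiratime   0 0
    # ...one line per remaining data disk...
    /dev/pmem0   /mnt/pmem0      ext4   dax                  0 0
    /dev/pmem1   /mnt/pmem1      ext4   dax                  0 0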

    Start HDFS

    # Format the NameNode before first use. If it was formatted before,
    # stop the daemons and delete the old data first, then reformat.
    hdfs namenode -format
    
    # Start the NameNode
    hadoop-daemon.sh start namenode
    
    # Start the DataNode
    hadoop-daemon.sh start datanode
    
    # Verify: list the JVM processes
    jps
    
    # 84609 Jps
    # 84242 NameNode
    # 84471 DataNode

    Point a browser at the CentOS machine's IP address plus the port (50070 by default) to reach the HDFS web UI.
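
    A quick smoke test against the running filesystem (the target path below is just an example):

    # Create a directory, upload a file, and list it back
    hdfs dfs -mkdir -p /tmp/smoketest
    hdfs dfs -put /etc/hosts /tmp/smoketest/
    hdfs dfs -ls /tmp/smoketest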

    2. Configure YARN

    yarn-env.sh

    # vim /home/jiangchun/hadoop-2.7.5/etc/hadoop/yarn-env.sh
    Keep the defaults.

    yarn-site.xml

    # vim yarn-site.xml
    
    <configuration>
    
    <!-- Site specific YARN configuration properties -->
    
       <property>
          <name>yarn.resourcemanager.hostname</name>
          <value>sr131</value>
       </property>
       <property>
          <name>yarn.resourcemanager.scheduler.class</name>
          <value>org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler</value>
       </property>
       <property>
          <name>yarn.nodemanager.aux-services</name>
          <value>mapreduce_shuffle</value>
       </property>
       <property>
          <name>yarn.nodemanager.aux-services.mapreduce.shuffle.class</name>
          <value>org.apache.hadoop.mapred.ShuffleHandler</value>
       </property>
       <property>
          <name>yarn.nodemanager.disk-health-checker.max-disk-utilization-per-disk-percentage</name>
          <value>99</value>
       </property>
       <property>
          <name>yarn.nodemanager.resource.memory-mb</name>
          <!--<value>786432</value>-->
          <value>1715472</value>
       </property>
       <property>
          <name>yarn.scheduler.maximum-allocation-vcores</name>
          <!--<value>120</value>-->
          <value>96</value>
       </property>
       <property>
          <name>yarn.scheduler.minimum-allocation-vcores</name>
          <value>1</value>
       </property>
       <property>
          <name>yarn.nodemanager.local-dirs</name>
         <value>/mnt/DP_disk1/tpcds/yarn,/mnt/DP_disk2/tpcds/yarn,/mnt/DP_disk3/tpcds/yarn,/mnt/DP_disk4/tpcds/yarn,/mnt/DP_disk5/tpcds/yarn,/mnt/DP_disk6/tpcds/yarn,/mnt/DP_disk7/tpcds/yarn,/mnt/DP_disk8/tpcds/yarn</value>
       </property>
       <property>
          <name>yarn.log-aggregation-enable</name>
          <value>false</value>
       </property>
    
       <property>
          <name>yarn.nodemanager.log.retain-seconds</name>
          <value>25920000</value>
       </property>
    
       <property>
          <name>yarn.log.server.url</name>
          <value>http://sr131:19888/jobhistory/logs/</value>
       </property>
    
       <property>
          <name>yarn.nodemanager.pmem-check-enabled</name>
          <value>false</value>
       </property>
    
       <property>
          <name>yarn.nodemanager.vmem-check-enabled</name>
          <value>false</value>
       </property>
    
    </configuration>
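
    As with the DataNode data directories, the NodeManager local-dirs above should exist on the mounted disks and be writable before startup; a minimal sketch:

    mkdir -p /mnt/DP_disk{1..8}/tpcds/yarn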

    Start YARN (HDFS must already be running):

    # Start the ResourceManager
    yarn-daemon.sh start resourcemanager
    
    # Start the NodeManager
    yarn-daemon.sh start nodemanager
    
    # List the JVM processes
    jps
    
    # 1604 DataNode
    # 1877 ResourceManager
    # 3223 Jps
    # 1468 NameNode
    # 2172 NodeManager

    Point a browser at the CentOS machine's IP address plus the port (8088 by default) to reach the YARN web UI.
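
    The node's registration can also be confirmed from the command line:

    # Should show sr131 in the RUNNING state
    yarn node -list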

    3. Configure MapReduce

    mapred-env.sh

    # vim mapred-env.sh
    
    Keep the defaults. If that does not work, set the JDK path explicitly:
    export JAVA_HOME=/home/jiangchun/jdk1.8

    mapred-site.xml

    # Copy the template
    cp /home/jiangchun/hadoop-2.7.5/etc/hadoop/mapred-site.xml.template /home/jiangchun/hadoop-2.7.5/etc/hadoop/mapred-site.xml
    
    # Edit
    vim /home/jiangchun/hadoop-2.7.5/etc/hadoop/mapred-site.xml
    <configuration>
        <!-- Run MapReduce on YARN -->
        <property>
            <name>mapreduce.framework.name</name>
            <value>yarn</value>
        </property>
    </configuration>
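
    With HDFS, YARN, and MapReduce all configured, an end-to-end check is to run one of the example jobs bundled with the distribution; the JobHistory server referenced by yarn.log.server.url above is started separately:

    # Start the MapReduce JobHistory server (serves port 19888)
    mr-jobhistory-daemon.sh start historyserver
    
    # Estimate pi with 2 map tasks and 10 samples each; the result prints to stdout
    hadoop jar $HADOOP_HOME/share/hadoop/mapreduce/hadoop-mapreduce-examples-2.7.5.jar pi 2 10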