zoukankan      html  css  js  c++  java
  • 3.hadoop完全分布式搭建

    3.Hadoop完全分布式搭建

    1.完全分布式搭建

    1. 配置

      #cd /soft/hadoop/etc/
      #mv hadoop local
      #cp -r local full
      #ln -s full hadoop
      #cd hadoop
      
      
    2. 修改core-site.xml配置文件

      #vim core-site.xml
      [core-site.xml配置如下]
      	<?xml version="1.0"?>
         <configuration>
         	<property>
         		<name>fs.defaultFS</name>
         		<value>hdfs://hadoop-1</value>
         	</property>
         </configuration>
      
    3. 修改hdfs-site.xml配置文件

      #vim hdfs-site.xml 
      [hdfs-site.xml配置如下]
      <?xml version="1.0"?>
      <configuration>
      	<property>
      		<name>dfs.replication</name>
      		<value>3</value>
      	</property>
      	<property>
      		<name>dfs.namenode.secondary.http-address</name>
      		<value>hadoop-2:50090</value>
      	</property>
      </configuration>
      
    4. 修改mapred-site.xml配置文件

      #cp mapred-site.xml.template mapred-site.xml
      #vim mapred-site.xml
      [mapred-site.xml配置如下]
      <?xml version="1.0"?>
      <configuration>
      	<property>
      		<name>mapreduce.framework.name</name>
      		<value>yarn</value>
      	</property>
      </configuration>
      
    5. 修改yarn-site.xml配置文件

      #vim yarn-site.xml 
        [yarn-site.xml配置如下]
      <?xml version="1.0"?>
      <configuration>
              <property>
                      <name>yarn.resourcemanager.hostname</name>
                      <value>hadoop-1</value>
              </property>
              <property>
                      <name>yarn.nodemanager.aux-services</name>
                      <value>mapreduce_shuffle</value>
              </property>
      </configuration>
      
    6. 修改slaves配置文件

      #vim slaves
      [slaves]
      hadoop-2
      hadoop-3
      hadoop-4
      hadoop-5
      
    7. 同步到其他节点

       #scp -r /soft/hadoop/etc/full  hadoop-2:/soft/hadoop/etc/
       #scp -r /soft/hadoop/etc/full  hadoop-3:/soft/hadoop/etc/
       #scp -r /soft/hadoop/etc/full  hadoop-4:/soft/hadoop/etc/
       #scp -r /soft/hadoop/etc/full  hadoop-5:/soft/hadoop/etc/
       #ssh hadoop-2 ln -s /soft/hadoop/etc/full /soft/hadoop/etc/hadoop
       #ssh hadoop-3 ln -s /soft/hadoop/etc/full /soft/hadoop/etc/hadoop
       #ssh hadoop-4 ln -s /soft/hadoop/etc/full /soft/hadoop/etc/hadoop
       #ssh hadoop-5 ln -s /soft/hadoop/etc/full /soft/hadoop/etc/hadoop
      
    8. 格式化hdfs分布式文件系统

      #hdfs namenode -format
      
    9. 启动服务

      [root@hadoop-1 hadoop]# start-all.sh 
      This script is Deprecated. Instead use start-dfs.sh and start-yarn.sh
      Starting namenodes on [hadoop-1]
      hadoop-1: starting namenode, logging to /soft/hadoop-2.7.3/logs/hadoop-root-namenode-hadoop-1.out
      hadoop-2: starting datanode, logging to /soft/hadoop-2.7.3/logs/hadoop-root-datanode-hadoop-2.out
      hadoop-3: starting datanode, logging to /soft/hadoop-2.7.3/logs/hadoop-root-datanode-hadoop-3.out
      hadoop-4: starting datanode, logging to /soft/hadoop-2.7.3/logs/hadoop-root-datanode-hadoop-4.out
      hadoop-5: starting datanode, logging to /soft/hadoop-2.7.3/logs/hadoop-root-datanode-hadoop-5.out
      Starting secondary namenodes [hadoop-2]
      hadoop-2: starting secondarynamenode, logging to /soft/hadoop-2.7.3/logs/hadoop-root-secondarynamenode-hadoop-2.out
      starting yarn daemons
      starting resourcemanager, logging to /soft/hadoop-2.7.3/logs/yarn-root-resourcemanager-hadoop-1.out
      hadoop-3: starting nodemanager, logging to /soft/hadoop-2.7.3/logs/yarn-root-nodemanager-hadoop-3.out
      hadoop-4: starting nodemanager, logging to /soft/hadoop-2.7.3/logs/yarn-root-nodemanager-hadoop-4.out
      hadoop-2: starting nodemanager, logging to /soft/hadoop-2.7.3/logs/yarn-root-nodemanager-hadoop-2.out
      hadoop-5: starting nodemanager, logging to /soft/hadoop-2.7.3/logs/yarn-root-nodemanager-hadoop-5.out
      
    10. 查看服务运行状态

          [root@hadoop-1 hadoop]# jps
          16358 ResourceManager
          12807 NodeManager                                                                                                                 
          16011 NameNode
          16204 SecondaryNameNode
          16623 Jps
          
          hadoop-5 | SUCCESS | rc=0 >>
          16993 NodeManager
          16884 DataNode
          17205 Jps
          
          hadoop-1 | SUCCESS | rc=0 >>
          28520 ResourceManager
          28235 NameNode
          29003 Jps
          
          hadoop-2 | SUCCESS | rc=0 >>
          17780 Jps
          17349 DataNode
          17529 NodeManager
          17453 SecondaryNameNode
          
          hadoop-4 | SUCCESS | rc=0 >>
          17105 Jps
          16875 NodeManager
          16766 DataNode
          
          hadoop-3 | SUCCESS | rc=0 >>
          16769 DataNode
          17121 Jps
          16878 NodeManager
      
    11. 登陆WEB查看

    2. 完全分布式单词统计

    1. 通过hadoop自带的demo运行单词统计

      #mkdir /input
      #cd /input/
      #echo "hello world" > file1.txt
      #echo "hello world" > file2.txt
      #echo "hello world" > file3.txt
      #echo "hello hadoop" > file4.txt
      #echo "hello hadoop" > file5.txt
      #echo "hello mapreduce" > file6.txt
      #echo "hello mapreduce" > file7.txt
      #hadoop dfs -mkdir /input
      #hdfs dfs -ls /
      #hadoop fs -ls /
      #hadoop fs -put /input/* /input
      #hadoop fs -ls /input
      
    2. 开始统计

      [root@hadoop-1 ~]# hadoop jar /soft/hadoop/share/hadoop/mapreduce/hadoop-mapreduce-examples-2.7.3.jar wordcount /input/ /output
      17/05/14 23:01:07 INFO client.RMProxy: Connecting to ResourceManager at hadoop-1/10.31.133.19:8032
      17/05/14 23:01:09 INFO input.FileInputFormat: Total input paths to process : 7
      17/05/14 23:01:10 INFO mapreduce.JobSubmitter: number of splits:7
      17/05/14 23:01:10 INFO mapreduce.JobSubmitter: Submitting tokens for job: job_1494773207391_0001
      17/05/14 23:01:10 INFO impl.YarnClientImpl: Submitted application application_1494773207391_0001
      17/05/14 23:01:11 INFO mapreduce.Job: The url to track the job: http://hadoop-1:8088/proxy/application_1494773207391_0001/
      17/05/14 23:01:11 INFO mapreduce.Job: Running job: job_1494773207391_0001
      17/05/14 23:01:23 INFO mapreduce.Job: Job job_1494773207391_0001 running in uber mode : false
      17/05/14 23:01:23 INFO mapreduce.Job:  map 0% reduce 0%
      17/05/14 23:01:56 INFO mapreduce.Job:  map 43% reduce 0%
      17/05/14 23:01:57 INFO mapreduce.Job:  map 100% reduce 0%
      17/05/14 23:02:04 INFO mapreduce.Job:  map 100% reduce 100%
      17/05/14 23:02:05 INFO mapreduce.Job: Job job_1494773207391_0001 completed successfully
      17/05/14 23:02:05 INFO mapreduce.Job: Counters: 50
              File System Counters
                      FILE: Number of bytes read=184
                      FILE: Number of bytes written=949365
                      FILE: Number of read operations=0
                      FILE: Number of large read operations=0
                      FILE: Number of write operations=0
                      HDFS: Number of bytes read=801
                      HDFS: Number of bytes written=37
                  HDFS: Number of read operations=24
                  HDFS: Number of large read operations=0
                  HDFS: Number of write operations=2
          Job Counters 
                  Killed map tasks=1
                  Launched map tasks=7
                  Launched reduce tasks=1
                  Data-local map tasks=7
                  Total time spent by all maps in occupied slots (ms)=216289
                  Total time spent by all reduces in occupied slots (ms)=4827
                  Total time spent by all map tasks (ms)=216289
                  Total time spent by all reduce tasks (ms)=4827
                  Total vcore-milliseconds taken by all map tasks=216289
                  Total vcore-milliseconds taken by all reduce tasks=4827
                  Total megabyte-milliseconds taken by all map tasks=221479936
                  Total megabyte-milliseconds taken by all reduce tasks=4942848
          Map-Reduce Framework
                  Map input records=7
                  Map output records=14
                  Map output bytes=150
                  Map output materialized bytes=220
                  Input split bytes=707
                  Combine input records=14
                  Combine output records=14
                  Reduce input groups=4
                  Reduce shuffle bytes=220
                  Reduce input records=14
                  Reduce output records=4
                  Spilled Records=28
                  Shuffled Maps =7
                  Failed Shuffles=0
                  Merged Map outputs=7
                  GC time elapsed (ms)=3616
                  CPU time spent (ms)=3970
                  Physical memory (bytes) snapshot=1528823808
                  Virtual memory (bytes) snapshot=16635846656
                  Total committed heap usage (bytes)=977825792
          Shuffle Errors
                  BAD_ID=0
                  CONNECTION=0
                  IO_ERROR=0
                  WRONG_LENGTH=0
                  WRONG_MAP=0
                  WRONG_REDUCE=0
          File Input Format Counters 
                  Bytes Read=94
          File Output Format Counters 
                  Bytes Written=37
                  
      
    3. 查看

      [root@hadoop-1 ~]# hadoop fs -ls /output
      Found 2 items
      -rw-r--r--   3 root supergroup          0 2017-05-14 23:02 /output/_SUCCESS
      -rw-r--r--   3 root supergroup         37 2017-05-14 23:02 /output/part-r-00000
      [root@hadoop-1 ~]# hadoop fs -cat /output/part-r-00000
      hadoop  2
      hello   7
      mapreduce       2
      world   3
      [root@hadoop-1 ~]# 
      
  • 相关阅读:
    4.计算机启动过程的简单介绍 计算机启动流程 计算机BIOS作用 POST 开机自检 计算机启动顺序 分区表 操作系统启动
    3.操作系统简单介绍 操作系统发展历史 批处理分时系统 操作系统是什么 操作系统对文件的抽象 进程 虚拟内存是什么 操作系统作用 操作系统功能
    2.计算机组成-数字逻辑电路 门电路与半加器 异或运算半加器 全加器组成 全加器结构 反馈电路 振荡器 存储 D T 触发器 循环移位 计数器 寄存器 传输门电路 译码器 晶体管 sram rom 微处理 计算机
    1.计算机发展阶段 计算机发展历史 机械式计算机 机电式计算机 电子计算机 逻辑电路与计算机 二极管 电子管 晶体管 硅 门电路 计算机 电磁学计算机二进制
    如何解决svn清理失败 不能更新 cleanup失败 cleanup乱码 更新乱码 svn更新提示清理 清理乱码不能清理 svn故障修复SVN cleanup 陷入死循环 svn cleanup时遇到错误怎么办
    eclipse svn插件卸载 重新安装 Subclipse卸载安装 The project was not built since its build path is incomplete This client is too old to work with the working copy at
    java for循环里面执行sql语句操作,有效结果只有一次,只执行了一次sql mybatis 循环执行update生效一次 实际只执行一次
    windows资源管理器多标签打开 windows文件夹多标签浏览 浏览器tab页面一样浏览文件夹 clover win8 win10 报错 无响应问题怎么解决 clover卡死 clover怎么换皮肤
    批处理启动vm虚拟机服务 vm12启动无界面启动vm虚拟机系统 windows上如何操作服务 sc net启动关闭服务
    不能ssh连接ubuntu linux 服务器 secureCRT不能ssh连接服务器 不能远程ssh连接虚拟机的ubuntu linux
  • 原文地址:https://www.cnblogs.com/liu-yao/p/7067900.html
Copyright © 2011-2022 走看看