    3. Hadoop Fully Distributed Setup

    1. Fully Distributed Setup

    1. Set up the configuration directory

      #cd /soft/hadoop/etc/
      #mv hadoop local       # keep the original config as "local"
      #cp -r local full      # "full" will hold the fully distributed config
      #ln -s full hadoop     # point the active config dir at "full"
      #cd hadoop
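
      A quick sanity check that the active config directory now points at full:

      #readlink /soft/hadoop/etc/hadoop    # should print: full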
      
      
    2. Edit core-site.xml

      #vim core-site.xml
      [core-site.xml is configured as follows]
      	<?xml version="1.0"?>
         <configuration>
         	<property>
         		<name>fs.defaultFS</name>
         		<value>hdfs://hadoop-1</value>
         	</property>
         </configuration>
      
    3. Edit hdfs-site.xml

      #vim hdfs-site.xml 
      [hdfs-site.xml is configured as follows]
      <?xml version="1.0"?>
      <configuration>
      	<property>
      		<name>dfs.replication</name>
      		<value>3</value>
      	</property>
      	<property>
      		<name>dfs.namenode.secondary.http-address</name>
      		<value>hadoop-2:50090</value>
      	</property>
      </configuration>
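
      Once this configuration is synced to the nodes (step 7), you can confirm which values take effect with the stock hdfs getconf tool; a quick hedged check:

      #hdfs getconf -confKey fs.defaultFS                           # expect hdfs://hadoop-1 (RPC port defaults to 8020)
      #hdfs getconf -confKey dfs.replication                        # expect 3
      #hdfs getconf -confKey dfs.namenode.secondary.http-address    # expect hadoop-2:50090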
      
    4. Edit mapred-site.xml

      #cp mapred-site.xml.template mapred-site.xml
      #vim mapred-site.xml
      [mapred-site.xml is configured as follows]
      <?xml version="1.0"?>
      <configuration>
      	<property>
      		<name>mapreduce.framework.name</name>
      		<value>yarn</value>
      	</property>
      </configuration>
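
      Without this property, mapreduce.framework.name defaults to local, in which case jobs run inside a single JVM with the local job runner and never touch YARN; the word count in section 2 relies on it being yarn.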
      
    5. Edit yarn-site.xml

      #vim yarn-site.xml 
      [yarn-site.xml is configured as follows]
      <?xml version="1.0"?>
      <configuration>
              <property>
                      <name>yarn.resourcemanager.hostname</name>
                      <value>hadoop-1</value>
              </property>
              <property>
                      <name>yarn.nodemanager.aux-services</name>
                      <value>mapreduce_shuffle</value>
              </property>
      </configuration>
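
      The mapreduce_shuffle auxiliary service is what lets reducers fetch map output from each NodeManager; without it, MapReduce jobs fail during the shuffle. Once the daemons are up (step 9), a hedged check that all four NodeManagers registered:

      #yarn node -list    # should show hadoop-2 through hadoop-5 in RUNNING state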
      
    6. Edit the slaves file

      #vim slaves
      [slaves]
      hadoop-2
      hadoop-3
      hadoop-4
      hadoop-5
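
      The start scripts read this file on the master and ssh into each listed host, so passwordless SSH from hadoop-1 to every worker must already be in place. A quick hedged check:

      #for h in hadoop-2 hadoop-3 hadoop-4 hadoop-5; do ssh $h hostname; done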
      
    7. Sync the configuration to the other nodes

       #scp -r /soft/hadoop/etc/full  hadoop-2:/soft/hadoop/etc/
       #scp -r /soft/hadoop/etc/full  hadoop-3:/soft/hadoop/etc/
       #scp -r /soft/hadoop/etc/full  hadoop-4:/soft/hadoop/etc/
       #scp -r /soft/hadoop/etc/full  hadoop-5:/soft/hadoop/etc/
       #ssh hadoop-2 ln -s /soft/hadoop/etc/full /soft/hadoop/etc/hadoop
       #ssh hadoop-3 ln -s /soft/hadoop/etc/full /soft/hadoop/etc/hadoop
       #ssh hadoop-4 ln -s /soft/hadoop/etc/full /soft/hadoop/etc/hadoop
       #ssh hadoop-5 ln -s /soft/hadoop/etc/full /soft/hadoop/etc/hadoop
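
       The same distribution can be written as a loop; a minimal sketch assuming the host naming used throughout this guide:

       #for h in hadoop-2 hadoop-3 hadoop-4 hadoop-5; do scp -r /soft/hadoop/etc/full $h:/soft/hadoop/etc/; ssh $h "ln -s /soft/hadoop/etc/full /soft/hadoop/etc/hadoop"; done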
      
    8. Format the HDFS filesystem

      #hadoop namenode -format
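
      The hadoop namenode form still works in 2.x but prints a deprecation warning; the preferred equivalent is:

      #hdfs namenode -format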
      
    9. Start the services

      [root@hadoop-1 hadoop]# start-all.sh 
      This script is Deprecated. Instead use start-dfs.sh and start-yarn.sh
      Starting namenodes on [hadoop-1]
      hadoop-1: starting namenode, logging to /soft/hadoop-2.7.3/logs/hadoop-root-namenode-hadoop-1.out
      hadoop-2: starting datanode, logging to /soft/hadoop-2.7.3/logs/hadoop-root-datanode-hadoop-2.out
      hadoop-3: starting datanode, logging to /soft/hadoop-2.7.3/logs/hadoop-root-datanode-hadoop-3.out
      hadoop-4: starting datanode, logging to /soft/hadoop-2.7.3/logs/hadoop-root-datanode-hadoop-4.out
      hadoop-5: starting datanode, logging to /soft/hadoop-2.7.3/logs/hadoop-root-datanode-hadoop-5.out
      Starting secondary namenodes [hadoop-2]
      hadoop-2: starting secondarynamenode, logging to /soft/hadoop-2.7.3/logs/hadoop-root-secondarynamenode-hadoop-2.out
      starting yarn daemons
      starting resourcemanager, logging to /soft/hadoop-2.7.3/logs/yarn-root-resourcemanager-hadoop-1.out
      hadoop-3: starting nodemanager, logging to /soft/hadoop-2.7.3/logs/yarn-root-nodemanager-hadoop-3.out
      hadoop-4: starting nodemanager, logging to /soft/hadoop-2.7.3/logs/yarn-root-nodemanager-hadoop-4.out
      hadoop-2: starting nodemanager, logging to /soft/hadoop-2.7.3/logs/yarn-root-nodemanager-hadoop-2.out
      hadoop-5: starting nodemanager, logging to /soft/hadoop-2.7.3/logs/yarn-root-nodemanager-hadoop-5.out
      
    10. Check service status

          [root@hadoop-1 hadoop]# jps
          16358 ResourceManager
          12807 NodeManager
          16011 NameNode
          16204 SecondaryNameNode
          16623 Jps
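
          (The per-host listings below appear to come from running jps on every node in parallel; the "| SUCCESS | rc=0" prefix matches ansible ad-hoc output.)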
          
          hadoop-5 | SUCCESS | rc=0 >>
          16993 NodeManager
          16884 DataNode
          17205 Jps
          
          hadoop-1 | SUCCESS | rc=0 >>
          28520 ResourceManager
          28235 NameNode
          29003 Jps
          
          hadoop-2 | SUCCESS | rc=0 >>
          17780 Jps
          17349 DataNode
          17529 NodeManager
          17453 SecondaryNameNode
          
          hadoop-4 | SUCCESS | rc=0 >>
          17105 Jps
          16875 NodeManager
          16766 DataNode
          
          hadoop-3 | SUCCESS | rc=0 >>
          16769 DataNode
          17121 Jps
          16878 NodeManager
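
          Beyond jps, HDFS itself can confirm that all four DataNodes registered. A quick check from the master (a hedged sketch; the grep pattern matches the 2.7 report format):

          #hdfs dfsadmin -report | grep -i 'live datanodes'    # expect: Live datanodes (4):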
      
    11. View via the web UI
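
      Assuming the stock Hadoop 2.7 ports (defaults, except where this guide sets them explicitly):

      NameNode           http://hadoop-1:50070
      ResourceManager    http://hadoop-1:8088     (the same port appears in the job-tracking URL below)
      SecondaryNameNode  http://hadoop-2:50090    (set in hdfs-site.xml above)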

    2. Fully Distributed Word Count

    1. Prepare input for the word-count demo that ships with Hadoop

      #mkdir /input                      # local staging directory
      #cd /input/
      #echo "hello world" > file1.txt
      #echo "hello world" > file2.txt
      #echo "hello world" > file3.txt
      #echo "hello hadoop" > file4.txt
      #echo "hello hadoop" > file5.txt
      #echo "hello mapreduce" > file6.txt
      #echo "hello mapreduce" > file7.txt
      #hdfs dfs -mkdir /input            # create /input in HDFS ("hadoop dfs" is deprecated)
      #hdfs dfs -ls /                    # "hdfs dfs" and "hadoop fs" are interchangeable here
      #hadoop fs -ls /
      #hadoop fs -put /input/* /input    # upload the local files into HDFS /input
      #hadoop fs -ls /input
      
    2. Run the job

      [root@hadoop-1 ~]# hadoop jar /soft/hadoop/share/hadoop/mapreduce/hadoop-mapreduce-examples-2.7.3.jar wordcount /input/ /output
      17/05/14 23:01:07 INFO client.RMProxy: Connecting to ResourceManager at hadoop-1/10.31.133.19:8032
      17/05/14 23:01:09 INFO input.FileInputFormat: Total input paths to process : 7
      17/05/14 23:01:10 INFO mapreduce.JobSubmitter: number of splits:7
      17/05/14 23:01:10 INFO mapreduce.JobSubmitter: Submitting tokens for job: job_1494773207391_0001
      17/05/14 23:01:10 INFO impl.YarnClientImpl: Submitted application application_1494773207391_0001
      17/05/14 23:01:11 INFO mapreduce.Job: The url to track the job: http://hadoop-1:8088/proxy/application_1494773207391_0001/
      17/05/14 23:01:11 INFO mapreduce.Job: Running job: job_1494773207391_0001
      17/05/14 23:01:23 INFO mapreduce.Job: Job job_1494773207391_0001 running in uber mode : false
      17/05/14 23:01:23 INFO mapreduce.Job:  map 0% reduce 0%
      17/05/14 23:01:56 INFO mapreduce.Job:  map 43% reduce 0%
      17/05/14 23:01:57 INFO mapreduce.Job:  map 100% reduce 0%
      17/05/14 23:02:04 INFO mapreduce.Job:  map 100% reduce 100%
      17/05/14 23:02:05 INFO mapreduce.Job: Job job_1494773207391_0001 completed successfully
      17/05/14 23:02:05 INFO mapreduce.Job: Counters: 50
          File System Counters
                  FILE: Number of bytes read=184
                  FILE: Number of bytes written=949365
                  FILE: Number of read operations=0
                  FILE: Number of large read operations=0
                  FILE: Number of write operations=0
                  HDFS: Number of bytes read=801
                  HDFS: Number of bytes written=37
                  HDFS: Number of read operations=24
                  HDFS: Number of large read operations=0
                  HDFS: Number of write operations=2
          Job Counters 
                  Killed map tasks=1
                  Launched map tasks=7
                  Launched reduce tasks=1
                  Data-local map tasks=7
                  Total time spent by all maps in occupied slots (ms)=216289
                  Total time spent by all reduces in occupied slots (ms)=4827
                  Total time spent by all map tasks (ms)=216289
                  Total time spent by all reduce tasks (ms)=4827
                  Total vcore-milliseconds taken by all map tasks=216289
                  Total vcore-milliseconds taken by all reduce tasks=4827
                  Total megabyte-milliseconds taken by all map tasks=221479936
                  Total megabyte-milliseconds taken by all reduce tasks=4942848
          Map-Reduce Framework
                  Map input records=7
                  Map output records=14
                  Map output bytes=150
                  Map output materialized bytes=220
                  Input split bytes=707
                  Combine input records=14
                  Combine output records=14
                  Reduce input groups=4
                  Reduce shuffle bytes=220
                  Reduce input records=14
                  Reduce output records=4
                  Spilled Records=28
                  Shuffled Maps =7
                  Failed Shuffles=0
                  Merged Map outputs=7
                  GC time elapsed (ms)=3616
                  CPU time spent (ms)=3970
                  Physical memory (bytes) snapshot=1528823808
                  Virtual memory (bytes) snapshot=16635846656
                  Total committed heap usage (bytes)=977825792
          Shuffle Errors
                  BAD_ID=0
                  CONNECTION=0
                  IO_ERROR=0
                  WRONG_LENGTH=0
                  WRONG_MAP=0
                  WRONG_REDUCE=0
          File Input Format Counters 
                  Bytes Read=94
          File Output Format Counters 
                  Bytes Written=37
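
      A few of these counters line up directly with the inputs: 7 files produce 7 splits and 7 map tasks; Map output records=14 because each one-line file holds two words; and the 37 bytes of HDFS output are exactly the part-r-00000 file shown in the next step.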
                  
      
    3. View the result

      [root@hadoop-1 ~]# hadoop fs -ls /output
      Found 2 items
      -rw-r--r--   3 root supergroup          0 2017-05-14 23:02 /output/_SUCCESS
      -rw-r--r--   3 root supergroup         37 2017-05-14 23:02 /output/part-r-00000
      [root@hadoop-1 ~]# hadoop fs -cat /output/part-r-00000
      hadoop  2
      hello   7
      mapreduce       2
      world   3
      [root@hadoop-1 ~]# 
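
      The counts check out against the seven input files: hello appears once in every file (7), world in three, and hadoop and mapreduce in two each.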
      