  • Apache Hadoop installation and configuration

    1. Network interface configuration

            Command: vi /etc/sysconfig/network-scripts/ifcfg-eth0

    Settings to modify:

    DEVICE=eth0

    HWADDR=00:0C:29:11:02:E8

    TYPE=Ethernet

    UUID=c1038317-21f4-4251-a68f-0962fd644cab

    ONBOOT=yes

    NM_CONTROLLED=yes

    BOOTPROTO=static

    IPADDR=192.168.17.238

    GATEWAY=192.168.17.1

    DNS1=114.114.114.114

    IPV6INIT=no
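    After saving the file, restart the network service so the new settings take effect (a standard step on CentOS 6 that the original omits):

    service network restart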

           2. Hadoop environment configuration

               1. Configure host name mappings

                       Command: vi /etc/hosts
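    The original does not show the file's contents; a minimal sketch, assuming the hostnames used in the SSH step and the 192.168.79.x addresses used in the slaves file and test URLs below (the XML examples use a different subnet, so substitute your own addresses consistently):

    192.168.79.100  namenode

    192.168.79.101  datanode1

    192.168.79.102  datanode2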

               2. Configure the Java and Hadoop environment variables

                        Command: vi /etc/profile

    #java

    JAVA_HOME=/jdk1.7.0_79

    JAVA_BIN=/jdk1.7.0_79/bin

    PATH=$JAVA_HOME/bin:$PATH

    CLASSPATH=.:$JAVA_HOME/lib/dt.jar:$JAVA_HOME/lib/tools.jar

    export JAVA_HOME JAVA_BIN PATH CLASSPATH

    #hadoop

    export HADOOP_HOME=/home/hadoop-2.5.2

    export PATH=$HADOOP_HOME/bin:$PATH

    export HADOOP_COMMON_LIB_NATIVE_DIR=$HADOOP_HOME/lib/native

    export HADOOP_OPTS="-Djava.library.path=$HADOOP_HOME/lib"

    export JAVA_LIBRARY_PATH=/home/hadoop-2.5.2/lib/native/
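    Reload the profile so the variables take effect in the current shell:

    source /etc/profile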

     

            3. Disable the firewall

                    

              service iptables stop

               chkconfig iptables off

               3.1 Modify the SELinux config file

               vi  /etc/selinux/config

                       Change it to:

                         SELINUX=disabled
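               The change in /etc/selinux/config only takes effect after a reboot; to turn SELinux off immediately for the current session you can also run:

               setenforce 0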

           4. Passwordless SSH setup

             Install the SSH client:

                Command: yum -y install openssh-clients

             Generate an SSH key pair:

                ssh-keygen -t rsa

            Go to the home directory: cd ~

             cd .ssh

              ls to list the files

             Append the public key id_rsa.pub to authorized_keys: cat id_rsa.pub >> authorized_keys

              Append each datanode's id_rsa.pub as well:

             ssh datanode1 cat .ssh/id_rsa.pub >>authorized_keys

            Send the merged file to the datanodes:

                        [root@namenode ~]# scp authorized_keys datanode1:~/.ssh

           Test: ssh datanode1 should log in to the datanode1 host without a password.
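           If the login still prompts for a password, check the permissions on each node; OpenSSH refuses keys that live in group- or world-writable locations (a standard requirement, not covered in the original):

           chmod 700 ~/.ssh

           chmod 600 ~/.ssh/authorized_keys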

           The preliminary setup is now complete.

    5. Create a group and user

    groupadd hadoop

    useradd -g hadoop hadoop

    passwd hadoop

     6. Extract hadoop-2.5.2 to /home/hadoop

                 tar -xzvf hadoop-2.5.2.tar.gz -C /home/hadoop
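    If you extracted the archive as root, hand the tree over to the hadoop user created in step 5 (an assumed cleanup step; the original does not show it):

    chown -R hadoop:hadoop /home/hadoop/hadoop-2.5.2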

    Five files need to be configured, and both the nameNode and the dataNodes need them.

    cd /home/hadoop/hadoop-2.5.2/etc/hadoop/

    ll to list the directory contents

    6.1 vi core-site.xml

    <configuration>

        <property>

            <name>hadoop.tmp.dir</name>

            <value>/home/hadoop/tmp</value>  <!-- create this directory manually -->

            <description>Abase for other temporary directories.</description>

        </property>

        <property>

            <name>fs.defaultFS</name>

            <value>hdfs://192.168.131.7:9000</value>  <!-- use the master's IP address here -->

        </property>

        <property>

            <name>io.file.buffer.size</name>

            <value>4096</value>

        </property>

    </configuration>
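    The comment above notes that hadoop.tmp.dir must be created by hand; assuming the hadoop user's home directory, that is:

    mkdir -p /home/hadoop/tmp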

    Create the HDFS metadata and data directories referenced in hdfs-site.xml below (run as the hadoop user so that $HOME resolves to /home/hadoop):

    mkdir -p $HOME/dfs/name

    mkdir -p $HOME/dfs/data

    6.2 vi hdfs-site.xml

    <configuration>

        <property>

            <name>dfs.nameservices</name>

            <value>hadoop-cluster1</value>

        </property>

        <property>

            <name>dfs.namenode.secondary.http-address</name>

            <value>192.168.131.7:50090</value>  <!-- use the master's IP address here -->

        </property>

        <property>

            <name>dfs.namenode.name.dir</name>

            <value>file:///home/hadoop/dfs/name</value>  <!-- create this directory manually -->

        </property>

        <property>

            <name>dfs.datanode.data.dir</name>

            <value>file:///home/hadoop/dfs/data</value>  <!-- create this directory manually -->

        </property>

        <property>

            <name>dfs.replication</name>

            <value>2</value>  <!-- replication factor; at most the number of datanodes (2 here) -->

        </property>

        <property>

            <name>dfs.webhdfs.enabled</name>

            <value>true</value>

        </property>

     </configuration>

    6.3 vi mapred-site.xml
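    Note: Hadoop 2.x tarballs usually ship this file only as a template, so you may need to copy it first:

    cp mapred-site.xml.template mapred-site.xml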

    <configuration>

        <property>

            <name>mapreduce.framework.name</name>

            <value>yarn</value>  <!-- YARN, Hadoop's improved successor to the first-generation MapReduce framework -->

        </property>

        <property>

            <name>mapreduce.jobtracker.http.address</name>

            <value>192.168.131.7:50030</value>  <!-- use the master's IP address here -->

        </property>

        <property>

            <name>mapreduce.jobhistory.address</name>

            <value>192.168.131.7:10020</value>  <!-- use the master's IP address here -->

        </property>

        <property>

            <name>mapreduce.jobhistory.webapp.address</name>

            <value>192.168.131.7:19888</value>  <!-- use the master's IP address here -->

        </property>  

    </configuration>

    6.4 vi yarn-site.xml

    <configuration>

    <!-- Site specific YARN configuration properties -->

        <property>

            <name>yarn.nodemanager.aux-services</name>

            <value>mapreduce_shuffle</value>

        </property>

        <property>

            <name>yarn.resourcemanager.address</name>

            <value>192.168.131.7:8032</value>  <!-- use the master's IP address here -->

        </property>

        <property>

            <name>yarn.resourcemanager.scheduler.address</name>

            <value>192.168.131.7:8030</value>  <!-- use the master's IP address here -->

        </property>

        <property>

            <name>yarn.resourcemanager.resource-tracker.address</name>

            <value>192.168.131.7:8031</value>  <!-- use the master's IP address here -->

        </property>

        <property>

            <name>yarn.resourcemanager.admin.address</name>

            <value>192.168.131.7:8033</value>  <!-- use the master's IP address here -->

        </property>

        <property>

            <name>yarn.resourcemanager.webapp.address</name>

            <value>192.168.131.7:8088</value>  <!-- use the master's IP address here -->

        </property>

    </configuration>

    6.5 vi slaves

    192.168.79.101

    192.168.79.102

    vi hadoop-env.sh

    export JAVA_HOME=/opt/jdk1.7.0_06

    vi yarn-env.sh

    export JAVA_HOME=/opt/jdk1.7.0_06

    In both files, point JAVA_HOME at your actual JDK path; it should match the JAVA_HOME set in /etc/profile earlier.

    After configuring one machine, the files can be copied in bulk to the other machines:

    scp core-site.xml yarn-site.xml mapred-site.xml slaves hdfs-site.xml yarn-env.sh hadoop-env.sh dataNode1:/home/hadoop/hadoop-2.5.2/etc/hadoop

    scp core-site.xml yarn-site.xml mapred-site.xml slaves hdfs-site.xml yarn-env.sh hadoop-env.sh dataNode2:/home/hadoop/hadoop-2.5.2/etc/hadoop

    7. Format the file system

    hdfs namenode -format

    Format only once: reformatting a cluster that already holds data leaves the namenode and datanodes with mismatched cluster IDs.

    8. Start and stop

    Run from the Hadoop directory on the master machine:

    sbin/start-all.sh   (equivalent to running start-dfs.sh and start-yarn.sh)

    sbin/stop-all.sh   (equivalent to running stop-dfs.sh and stop-yarn.sh)

    If startup reports the error: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable

    Starting namenodes on [nameNode]

    download the matching version of the native library from:

    http://dl.bintray.com/sequenceiq/sequenceiq-bin/

    Extract: tar -xvf hadoop-native-64-2.5.2.tar -C /home/hadoop/hadoop-2.5.2/lib/native/

    Then, from inside that native directory, copy the libraries to the datanodes:

    scp * dataNode1:/home/hadoop/hadoop-2.5.2/lib/native/

    scp * dataNode2:/home/hadoop/hadoop-2.5.2/lib/native/

    Then check that the environment variable is set:

    export JAVA_LIBRARY_PATH=/home/hadoop/hadoop-2.5.2/lib/native/

    9. Check the running processes

    jps
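    With the whole cluster up, jps typically lists the following daemons (standard Hadoop 2.x process names; exact output varies):

    On the master: NameNode, SecondaryNameNode, ResourceManager

    On each datanode: DataNode, NodeManager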

    10. Test access

    http://192.168.79.100:50070/  View HDFS node information and browse the file system (192.168.79.100 is the master's IP address)

    http://192.168.79.100:8088/   View MapReduce job status

    Troubleshooting

    If you see: put: File /user/hadoop/input/mapred-site.xml._COPYING_ could only be replicated to 0 nodes instead of minReplication (=1).  There are 2 datanode(s) running and 2 node(s) are excluded in this operation.

    disable the firewall on all nodes.
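    After the firewalls are down, you can confirm that the datanodes have registered with the namenode (a standard HDFS admin command, not part of the original writeup):

    hdfs dfsadmin -report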
