zoukankan      html  css  js  c++  java
  • hadoop-3.0.0与hbase-2.1.0 快速安装

    基于环境:centos7.6.1810,jdk 1.8.0_77,4G的系统内存,建议安装CentOS7-Minimal版本

    hadoop-3.0.0安装

    vi /etc/hosts
    192.168.118.129 master

    vi /etc/hostname
    master

     vi /etc/security/limits.conf

    * soft nproc 65536
    * hard nproc 65536
    * soft nofile 65536
    * hard nofile 65536
    * soft memlock unlimited
    * hard memlock unlimited

    vi /etc/security/limits.d/20-nproc.conf

    * soft nproc 65536
    root soft nproc unlimited


    su es
    vi ~/.bashrc
    export HADOOP_HOME=/home/es/hadoop-3.0.0
    export HBASE_HOME=/home/es/hbase-2.1.0
    export JAVA_HOME=/usr/local/jdk

    cd /home/es
    wget https://archive.apache.org/dist/hadoop/common/hadoop-3.0.0/hadoop-3.0.0.tar.gz

    mkdir -p /home/es/data/hadoop/dfs/{namenode,datanode,tmp}
    mkdir -p /home/es/data/hadoop/tmp

    tar -zxf hadoop-3.0.0.tar.gz
    cd hadoop-3.0.0
    vi etc/hadoop/mapred-site.xml

    <configuration>
    
            <property>
                    <name>mapreduce.framework.name</name>
                    <value>yarn</value>
            </property>
            <property>
                    <name>mapreduce.jobhistory.address</name>
                    <value>master:10020</value>
            </property>
            <property>
                    <name>mapreduce.jobhistory.webapp.address</name>
                    <value>master:19888</value>
            </property>
            <property>
                    <name>mapreduce.jobhistory.done-dir</name>
                    <value>/history/done</value>
            </property>
            <property>
                    <name>mapreduce.jobhistory.intermediate-done-dir</name>
                    <value>/history/done_intermediate</value>
            </property>
    
            <property>
                    <name>mapreduce.input.fileinputformat.split.minsize</name>
                    <value>5120</value>
                    <description>5 KB (the value is in bytes; use 5242880 for 5 MB)</description>
            </property>
                <property>
                    <name>mapreduce.cluster.local.dir</name>
                    <value>/home/es/data/hadoop/local</value>
            </property>
            <property>
                    <name>mapreduce.map.memory.mb</name>
                    <value>512</value>
                    <description>The amount of memory to request from the scheduler for each
                    map task.
                    </description>
            </property>
            <property>
                    <name>mapreduce.reduce.memory.mb</name>
                    <value>1024</value>
                    <description>The amount of memory to request from the scheduler for each
                    reduce task.
                    </description>
            </property>
            <property>
                    <name>mapreduce.map.java.opts</name>
                    <value>-Xmx435m -XX:-UseGCOverheadLimit</value>
                    <description>Java opts only for the child processes that are maps. If set,
                    this will be used instead of mapred.child.java.opts.
                    </description>
            </property>
            <property>
                    <name>mapreduce.reduce.java.opts</name>
                    <value>-Xmx870m -XX:-UseGCOverheadLimit</value>
                    <description>Java opts only for the child processes that are reduces. If set,
                    this will be used instead of mapred.child.java.opts.
                    </description>
            </property>
                <property>
                    <name>mapreduce.task.timeout</name>
                    <value>300000</value>
            </property>
            <property>
                    <name>mapreduce.jobtracker.handler.count</name>
                    <value>10</value>
            </property>
            <property>
                    <name>mapreduce.job.reduce.slowstart.completedmaps</name>
                    <value>0.07</value>
                    <description>Fraction of the number of maps in the job which should be
                    complete before reduces are scheduled for the job.
                    </description>
                    </property>
            <property>
                    <name>mapreduce.reduce.shuffle.parallelcopies</name>
                    <value>10</value>
            </property>
            <property>
                    <name>mapreduce.tasktracker.http.threads</name>
                    <value>10</value>
            </property>
            <property>
                    <name>mapreduce.tasktracker.map.tasks.maximum</name>
                    <value>4</value>
            </property>
            <property>
                    <name>mapreduce.tasktracker.reduce.tasks.maximum</name>
                    <value>1</value>
            </property>
    
    </configuration>

    vi etc/hadoop/core-site.xml

    <configuration>
    
            <property>
                    <name>hadoop.tmp.dir</name>
                    <value>file:///home/es/data/hadoop/tmp</value>
                    <description>A base for other temporary directories.</description>
            </property>
            <property>
                    <name>io.file.buffer.size</name>
                    <value>131072</value>
            </property>
            <property>
                    <name>fs.defaultFS</name>
                    <value>hdfs://master:8020</value>
            </property>
    
    </configuration>

    vi etc/hadoop/hdfs-site.xml

    <configuration>
    
            <property>
                    <name>dfs.namenode.name.dir</name>
                    <value>/home/es/data/hadoop/dfs/namenode</value>
            </property>
            <property>
                    <name>dfs.datanode.data.dir</name>
                    <value>/home/es/data/hadoop/dfs/datanode</value>
            </property>
            <property>
                    <name>dfs.permissions.enabled</name>
                    <value>false</value>
            </property>
        <property>
                    <name>dfs.replication</name>
                    <value>1</value>
            </property>
    
    </configuration>

    vi etc/hadoop/yarn-site.xml

    <configuration>
    
            <property>
                    <name>yarn.resourcemanager.hostname</name>
                    <value>master</value>
            </property>
            <property>
                    <description>The address of the applications manager interface in the RM.</description>
                    <name>yarn.resourcemanager.address</name>
                    <value>${yarn.resourcemanager.hostname}:8032</value>
            </property>
    
            <property>
                    <description>The address of the scheduler interface.</description>
                    <name>yarn.resourcemanager.scheduler.address</name>
                    <value>${yarn.resourcemanager.hostname}:8030</value>
            </property>
    
            <property>
                    <description>The http address of the RM web application.</description>
                    <name>yarn.resourcemanager.webapp.address</name>
                    <value>${yarn.resourcemanager.hostname}:8088</value>
            </property>
    
            <property>
                    <description>The https address of the RM web application.</description>
                    <name>yarn.resourcemanager.webapp.https.address</name>
                    <value>${yarn.resourcemanager.hostname}:8090</value>
            </property>
            <property>
                    <name>yarn.resourcemanager.resource-tracker.address</name>
                    <value>${yarn.resourcemanager.hostname}:8031</value>
            </property>
            <property>
                    <description>The address of the RM admin interface.</description>
                    <name>yarn.resourcemanager.admin.address</name>
                    <value>${yarn.resourcemanager.hostname}:8033</value>
            </property>
    
            <property>
                    <name>yarn.nodemanager.aux-services</name>
                    <value>mapreduce_shuffle</value>
                    <description></description>
            </property>
    
            <property>
                    <name>yarn.log-aggregation-enable</name>
                    <value>true</value>
            </property>
    
    
    
            <property>
                    <description>Amount of physical memory, in MB, that can be allocated
                     for containers. The default is 8192 MB.</description>
                    <name>yarn.nodemanager.resource.memory-mb</name>
                    <value>2048</value>
            </property>
                <property>
                    <description>The minimum allocation for every container request at the RM,
                    in MBs. Memory requests lower than this won't take effect,
                    and the specified value will get allocated at minimum.</description>
                    <name>yarn.scheduler.minimum-allocation-mb</name>
                    <value>512</value>
            </property>
            <property>
                    <description>The maximum allocation for every container request at the RM,
                    in MBs. Memory requests higher than this won't take effect,
                    and will get capped to this value.</description>
                    <name>yarn.scheduler.maximum-allocation-mb</name>
                    <value>2048</value>
            </property>
            <property>
                    <name>yarn.app.mapreduce.am.resource.mb</name>
                    <value>1024</value>
            </property>
            <property>
                    <name>yarn.app.mapreduce.am.command-opts</name>
                    <value>-Xmx870m</value>
            </property>
    
    
            <property>
                    <description>Number of vcores that can be allocated
                    for containers. This is used by the RM scheduler when allocating
                    resources for containers. This is not used to limit the number of
                    physical cores used by YARN containers.</description>
                    <name>yarn.nodemanager.resource.cpu-vcores</name>
                    <value>4</value>
            </property>
            <property>
                    <name>yarn.scheduler.minimum-allocation-vcores</name>
                    <value>1</value>
            </property>
            <property>
                    <description>The maximum allocation for every container request at the RM,
                    in terms of virtual CPU cores. Requests higher than this won't take effect,
                    and will get capped to this value.</description>
                    <name>yarn.scheduler.maximum-allocation-vcores</name>
                    <value>4</value>
            </property>
            <property>
                    <name>yarn.nodemanager.vmem-pmem-ratio</name>
                    <value>5</value>
            </property>
            <property>
                    <name>yarn.nodemanager.vmem-check-enabled</name>
                    <value>false</value>
            </property>
            <property>
                    <description>List of directories to store localized files in. An
                    application's localized file directory will be found in:
                    ${yarn.nodemanager.local-dirs}/usercache/${user}/appcache/application_${appid}.
                    Individual containers' work directories, called container_${contid}, will
                    be subdirectories of this.</description>
                    <name>yarn.nodemanager.local-dirs</name>
                    <value>/home/es/data/hadoop/tmp/nm_local_dir</value>
            </property>
    
        <!-- hadoop-3.0.0 new config-->
            <property>
                    <name>yarn.nodemanager.env-whitelist</name>
                    <value>JAVA_HOME,HADOOP_COMMON_HOME,HADOOP_HDFS_HOME,HADOOP_CONF_DIR,CLASSPATH_PREPEND_DISTCACHE,HADOOP_YARN_HOME,HADOOP_MAPRED_HOME</value>
            </property>
    
    </configuration>

    vi etc/hadoop/hadoop-env.sh    #append the following lines at the bottom
    export JAVA_HOME=/usr/local/jdk
    #HADOOP_HEAPSIZE is deprecated in hadoop 3; HADOOP_HEAPSIZE_MAX replaces it
    export HADOOP_HEAPSIZE_MAX=256m

    vi etc/hadoop/workers
    master
    #format namenode
    bin/hdfs namenode -format    #equivalent to the deprecated form: bin/hadoop namenode -format
    #start hdfs
    bin/hdfs --daemon start namenode    #equivalent to the deprecated form: sbin/hadoop-daemon.sh start namenode
    bin/hdfs --daemon start datanode

    #stop
    bin/hdfs --daemon stop datanode
    bin/hdfs --daemon stop namenode


    #start yarn
    bin/yarn --daemon start resourcemanager
    bin/yarn --daemon start nodemanager

    #stop yarn
    bin/yarn --daemon stop nodemanager
    bin/yarn --daemon stop resourcemanager


    #test hdfs
    bin/hdfs dfs -mkdir /tmp
    bin/hdfs dfs -put README.txt /tmp
    bin/hdfs dfs -text /tmp/README.txt
    bin/hdfs dfs -rm /tmp/README.txt

    #test mr in namenode
    bin/hadoop jar share/hadoop/mapreduce/hadoop-mapreduce-examples-3.0.0.jar pi 2 2


    su root
    #firewall-cmd --zone=public --remove-port=9200/tcp --permanent
    #firewall-cmd --zone=public --list-ports
    #firewall-cmd --reload

    #namenode http
    firewall-cmd --zone=public --add-port=9870/tcp --permanent
    ##namenode rpc
    firewall-cmd --zone=public --add-port=8020/tcp --permanent

    #datanode
    firewall-cmd --zone=public --add-port=9864/tcp --permanent
    firewall-cmd --zone=public --add-port=9866/tcp --permanent
    firewall-cmd --zone=public --add-port=9867/tcp --permanent

    #resourcemanager
    firewall-cmd --zone=public --add-port=8088/tcp --permanent
    firewall-cmd --zone=public --add-port=8030/tcp --permanent
    firewall-cmd --zone=public --add-port=8031/tcp --permanent
    firewall-cmd --zone=public --add-port=8032/tcp --permanent
    firewall-cmd --zone=public --add-port=8033/tcp --permanent

    #nodemanager
    firewall-cmd --zone=public --add-port=8040/tcp --permanent
    firewall-cmd --zone=public --add-port=8042/tcp --permanent
    #firewall-cmd --zone=public --add-port=13562/tcp --permanent
    #firewall-cmd --zone=public --add-port=37828/tcp --permanent

    #rules added with --permanent only take effect after a reload
    firewall-cmd --reload

    hbase-2.1.0 安装
    su es
    cd /home/es
    wget https://archive.apache.org/dist/hbase/2.1.0/hbase-2.1.0-bin.tar.gz
    tar -zxf hbase-2.1.0-bin.tar.gz
    cd hbase-2.1.0

    vi conf/hbase-site.xml

    <configuration>
            <property>
                    <name>hbase.rootdir</name>
                    <value>hdfs://master:8020/hbase</value>
            </property>
            <property>
                    <name>hbase.cluster.distributed</name>
                    <value>true</value>
            </property>
            <property>
                    <name>hbase.zookeeper.property.dataDir</name>
                    <value>/home/es/data/zookeeper</value>
            </property>
            <property>
                    <name>hbase.zookeeper.quorum</name>
                    <value>master:2181</value>
            </property>
            
            <property>
                    <name>hbase.unsafe.stream.capability.enforce</name>
                    <value>false</value>
            </property>
    
    </configuration>

    vi conf/hbase-env.sh
    export HBASE_HEAPSIZE=1G
    export JAVA_HOME=/usr/local/jdk

    vi conf/regionservers
    master

    #start
    bin/hbase-daemon.sh start zookeeper
    bin/hbase-daemon.sh start master
    bin/hbase-daemon.sh start regionserver
    bin/hbase-daemon.sh start master-backup

    #stop
    bin/hbase-daemon.sh stop regionserver
    bin/hbase-daemon.sh stop master

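    #firewall (run the firewall-cmd commands below as root, e.g. in a separate root shell)
    #zk
    firewall-cmd --zone=public --add-port=2181/tcp --permanent
    #hmaster
    #http
    firewall-cmd --zone=public --add-port=16010/tcp --permanent
    #rpc
    firewall-cmd --zone=public --add-port=16000/tcp --permanent
    #regionserver
    firewall-cmd --zone=public --add-port=16030/tcp --permanent
    firewall-cmd --zone=public --add-port=16020/tcp --permanent
    #rules added with --permanent only take effect after a reload
    firewall-cmd --reload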

    #test hbase
    bin/hbase shell
    create 'blog','info'
    desc 'blog'
    list
    put 'blog','r1','info:c1','v1'
    scan 'blog',{LIMIT=>2}
    get 'blog','r1'
    delete 'blog','r1','info:c1'


    每一个family列族对应HDFS上的一个目录(在hbase shell之外执行;hbase2的表位于/hbase/data/<namespace>/<table>下)
    $HADOOP_HOME/bin/hdfs dfs -ls /hbase/data/default/blog
    手工把memstore写到Hfile中
    flush 'blog'
    手工合并hfile
    compact 'blog'

    hbase2列族必须事先定义;已有的表新增列族可用 alter 'blog', NAME => 'info1',或像下面这样删表重建

    disable 'blog'
    drop 'blog'
    create 'blog','info','info1'
    enable 'blog'
    exists 'blog'
    truncate 'blog'

  • 相关阅读:
    liunx某台服务器无法访问其他服务器!!!!!!!!
    下载历史版本CentOS
    通过sparkstreaming分析url的数据
    Linux查看空间大小的命令
    secureCRT背景颜色
    布谷鸟算法详细讲解
    matlab 绘图
    浏览器内存泄露问题
    C#和java的语法区别
    i-m-a-g-e-7
  • 原文地址:https://www.cnblogs.com/biginfo/p/10653962.html
Copyright © 2011-2022 走看看