  • Hadoop Learning Path (6)

    1. Hive

    conf/hive-log4j.properties

    # Licensed to the Apache Software Foundation (ASF) under one
    # or more contributor license agreements.  See the NOTICE file
    # distributed with this work for additional information
    # regarding copyright ownership.  The ASF licenses this file
    # to you under the Apache License, Version 2.0 (the
    # "License"); you may not use this file except in compliance
    # with the License.  You may obtain a copy of the License at
    #
    #     http://www.apache.org/licenses/LICENSE-2.0
    #
    # Unless required by applicable law or agreed to in writing, software
    # distributed under the License is distributed on an "AS IS" BASIS,
    # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    # See the License for the specific language governing permissions and
    # limitations under the License.
    
    # Define some default values that can be overridden by system properties
    hive.log.threshold=ALL
    hive.root.logger=INFO,DRFA
    hive.log.dir=/opt/module/hive/logs
    hive.log.file=hive.log
    
    # Define the root logger to the system property "hadoop.root.logger".
    log4j.rootLogger=${hive.root.logger}, EventCounter
    
    # Logging Threshold
    log4j.threshold=${hive.log.threshold}
    
    #
    # Daily Rolling File Appender
    #
    # Use the PidDailyRollingFileAppender class instead if you want to use separate log files
    # for different CLI sessions.
    #
    # log4j.appender.DRFA=org.apache.hadoop.hive.ql.log.PidDailyRollingFileAppender
    
    log4j.appender.DRFA=org.apache.log4j.DailyRollingFileAppender
    
    log4j.appender.DRFA.File=${hive.log.dir}/${hive.log.file}
    
    # Rollover at midnight
    log4j.appender.DRFA.DatePattern=.yyyy-MM-dd
    
    # 30-day backup
    #log4j.appender.DRFA.MaxBackupIndex=30
    log4j.appender.DRFA.layout=org.apache.log4j.PatternLayout
    
    # Pattern format: Date LogLevel LoggerName LogMessage
    #log4j.appender.DRFA.layout.ConversionPattern=%d{ISO8601} %p %c: %m%n
    # Debugging Pattern format
    log4j.appender.DRFA.layout.ConversionPattern=%d{ISO8601} %-5p [%t]: %c{2} (%F:%M(%L)) - %m%n
    
    
    #
    # console
    # Add "console" to rootlogger above if you want to use this
    #
    
    log4j.appender.console=org.apache.log4j.ConsoleAppender
    log4j.appender.console.target=System.err
    log4j.appender.console.layout=org.apache.log4j.PatternLayout
    log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} [%t]: %p %c{2}: %m%n
    log4j.appender.console.encoding=UTF-8
    
    #custom logging levels
    #log4j.logger.xxx=DEBUG
    
    #
    # Event Counter Appender
    # Sends counts of logging messages at different severity levels to Hadoop Metrics.
    #
    log4j.appender.EventCounter=org.apache.hadoop.hive.shims.HiveEventCounter
    
    
    log4j.category.DataNucleus=ERROR,DRFA
    log4j.category.Datastore=ERROR,DRFA
    log4j.category.Datastore.Schema=ERROR,DRFA
    log4j.category.JPOX.Datastore=ERROR,DRFA
    log4j.category.JPOX.Plugin=ERROR,DRFA
    log4j.category.JPOX.MetaData=ERROR,DRFA
    log4j.category.JPOX.Query=ERROR,DRFA
    log4j.category.JPOX.General=ERROR,DRFA
    log4j.category.JPOX.Enhancer=ERROR,DRFA
    
    
    # Silence useless ZK logs
    log4j.logger.org.apache.zookeeper.server.NIOServerCnxn=WARN,DRFA
    log4j.logger.org.apache.zookeeper.ClientCnxnSocketNIO=WARN,DRFA
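    Since hive.log.dir points at a custom location rather than Hive's default under /tmp, it is worth making sure the directory exists and tailing the log while Hive runs; this is also the file the Flume exec source below follows. A minimal check, assuming the paths above:

    mkdir -p /opt/module/hive/logs
    tail -f /opt/module/hive/logs/hive.log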

    conf/hive-site.xml (using MySQL 5.6 as the metastore database)

    <?xml version="1.0"?>
    <?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
    <configuration>
        <property>
          <name>javax.jdo.option.ConnectionURL</name>
          <value>jdbc:mysql://hadoop102:3306/metastore?createDatabaseIfNotExist=true</value>
          <description>JDBC connect string for a JDBC metastore</description>
        </property>
    
        <property>
          <name>javax.jdo.option.ConnectionDriverName</name>
          <value>com.mysql.jdbc.Driver</value>
          <description>Driver class name for a JDBC metastore</description>
        </property>
    
        <property>
          <name>javax.jdo.option.ConnectionUserName</name>
          <value>root</value>
          <description>username to use against metastore database</description>
        </property>
    
        <property>
          <name>javax.jdo.option.ConnectionPassword</name>
          <value>666666</value>
          <description>password to use against metastore database</description>
        </property>
        <property>
          <name>hive.metastore.warehouse.dir</name>
          <value>/hive</value>
          <description>location of default database for the warehouse</description>
        </property>

        <property>
          <name>hive.cli.print.header</name>
          <value>true</value>
        </property>

        <property>
          <name>hive.cli.print.current.db</name>
          <value>true</value>
        </property>

        <property>
          <name>hive.zookeeper.quorum</name>
          <value>hadoop102,hadoop103,hadoop101</value>
          <description>The list of ZooKeeper servers to talk to. This is only needed for read/write locks.</description>
        </property>

        <property>
          <name>hive.zookeeper.client.port</name>
          <value>2181</value>
          <description>The port of ZooKeeper servers to talk to. This is only needed for read/write locks.</description>
        </property>
    
    </configuration>
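    With the metastore on MySQL, the MySQL JDBC driver has to be on Hive's classpath and the database must be reachable with the credentials above. A minimal sketch (the connector jar name is an assumption and depends on the version you downloaded):

    # copy the JDBC driver into Hive's lib directory (jar name assumed)
    cp mysql-connector-java-5.1.27-bin.jar /opt/module/hive/lib/
    # verify MySQL on hadoop102 accepts the configured credentials
    mysql -h hadoop102 -uroot -p666666 -e "show databases;"
    # start the Hive CLI; the metastore database is created automatically on first use
    # because of createDatabaseIfNotExist=true in the connection URL
    /opt/module/hive/bin/hive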

    2. Flume

    Create a myagents/ directory to hold the custom agent configuration files.

    execsource-hdfssink.conf

    # a1 is the agent name; a1 defines one source named r1 (separate multiple components with spaces)
    a1.sources = r1
    a1.sinks = k1
    a1.channels = c1
    # group.property=value
    a1.sources.r1.type=exec
    a1.sources.r1.command=tail -f /opt/module/hive/logs/hive.log
    
    # Define the channel
    a1.channels.c1.type=memory
    a1.channels.c1.capacity=1000
    
    # Define the sink
    a1.sinks.k1.type = hdfs
    # If the path contains time-based escape sequences, each event's header must carry a timestamp;
    # otherwise set useLocalTimeStamp = true
    a1.sinks.k1.hdfs.path = hdfs://hadoop101:9000/flume/%Y%m%d/%H/%M
    # Prefix for uploaded files
    a1.sinks.k1.hdfs.filePrefix = logs-
    
    # The next three settings control directory rolling; once the path contains time escapes,
    # directories roll based on the timestamp
    # Whether to round the timestamp down
    a1.sinks.k1.hdfs.round = true
    # How many time units before a new directory is created
    a1.sinks.k1.hdfs.roundValue = 1
    # The time unit used for rounding
    a1.sinks.k1.hdfs.roundUnit = minute
    
    # Whether to use the local timestamp
    a1.sinks.k1.hdfs.useLocalTimeStamp = true
    # Number of events to accumulate before flushing to HDFS
    a1.sinks.k1.hdfs.batchSize = 100
    
    # The next three settings control file rolling; they are OR'ed together, and a value of 0 disables that trigger
    # Roll a new file every 10 seconds
    a1.sinks.k1.hdfs.rollInterval = 10
    # Roll when the file reaches about 128 MB
    a1.sinks.k1.hdfs.rollSize = 134217700
    # Roll after this many events (0 = disabled)
    a1.sinks.k1.hdfs.rollCount = 0
    # Store data as uncompressed text
    a1.sinks.k1.hdfs.fileType=DataStream
    
    # Wire the components together: a source can feed multiple channels, but a sink reads from exactly one channel
    a1.sources.r1.channels=c1
    a1.sinks.k1.channel=c1
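    To run this agent (a sketch, assuming Flume lives at /opt/module/flume as in /etc/profile below and myagents/ sits under the Flume home):

    cd /opt/module/flume
    bin/flume-ng agent -c conf/ -n a1 -f myagents/execsource-hdfssink.conf -Dflume.root.logger=INFO,console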

    taildirsource-loggersink.conf

    # a1 is the agent name; a1 defines one source named r1 (separate multiple components with spaces)
    a1.sources = r1
    a1.sinks = k1
    a1.channels = c1
    # group.property=value
    a1.sources.r1.type=TAILDIR
    a1.sources.r1.filegroups=f1 f2
    a1.sources.r1.filegroups.f1=/home/layman/hi
    a1.sources.r1.filegroups.f2=/home/layman/test
    
    # Define the sink
    a1.sinks.k1.type=logger
    a1.sinks.k1.maxBytesToLog=100
    
    # Define the channel
    a1.channels.c1.type=memory
    a1.channels.c1.capacity=1000
    
    # Wire the components together: a source can feed multiple channels, but a sink reads from exactly one channel
    a1.sources.r1.channels=c1
    a1.sinks.k1.channel=c1
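    The TAILDIR source remembers its read offsets in a JSON position file (by default under ~/.flume/), so it resumes where it left off after a restart. To try it (a sketch, run from the Flume home directory):

    bin/flume-ng agent -c conf/ -n a1 -f myagents/taildirsource-loggersink.conf -Dflume.root.logger=INFO,console
    # in another shell, append to one of the tracked files and watch the event show up at the logger sink
    echo "hello taildir" >> /home/layman/hi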

    spoolingdirsource-hdfsink.conf

    # a1 is the agent name; a1 defines one source named r1 (separate multiple components with spaces)
    a1.sources = r1
    a1.sinks = k1
    a1.channels = c1
    # group.property=value
    a1.sources.r1.type=spooldir
    a1.sources.r1.spoolDir=/home/layman/flume
    
    # Define the channel
    a1.channels.c1.type=memory
    a1.channels.c1.capacity=1000
    
    # Define the sink
    a1.sinks.k1.type = hdfs
    # If the path contains time-based escape sequences, each event's header must carry a timestamp;
    # otherwise set useLocalTimeStamp = true
    a1.sinks.k1.hdfs.path = hdfs://hadoop101:9000/flume/%Y%m%d/%H/%M
    # Prefix for uploaded files
    a1.sinks.k1.hdfs.filePrefix = logs-
    
    # The next three settings control directory rolling; once the path contains time escapes,
    # directories roll based on the timestamp
    # Whether to round the timestamp down
    a1.sinks.k1.hdfs.round = true
    # How many time units before a new directory is created
    a1.sinks.k1.hdfs.roundValue = 1
    # The time unit used for rounding
    a1.sinks.k1.hdfs.roundUnit = minute
    
    # Whether to use the local timestamp
    a1.sinks.k1.hdfs.useLocalTimeStamp = true
    # Number of events to accumulate before flushing to HDFS
    a1.sinks.k1.hdfs.batchSize = 100
    
    # The next three settings control file rolling; they are OR'ed together, and a value of 0 disables that trigger
    # Roll a new file every 30 seconds
    a1.sinks.k1.hdfs.rollInterval = 30
    # Roll when the file reaches about 128 MB
    a1.sinks.k1.hdfs.rollSize = 134217700
    # Roll after this many events (0 = disabled)
    a1.sinks.k1.hdfs.rollCount = 0
    # Store data as uncompressed text
    a1.sinks.k1.hdfs.fileType=DataStream
    
    # Wire the components together: a source can feed multiple channels, but a sink reads from exactly one channel
    a1.sources.r1.channels=c1
    a1.sinks.k1.channel=c1
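    The spooling directory source only picks up files that are complete and never modified afterwards; once a file is ingested it is renamed with a .COMPLETED suffix by default. A quick test (sketch, from the Flume home directory):

    mkdir -p /home/layman/flume
    bin/flume-ng agent -c conf/ -n a1 -f myagents/spoolingdirsource-hdfsink.conf -Dflume.root.logger=INFO,console
    # in another shell, drop a finished file into the spooling directory
    cp /opt/module/hive/logs/hive.log /home/layman/flume/hive-$(date +%s).log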

    avrosource-loggersink.conf

    # agent2
    # a1 is the agent name; a1 defines one source named r1 (separate multiple components with spaces)
    a1.sources = r1
    a1.sinks = k1
    a1.channels = c1
    # group.property=value
    a1.sources.r1.type=avro
    a1.sources.r1.bind=hadoop102
    a1.sources.r1.port=33333
    
    # Define the sink
    a1.sinks.k1.type=logger
    
    # Define the channel
    a1.channels.c1.type=memory
    a1.channels.c1.capacity=1000
    
    # Wire the components together: a source can feed multiple channels, but a sink reads from exactly one channel
    a1.sources.r1.channels=c1
    a1.sinks.k1.channel=c1

    netcatsource-avrosink.conf

    # agent1
    # a1 is the agent name; a1 defines one source named r1 (separate multiple components with spaces)
    a1.sources = r1
    a1.sinks = k1
    a1.channels = c1
    # group.property=value
    a1.sources.r1.type=netcat
    a1.sources.r1.bind=hadoop101
    a1.sources.r1.port=44444
    
    # Define the sink
    a1.sinks.k1.type=avro
    a1.sinks.k1.hostname=hadoop102
    a1.sinks.k1.port=33333
    
    # Define the channel
    a1.channels.c1.type=memory
    a1.channels.c1.capacity=1000
    
    # Wire the components together: a source can feed multiple channels, but a sink reads from exactly one channel
    a1.sources.r1.channels=c1
    a1.sinks.k1.channel=c1

    Note the startup order of the two agents above: agent2 (the Avro source on hadoop102) must be running before agent1 is started, because agent1's Avro sink connects to it; see the sketch below.
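    Concretely (a sketch, assuming both hosts run Flume from /opt/module/flume):

    # on hadoop102: start agent2 first so the Avro source is listening on port 33333
    bin/flume-ng agent -c conf/ -n a1 -f myagents/avrosource-loggersink.conf -Dflume.root.logger=INFO,console
    # on hadoop101: then start agent1, whose Avro sink connects to hadoop102:33333
    bin/flume-ng agent -c conf/ -n a1 -f myagents/netcatsource-avrosink.conf -Dflume.root.logger=INFO,console
    # test end to end: type lines into the netcat source and watch them arrive at the logger sink on hadoop102
    nc hadoop101 44444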

    3. Kafka

    conf/server.properties

    # Licensed to the Apache Software Foundation (ASF) under one or more
    # contributor license agreements.  See the NOTICE file distributed with
    # this work for additional information regarding copyright ownership.
    # The ASF licenses this file to You under the Apache License, Version 2.0
    # (the "License"); you may not use this file except in compliance with
    # the License.  You may obtain a copy of the License at
    #
    #    http://www.apache.org/licenses/LICENSE-2.0
    #
    # Unless required by applicable law or agreed to in writing, software
    # distributed under the License is distributed on an "AS IS" BASIS,
    # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    # See the License for the specific language governing permissions and
    # limitations under the License.
    
    # see kafka.server.KafkaConfig for additional details and defaults
    
    ############################# Server Basics #############################
    
    # The id of the broker. This must be set to a unique integer for each broker.
    broker.id=102
    
    # Switch to enable topic deletion or not, default value is false
    delete.topic.enable=true
    
    ############################# Socket Server Settings #############################
    
    # The address the socket server listens on. It will get the value returned from 
    # java.net.InetAddress.getCanonicalHostName() if not configured.
    #   FORMAT:
    #     listeners = listener_name://host_name:port
    #   EXAMPLE:
    #     listeners = PLAINTEXT://your.host.name:9092
    #listeners=PLAINTEXT://:9092
    
    # Hostname and port the broker will advertise to producers and consumers. If not set, 
    # it uses the value for "listeners" if configured.  Otherwise, it will use the value
    # returned from java.net.InetAddress.getCanonicalHostName().
    #advertised.listeners=PLAINTEXT://your.host.name:9092
    
    # Maps listener names to security protocols, the default is for them to be the same. See the config documentation for more details
    #listener.security.protocol.map=PLAINTEXT:PLAINTEXT,SSL:SSL,SASL_PLAINTEXT:SASL_PLAINTEXT,SASL_SSL:SASL_SSL
    
    # The number of threads that the server uses for receiving requests from the network and sending responses to the network
    num.network.threads=3
    
    # The number of threads that the server uses for processing requests, which may include disk I/O
    num.io.threads=8
    
    # The send buffer (SO_SNDBUF) used by the socket server
    socket.send.buffer.bytes=102400
    
    # The receive buffer (SO_RCVBUF) used by the socket server
    socket.receive.buffer.bytes=102400
    
    # The maximum size of a request that the socket server will accept (protection against OOM)
    socket.request.max.bytes=104857600
    
    
    ############################# Log Basics #############################
    
    # A comma separated list of directories under which to store log files
    log.dirs=/opt/module/kafka/datas
    
    # The default number of log partitions per topic. More partitions allow greater
    # parallelism for consumption, but this will also result in more files across
    # the brokers.
    num.partitions=1
    
    # The number of threads per data directory to be used for log recovery at startup and flushing at shutdown.
    # This value is recommended to be increased for installations with data dirs located in a RAID array.
    num.recovery.threads.per.data.dir=1
    
    ############################# Internal Topic Settings  #############################
    # The replication factor for the group metadata internal topics "__consumer_offsets" and "__transaction_state"
    # For anything other than development testing, a value greater than 1 (such as 3) is recommended to ensure availability.
    offsets.topic.replication.factor=1
    transaction.state.log.replication.factor=1
    transaction.state.log.min.isr=1
    
    ############################# Log Flush Policy #############################
    
    # Messages are immediately written to the filesystem but by default we only fsync() to sync
    # the OS cache lazily. The following configurations control the flush of data to disk.
    # There are a few important trade-offs here:
    #    1. Durability: Unflushed data may be lost if you are not using replication.
    #    2. Latency: Very large flush intervals may lead to latency spikes when the flush does occur as there will be a lot of data to flush.
    #    3. Throughput: The flush is generally the most expensive operation, and a small flush interval may lead to excessive seeks.
    # The settings below allow one to configure the flush policy to flush data after a period of time or
    # every N messages (or both). This can be done globally and overridden on a per-topic basis.
    
    # The number of messages to accept before forcing a flush of data to disk
    #log.flush.interval.messages=10000
    
    # The maximum amount of time a message can sit in a log before we force a flush
    #log.flush.interval.ms=1000
    
    ############################# Log Retention Policy #############################
    
    # The following configurations control the disposal of log segments. The policy can
    # be set to delete segments after a period of time, or after a given size has accumulated.
    # A segment will be deleted whenever *either* of these criteria are met. Deletion always happens
    # from the end of the log.
    
    # The minimum age of a log file to be eligible for deletion due to age
    log.retention.hours=168
    
    # A size-based retention policy for logs. Segments are pruned from the log as long as the remaining
    # segments don't drop below log.retention.bytes. Functions independently of log.retention.hours.
    #log.retention.bytes=1073741824
    
    # The maximum size of a log segment file. When this size is reached a new log segment will be created.
    log.segment.bytes=1073741824
    
    # The interval at which log segments are checked to see if they can be deleted according
    # to the retention policies
    log.retention.check.interval.ms=300000
    
    ############################# Zookeeper #############################
    
    # Zookeeper connection string (see zookeeper docs for details).
    # This is a comma separated list of host:port pairs, each corresponding to a zk
    # server. e.g. "127.0.0.1:3000,127.0.0.1:3001,127.0.0.1:3002".
    # You can also append an optional chroot string to the urls to specify the
    # root directory for all kafka znodes.
    zookeeper.connect=hadoop101:2181,hadoop102:2181,hadoop103:2181
    
    # Timeout in ms for connecting to zookeeper
    zookeeper.connection.timeout.ms=6000
    
    
    ############################# Group Coordinator Settings #############################
    
    # The following configuration specifies the time, in milliseconds, that the GroupCoordinator will delay the initial consumer rebalance.
    # The rebalance will be further delayed by the value of group.initial.rebalance.delay.ms as new members join the group, up to a maximum of max.poll.interval.ms.
    # The default value for this is 3 seconds.
    # We override this to 0 here as it makes for a better out-of-the-box experience for development and testing.
    # However, in production environments the default value of 3 seconds is more suitable as this will help to avoid unnecessary, and potentially expensive, rebalances during application startup.
    group.initial.rebalance.delay.ms=0
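    Each broker gets its own copy of server.properties with a unique broker.id (e.g. 102, 103, 101 on hadoop102, hadoop103, hadoop101). A basic smoke test (a sketch, run from the Kafka home directory; the --zookeeper form of kafka-topics.sh matches the pre-2.2 releases this config style belongs to):

    # start a broker on every node, each with its own broker.id
    bin/kafka-server-start.sh -daemon config/server.properties
    # create a test topic
    bin/kafka-topics.sh --create --zookeeper hadoop101:2181 --partitions 3 --replication-factor 2 --topic test
    # produce a few messages, then read them back
    bin/kafka-console-producer.sh --broker-list hadoop102:9092 --topic test
    bin/kafka-console-consumer.sh --bootstrap-server hadoop102:9092 --topic test --from-beginning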

    ZooKeeper cluster start/stop script

    #!/bin/bash
    # Start (start) | stop (stop) | check ZK cluster status (status)
    # Check whether the user passed an argument
    if (($#==0))
    then
        echo 'Please enter start|stop|status'
        exit
    fi
    
    # Validate the argument
    if [ "$1" = start ] || [ "$1" = stop ] || [ "$1" = status ]
    then
        xcall /opt/module/zookeeper-3.4.10/bin/zkServer.sh "$1"
    else
        echo 'Only start|stop|status are allowed!'
    fi
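    The script relies on a custom xcall helper that runs the same command on every node; it is not shown in this post. A minimal sketch of such a helper (the host list and the sourcing of /etc/profile are assumptions based on the cluster above):

    #!/bin/bash
    # xcall: run the given command on every cluster node (hypothetical helper)
    for host in hadoop101 hadoop102 hadoop103
    do
        echo "--------------- $host ---------------"
        # source /etc/profile so JAVA_HOME and friends are set in the non-login ssh shell
        ssh $host "source /etc/profile; $*"
    done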

    /etc/profile

    # /etc/profile
    
    # System wide environment and startup programs, for login setup
    # Functions and aliases go in /etc/bashrc
    
    # It's NOT a good idea to change this file unless you know what you
    # are doing. It's much better to create a custom.sh shell script in
    # /etc/profile.d/ to make custom changes to your environment, as this
    # will prevent the need for merging in future updates.
    
    pathmunge () {
        case ":${PATH}:" in
            *:"$1":*)
                ;;
            *)
                if [ "$2" = "after" ] ; then
                    PATH=$PATH:$1
                else
                    PATH=$1:$PATH
                fi
        esac
    }
    
    
    if [ -x /usr/bin/id ]; then
        if [ -z "$EUID" ]; then
            # ksh workaround
            EUID=`id -u`
            UID=`id -ru`
        fi
        USER="`id -un`"
        LOGNAME=$USER
        MAIL="/var/spool/mail/$USER"
    fi
    
    # Path manipulation
    if [ "$EUID" = "0" ]; then
        pathmunge /sbin
        pathmunge /usr/sbin
        pathmunge /usr/local/sbin
    else
        pathmunge /usr/local/sbin after
        pathmunge /usr/sbin after
        pathmunge /sbin after
    fi
    
    HOSTNAME=`/bin/hostname 2>/dev/null`
    HISTSIZE=1000
    if [ "$HISTCONTROL" = "ignorespace" ] ; then
        export HISTCONTROL=ignoreboth
    else
        export HISTCONTROL=ignoredups
    fi
    
    export PATH USER LOGNAME MAIL HOSTNAME HISTSIZE HISTCONTROL
    
    # By default, we want umask to get set. This sets it for login shell
    # Current threshold for system reserved uid/gids is 200
    # You could check uidgid reservation validity in
    # /usr/share/doc/setup-*/uidgid file
    if [ $UID -gt 199 ] && [ "`id -gn`" = "`id -un`" ]; then
        umask 002
    else
        umask 022
    fi
    
    for i in /etc/profile.d/*.sh ; do
        if [ -r "$i" ]; then
            if [ "${-#*i}" != "$-" ]; then
                . "$i"
            else
                . "$i" >/dev/null 2>&1
            fi
        fi
    done
    
    unset i
    unset -f pathmunge
    JAVA_HOME=/opt/module/jdk1.8.0_241
    HADOOP_HOME=/opt/module/hadoop-2.7.2
    HIVE_HOME=/opt/module/hive
    FLUME_HOME=/opt/module/flume
    HBASE_HOME=/opt/module/hbase
    PHOENIX_HOME=/opt/module/phoenix
    PHOENIX_CLASSPATH=$PHOENIX_HOME
    ZOOKEEPER_HOME=/opt/module/zookeeper-3.4.10
    PATH=$PATH:$JAVA_HOME/bin:$HADOOP_HOME/bin:$HADOOP_HOME/sbin:$HIVE_HOME/bin:$FLUME_HOME/bin:$HBASE_HOME/bin:$PHOENIX_HOME/bin
    export JAVA_HOME PATH HADOOP_HOME HIVE_HOME FLUME_HOME HBASE_HOME PHOENIX_HOME PHOENIX_CLASSPATH ZOOKEEPER_HOME
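    Note that $ZOOKEEPER_HOME/bin is exported but not appended to PATH here, which is why the ZooKeeper script above calls zkServer.sh by its full path. After editing, /etc/profile has to be re-read (or a new login shell opened) and synced to every node; a quick check:

    source /etc/profile
    echo $HADOOP_HOME $HIVE_HOME $ZOOKEEPER_HOME
    which hive flume-ng hbase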

    4. HBase

    conf/hbase-site.xml

    <?xml version="1.0"?>
    <?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
    <!--
    /**
     *
     * Licensed to the Apache Software Foundation (ASF) under one
     * or more contributor license agreements.  See the NOTICE file
     * distributed with this work for additional information
     * regarding copyright ownership.  The ASF licenses this file
     * to you under the Apache License, Version 2.0 (the
     * "License"); you may not use this file except in compliance
     * with the License.  You may obtain a copy of the License at
     *
     *     http://www.apache.org/licenses/LICENSE-2.0
     *
     * Unless required by applicable law or agreed to in writing, software
     * distributed under the License is distributed on an "AS IS" BASIS,
     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     * See the License for the specific language governing permissions and
     * limitations under the License.
     */
    -->
    <configuration>
        <property>
          <name>hbase.rootdir</name>
          <value>hdfs://hadoop101:9000/HBase</value>
        </property>

        <property>
          <name>hbase.cluster.distributed</name>
          <value>true</value>
        </property>

        <!-- New in 0.98+: earlier versions had no .port property and defaulted to port 60000 -->
        <property>
          <name>hbase.master.port</name>
          <value>16000</value>
        </property>

        <property>
          <name>hbase.zookeeper.quorum</name>
          <value>hadoop102,hadoop103,hadoop101</value>
        </property>

        <property>
          <name>hbase.zookeeper.property.dataDir</name>
          <value>/opt/module/zookeeper-3.4.10/datas</value>
        </property>

        <!-- Phoenix RegionServer settings -->
        <property>
          <name>hbase.regionserver.wal.codec</name>
          <value>org.apache.hadoop.hbase.regionserver.wal.IndexedWALEditCodec</value>
        </property>

        <property>
          <name>hbase.region.server.rpc.scheduler.factory.class</name>
          <value>org.apache.hadoop.hbase.ipc.PhoenixRpcSchedulerFactory</value>
          <description>Factory to create the Phoenix RPC Scheduler that uses separate queues for index and metadata updates</description>
        </property>

        <property>
          <name>hbase.rpc.controllerfactory.class</name>
          <value>org.apache.hadoop.hbase.ipc.controller.ServerRpcControllerFactory</value>
          <description>Factory to create the Phoenix RPC Scheduler that uses separate queues for index and metadata updates</description>
        </property>

        <!-- Phoenix Master settings -->
        <property>
          <name>hbase.master.loadbalancer.class</name>
          <value>org.apache.phoenix.hbase.index.balancer.IndexLoadBalancer</value>
        </property>

        <property>
          <name>hbase.coprocessor.master.classes</name>
          <value>org.apache.phoenix.hbase.index.master.IndexMasterObserver</value>
        </property>
    </configuration>

    conf/regionservers

    hadoop101
    hadoop102
    hadoop103

    conf/backup-masters (the file must use exactly this name)

    hadoop101
    hadoop102
    hadoop103

    conf/hbase-env.sh

    #
    #/**
    # * Licensed to the Apache Software Foundation (ASF) under one
    # * or more contributor license agreements.  See the NOTICE file
    # * distributed with this work for additional information
    # * regarding copyright ownership.  The ASF licenses this file
    # * to you under the Apache License, Version 2.0 (the
    # * "License"); you may not use this file except in compliance
    # * with the License.  You may obtain a copy of the License at
    # *
    # *     http://www.apache.org/licenses/LICENSE-2.0
    # *
    # * Unless required by applicable law or agreed to in writing, software
    # * distributed under the License is distributed on an "AS IS" BASIS,
    # * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    # * See the License for the specific language governing permissions and
    # * limitations under the License.
    # */
    
    # Set environment variables here.
    
    # This script sets variables multiple times over the course of starting an hbase process,
    # so try to keep things idempotent unless you want to take an even deeper look
    # into the startup scripts (bin/hbase, etc.)
    
    # The java implementation to use.  Java 1.7+ required.
    # export JAVA_HOME=/usr/java/jdk1.6.0/
    
    # Extra Java CLASSPATH elements.  Optional.
    # export HBASE_CLASSPATH=
    
    # The maximum amount of heap to use. Default is left to JVM default.
    # export HBASE_HEAPSIZE=1G
    
    # Uncomment below if you intend to use off heap cache. For example, to allocate 8G of 
    # offheap, set the value to "8G".
    # export HBASE_OFFHEAPSIZE=1G
    
    # Extra Java runtime options.
    # Below are what we set by default.  May only work with SUN JVM.
    # For more on why as well as other possible settings,
    # see http://wiki.apache.org/hadoop/PerformanceTuning
    export HBASE_OPTS="-XX:+UseConcMarkSweepGC"
    
    # Configure PermSize. Only needed in JDK7. You can safely remove it for JDK8+
    #export HBASE_MASTER_OPTS="$HBASE_MASTER_OPTS -XX:PermSize=128m -XX:MaxPermSize=128m"
    #export HBASE_REGIONSERVER_OPTS="$HBASE_REGIONSERVER_OPTS -XX:PermSize=128m -XX:MaxPermSize=128m"
    
    # Uncomment one of the below three options to enable java garbage collection logging for the server-side processes.
    
    # This enables basic gc logging to the .out file.
    # export SERVER_GC_OPTS="-verbose:gc -XX:+PrintGCDetails -XX:+PrintGCDateStamps"
    
    # This enables basic gc logging to its own file.
    # If FILE-PATH is not replaced, the log file(.gc) would still be generated in the HBASE_LOG_DIR .
    # export SERVER_GC_OPTS="-verbose:gc -XX:+PrintGCDetails -XX:+PrintGCDateStamps -Xloggc:<FILE-PATH>"
    
    # This enables basic GC logging to its own file with automatic log rolling. Only applies to jdk 1.6.0_34+ and 1.7.0_2+.
    # If FILE-PATH is not replaced, the log file(.gc) would still be generated in the HBASE_LOG_DIR .
    # export SERVER_GC_OPTS="-verbose:gc -XX:+PrintGCDetails -XX:+PrintGCDateStamps -Xloggc:<FILE-PATH> -XX:+UseGCLogFileRotation -XX:NumberOfGCLogFiles=1 -XX:GCLogFileSize=512M"
    
    # Uncomment one of the below three options to enable java garbage collection logging for the client processes.
    
    # This enables basic gc logging to the .out file.
    # export CLIENT_GC_OPTS="-verbose:gc -XX:+PrintGCDetails -XX:+PrintGCDateStamps"
    
    # This enables basic gc logging to its own file.
    # If FILE-PATH is not replaced, the log file(.gc) would still be generated in the HBASE_LOG_DIR .
    # export CLIENT_GC_OPTS="-verbose:gc -XX:+PrintGCDetails -XX:+PrintGCDateStamps -Xloggc:<FILE-PATH>"
    
    # This enables basic GC logging to its own file with automatic log rolling. Only applies to jdk 1.6.0_34+ and 1.7.0_2+.
    # If FILE-PATH is not replaced, the log file(.gc) would still be generated in the HBASE_LOG_DIR .
    # export CLIENT_GC_OPTS="-verbose:gc -XX:+PrintGCDetails -XX:+PrintGCDateStamps -Xloggc:<FILE-PATH> -XX:+UseGCLogFileRotation -XX:NumberOfGCLogFiles=1 -XX:GCLogFileSize=512M"
    
    # See the package documentation for org.apache.hadoop.hbase.io.hfile for other configurations
    # needed setting up off-heap block caching. 
    
    # Uncomment and adjust to enable JMX exporting
    # See jmxremote.password and jmxremote.access in $JRE_HOME/lib/management to configure remote password access.
    # More details at: http://java.sun.com/javase/6/docs/technotes/guides/management/agent.html
    # NOTE: HBase provides an alternative JMX implementation to fix the random ports issue, please see JMX
    # section in HBase Reference Guide for instructions.
    
    # export HBASE_JMX_BASE="-Dcom.sun.management.jmxremote.ssl=false -Dcom.sun.management.jmxremote.authenticate=false"
    # export HBASE_MASTER_OPTS="$HBASE_MASTER_OPTS $HBASE_JMX_BASE -Dcom.sun.management.jmxremote.port=10101"
    # export HBASE_REGIONSERVER_OPTS="$HBASE_REGIONSERVER_OPTS $HBASE_JMX_BASE -Dcom.sun.management.jmxremote.port=10102"
    # export HBASE_THRIFT_OPTS="$HBASE_THRIFT_OPTS $HBASE_JMX_BASE -Dcom.sun.management.jmxremote.port=10103"
    # export HBASE_ZOOKEEPER_OPTS="$HBASE_ZOOKEEPER_OPTS $HBASE_JMX_BASE -Dcom.sun.management.jmxremote.port=10104"
    # export HBASE_REST_OPTS="$HBASE_REST_OPTS $HBASE_JMX_BASE -Dcom.sun.management.jmxremote.port=10105"
    
    # File naming hosts on which HRegionServers will run.  $HBASE_HOME/conf/regionservers by default.
    # export HBASE_REGIONSERVERS=${HBASE_HOME}/conf/regionservers
    
    # Uncomment and adjust to keep all the Region Server pages mapped to be memory resident
    #HBASE_REGIONSERVER_MLOCK=true
    #HBASE_REGIONSERVER_UID="hbase"
    
    # File naming hosts on which backup HMaster will run.  $HBASE_HOME/conf/backup-masters by default.
    # export HBASE_BACKUP_MASTERS=${HBASE_HOME}/conf/backup-masters
    
    # Extra ssh options.  Empty by default.
    # export HBASE_SSH_OPTS="-o ConnectTimeout=1 -o SendEnv=HBASE_CONF_DIR"
    
    # Where log files are stored.  $HBASE_HOME/logs by default.
    # export HBASE_LOG_DIR=${HBASE_HOME}/logs
    
    # Enable remote JDWP debugging of major HBase processes. Meant for Core Developers 
    # export HBASE_MASTER_OPTS="$HBASE_MASTER_OPTS -Xdebug -Xrunjdwp:transport=dt_socket,server=y,suspend=n,address=8070"
    # export HBASE_REGIONSERVER_OPTS="$HBASE_REGIONSERVER_OPTS -Xdebug -Xrunjdwp:transport=dt_socket,server=y,suspend=n,address=8071"
    # export HBASE_THRIFT_OPTS="$HBASE_THRIFT_OPTS -Xdebug -Xrunjdwp:transport=dt_socket,server=y,suspend=n,address=8072"
    # export HBASE_ZOOKEEPER_OPTS="$HBASE_ZOOKEEPER_OPTS -Xdebug -Xrunjdwp:transport=dt_socket,server=y,suspend=n,address=8073"
    
    # A string representing this instance of hbase. $USER by default.
    # export HBASE_IDENT_STRING=$USER
    
    # The scheduling priority for daemon processes.  See 'man nice'.
    # export HBASE_NICENESS=10
    
    # The directory where pid files are stored. /tmp by default.
    # export HBASE_PID_DIR=/var/hadoop/pids
    
    # Seconds to sleep between slave commands.  Unset by default.  This
    # can be useful in large clusters, where, e.g., slave rsyncs can
    # otherwise arrive faster than the master can service them.
    # export HBASE_SLAVE_SLEEP=0.1
    
    # Tell HBase whether it should manage its own instance of Zookeeper or not.
    export HBASE_MANAGES_ZK=false
    
    # The default log rolling policy is RFA, where the log file is rolled as per the size defined for the 
    # RFA appender. Please refer to the log4j.properties file to see more details on this appender.
    # In case one needs to do log rolling on a date change, one should set the environment property
    # HBASE_ROOT_LOGGER to "<DESIRED_LOG LEVEL>,DRFA".
    # For example:
    # HBASE_ROOT_LOGGER=INFO,DRFA
    # The reason for changing default to RFA is to avoid the boundary case of filling out disk space as 
    # DRFA doesn't put any cap on the log size. Please refer to HBase-5655 for more context.
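    Because HBASE_MANAGES_ZK=false, the external ZooKeeper ensemble (and HDFS, since hbase.rootdir lives there) must be up before HBase starts. A typical sequence (a sketch, reusing the xcall helper assumed earlier):

    # start HDFS and the external ZooKeeper ensemble first
    start-dfs.sh
    xcall /opt/module/zookeeper-3.4.10/bin/zkServer.sh start
    # then start HBase: HMaster plus the regionservers and backup-masters listed above
    /opt/module/hbase/bin/start-hbase.sh
    # verify from the shell (run `status` inside it)
    /opt/module/hbase/bin/hbase shell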

    5. Phoenix

    Note that Phoenix reuses the HBase configuration: the secondary-index settings go into hbase-site.xml (above), and when the client specifies the ZooKeeper quorum the port number is omitted.
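    In practice that means copying the Phoenix server jar into HBase's lib directory on every node, restarting HBase, and connecting with sqlline using the bare quorum (a sketch; the jar name is an assumption and depends on the Phoenix/HBase versions):

    # distribute the Phoenix server jar to every HBase node (jar name assumed)
    cp /opt/module/phoenix/phoenix-4.14.2-HBase-1.3-server.jar /opt/module/hbase/lib/
    # restart HBase, then connect; note the quorum is given without a port
    /opt/module/phoenix/bin/sqlline.py hadoop101,hadoop102,hadoop103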

    6. Sqoop

    Configure the Hadoop-related environment variables for Sqoop; since /etc/profile above already exports them globally, nothing extra needs to be set.
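    A quick way to confirm that Sqoop sees the Hadoop environment and can reach the MySQL server used above (a sketch; the Sqoop install path and connector jar name are assumptions, and the credentials are the ones from hive-site.xml):

    # the MySQL JDBC driver must also be in Sqoop's lib directory (jar name assumed)
    cp mysql-connector-java-5.1.27-bin.jar /opt/module/sqoop/lib/
    # list databases on hadoop102 to verify connectivity
    /opt/module/sqoop/bin/sqoop list-databases --connect jdbc:mysql://hadoop102:3306/ --username root --password 666666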

    The result of starting hadoop, zookeeper, hive, hbase, phoenix, and sqoop all together.
