zoukankan      html  css  js  c++  java
  • 搭建Hadoop-1.2.1&hbase-0.94.17&hive-0.9.0&centos6.8_x64集群

    一、搭建环境
    --yum install -y java-1.7.0-openjdk.x86_64
    系统:CentOS6.8_x64 hadoop-1.2.1 zookeeper-3.4.6 hbase-0.94.17
    下载软件包:
    --hadoop-1.2.1.tar.gz
    http://mirrors.cnnic.cn/apache/hadoop/common/hadoop-1.2.1/hadoop-1.2.1.tar.gz
    --zookeeper-3.4.6.tar.gz
    http://mirrors.cnnic.cn/apache/zookeeper/zookeeper-3.4.6/zookeeper-3.4.6.tar.gz
    --hbase-0.94.17.tar.gz
    http://archive.apache.org/dist/hbase/hbase-0.94.17/
     
    hadoop    192.168.92.17 NameNode
    hadoop1 192.168.92.18 Datanode
    hadoop2 192.168.92.19 Datanode
     
    --通过浏览器进行查看hadoop等运行状态
    --namenode
    http://192.168.92.17:50070
    --directory
    http://192.168.92.17:50075
    --tracker_hadoop
    http://192.168.92.17:50060
    --Map/Reduce
    http://192.168.92.17:50030
    --hbase
    http://192.168.92.17:60010/
    --hive web interface
    http://192.168.92.17:9999/hwi/
     
    二、搭建流程
    useradd -d /home/hadoop -s /bin/bash -m hadoop
    passwd hadoop
     
    chkconfig iptables off
    service iptables stop
    vi /etc/sysconfig/selinux
    SELINUX=disabled
     
    --Master设置ntpdate服务
    chkconfig ntpd on
    service ntpd restart
    vi /etc/rc.local 加入以下内容,同时确保各slave节点上的ntpd已关闭
     
    while [ 1 ]; do ntpdate hadoop 1>/dev/null 2>&1; sleep 2; done &
     
    --设置hadoop用户的Shell Limits,用root用户登录
    vi /etc/security/limits.conf 添加
    hadoop  -       nofile  32768
     
    --slave节点关闭ntpd
    service ntpd status
    chkconfig ntpd --list
    chkconfig ntpd off
    service ntpd stop
     
    vim /etc/hosts
     
    192.168.92.17   hadoop
    192.168.92.18   hadoop1
    192.168.92.19   hadoop2
     
    --将安装包拷贝到/home/hadoop目录
    cd /home/hadoop
    chown hadoop.hadoop *
    chmod 775 *
     
    su - hadoop
    --配置各节点间的hadoop用户的ssh公钥互信
    --namenode datanode
    --export HADOOP_HOME_WARN_SUPPRESS=1此配置是用来解决hadoop启动警告
    vim ~/.bash_profile
     
    JAVA_HOME=/home/hadoop/jdk1.7.0_55
    export PATH=$JAVA_HOME/bin:$PATH
    export CLASSPATH=.:$JAVA_HOME/lib/dt.jar:$JAVA_HOME/lib/tools.jar
    HADOOP_HOME=/home/hadoop/hadoop-1.2.1
    PATH=$PATH:$JAVA_HOME/bin:$HADOOP_HOME/bin:$HADOOP_HOME/sbin
    export JAVA_HOME PATH HADOOP_HOME
    export HADOOP_HOME_WARN_SUPPRESS=1
     
    export HBASE_HOME=/home/hadoop/hbase-0.94.17
    export PATH=$PATH:$HBASE_HOME/bin
    ZK_HOME=/home/hadoop/zookeeper-3.4.6
    PATH=$ZK_HOME/bin:$PATH
    export PATH ZK_HOME
     
    source ~/.bash_profile
     
    mkdir ~/.ssh
    --需对.ssh目录赋予700权限
    chmod 700 ~/.ssh
    cd .ssh
    --3个节点执行
    ssh-keygen -t rsa
    --namenode执行
    cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys
    --cat ~/.ssh/id_rsa.pub|ssh hadoop1 'sh -c "cat - >>~/.ssh/authorized_keys"'
    --cat ~/.ssh/id_rsa.pub|ssh hadoop2 'sh -c "cat - >>~/.ssh/authorized_keys"'
    ssh hadoop1 "cat ~/.ssh/id_rsa.pub" >> ~/.ssh/authorized_keys
    ssh hadoop2 "cat ~/.ssh/id_rsa.pub" >> ~/.ssh/authorized_keys
    chmod 644 ~/.ssh/authorized_keys
     
    --复制到 datanode
    scp ~/.ssh/authorized_keys hadoop@hadoop1:~/.ssh/authorized_keys
    scp ~/.ssh/authorized_keys hadoop@hadoop2:~/.ssh/authorized_keys
     
    --验证ssh
    ssh hadoop date
    ssh hadoop1 date
    ssh hadoop2 date
     
    --安装配置jdk 可以用jdk-6u45-linux-x64.bin 部署集群后可以更换jdk版本
    cd
    tar xvf jdk-7u55-linux-x64.tar.gz
    --./jdk-6u45-linux-x64.bin
    --复制到slave节点
    scp -r jdk1.7.0_55 hadoop@hadoop1:/home/hadoop
    scp -r jdk1.7.0_55 hadoop@hadoop2:/home/hadoop
     
    tar -xvf hadoop-1.2.1.tar.gz
    tar -xvf hbase-0.94.17.tar.gz
    tar -xvf zookeeper-3.4.6.tar.gz
     
    --安装hadoop
    vim hadoop-1.2.1/conf/hadoop-env.sh
     
    export JAVA_HOME=/home/hadoop/jdk1.7.0_55
     
    --配置主配置文件
    vim hadoop-1.2.1/conf/core-site.xml
     
    <?xml version="1.0"?>
    <?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
    <!-- Put site-specific property overrides in this file. -->
    <configuration>
      <property>
        <name>fs.default.name</name>
        <value>hdfs://192.168.92.17:9000</value>
      </property>
    </configuration>
     
    --修改hdfs配置文件hadoop-1.2.1/conf/hdfs-site.xml
    --mkdir -p /home/hadoop/hadoop-1.2.1/data/dfs/name
    --mkdir -p /home/hadoop/hadoop-1.2.1/data/dfs/data
    --mkdir -p /home/hadoop/hadoop-1.2.1/data/dfs/namesecondary
     
    vim hadoop-1.2.1/conf/hdfs-site.xml
     
    <?xml version="1.0"?>
    <?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
    <!-- Put site-specific property overrides in this file. -->
    <configuration>
      <property>
        <name>dfs.name.dir</name>
        <value>/home/hadoop/hadoop-1.2.1/data/dfs/name</value>
      </property>
      <property>
        <name>dfs.data.dir</name>
        <value>/home/hadoop/hadoop-1.2.1/data/dfs/data</value>
      </property>
      <property>
        <name>dfs.replication</name>
        <value>3</value>   <!-- 表示3个从服务器  -->
      </property>
    </configuration>
     
    --配置任务调度服务配置hadoop-1.2.1/conf/mapred-site.xml
    vim hadoop-1.2.1/conf/mapred-site.xml
     
    <?xml version="1.0"?>
    <?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
    <!-- Put site-specific property overrides in this file. -->
    <configuration>
      <property>
        <name>mapred.job.tracker</name>
        <value>hdfs://192.168.92.17:9001</value> <!-- 配置到主服务器9001端口 -->
      </property>
    </configuration>
     
    --配置主服务器地址 hadoop-1.2.1/conf/masters
    vim hadoop-1.2.1/conf/masters
    hadoop
     
    --配置从服务器地址 应注释192.168.92.17
    vim hadoop-1.2.1/conf/slaves
    hadoop1
    hadoop2
       
    --格式化节点服务器 注意:如果执行过程出现ERROR信息必须解决后重新格式化
    hadoop namenode -format
     
    14/03/27 10:46:39 INFO namenode.NameNode: STARTUP_MSG:
    /************************************************************
    STARTUP_MSG: Starting NameNode
    STARTUP_MSG:   host = hadoop/192.168.92.17
    STARTUP_MSG:   args = [-format]
    STARTUP_MSG:   version = 1.2.1
    STARTUP_MSG:   build = https://svn.apache.org/repos/asf/hadoop/common/branches/branch-1.2 -r 1503152; compiled by 'mattf' on Mon Jul 22 15:23:09 PDT 2013
    STARTUP_MSG:   java = 1.6.0_45
    ************************************************************/ 
    14/03/27 10:46:39 INFO util.GSet: Computing capacity for map BlocksMap
    14/03/27 10:46:39 INFO util.GSet: VM type       = 64-bit
    14/03/27 10:46:39 INFO util.GSet: 2.0% max memory = 1013645312
    14/03/27 10:46:39 INFO util.GSet: capacity      = 2^21 = 2097152 entries
    14/03/27 10:46:39 INFO util.GSet: recommended=2097152, actual=2097152
    14/03/27 10:46:40 INFO namenode.FSNamesystem: fsOwner=hadoop
    14/03/27 10:46:40 INFO namenode.FSNamesystem: supergroup=supergroup
    14/03/27 10:46:40 INFO namenode.FSNamesystem: isPermissionEnabled=true
    14/03/27 10:46:40 INFO namenode.FSNamesystem: dfs.block.invalidate.limit=100
    14/03/27 10:46:40 INFO namenode.FSNamesystem: isAccessTokenEnabled=false accessKeyUpdateInterval=0 min(s), accessTokenLifetime=0 min(s)
    14/03/27 10:46:40 INFO namenode.FSEditLog: dfs.namenode.edits.toleration.length = 0
    14/03/27 10:46:40 INFO namenode.NameNode: Caching file names occuring more than 10 times
    14/03/27 10:46:41 INFO common.Storage: Image file /tmp/hadoop-hadoop/dfs/name/current/fsimage of size 114 bytes saved in 0 seconds.
    14/03/27 10:46:41 INFO namenode.FSEditLog: closing edit log: position=4, editlog=/tmp/hadoop-hadoop/dfs/name/current/edits
    14/03/27 10:46:41 INFO namenode.FSEditLog: close success: truncate to 4, editlog=/tmp/hadoop-hadoop/dfs/name/current/edits
    14/03/27 10:46:41 INFO common.Storage: Storage directory /tmp/hadoop-hadoop/dfs/name has been successfully formatted.
    14/03/27 10:46:41 INFO namenode.NameNode: SHUTDOWN_MSG:
    /************************************************************
    SHUTDOWN_MSG: Shutting down NameNode at hadoop/192.168.92.17
    ************************************************************/
     
    --拷贝hadoop-1.2.1到其他服务器
    scp -r hadoop-1.2.1 hadoop@hadoop1:~
    scp -r hadoop-1.2.1 hadoop@hadoop2:~
     
    --启动和停止集群
    start-all.sh
     
    starting namenode, logging to /home/hadoop/hadoop-1.2.1/libexec/../jpslogs/hadoop-hadoop-namenode-hadoop.out
    192.168.92.18: starting datanode, logging to /home/hadoop/hadoop-1.2.1/libexec/../logs/hadoop-hadoop-datanode-hadoop1.out
    192.168.92.19: starting datanode, logging to /home/hadoop/hadoop-1.2.1/libexec/../logs/hadoop-hadoop-datanode-hadoop2.out
    192.168.92.17: starting datanode, logging to /home/hadoop/hadoop-1.2.1/libexec/../logs/hadoop-hadoop-datanode-hadoop.out
    192.168.92.17: starting secondarynamenode, logging to /home/hadoop/hadoop-1.2.1/libexec/../logs/hadoop-hadoop-secondarynamenode-hadoop.out
    starting jobtracker, logging to /home/hadoop/hadoop-1.2.1/libexec/../logs/hadoop-hadoop-jobtracker-hadoop.out
    192.168.92.18: starting tasktracker, logging to /home/hadoop/hadoop-1.2.1/libexec/../logs/hadoop-hadoop-tasktracker-hadoop1.out
    192.168.92.19: starting tasktracker, logging to /home/hadoop/hadoop-1.2.1/libexec/../logs/hadoop-hadoop-tasktracker-hadoop2.out
    192.168.92.17: starting tasktracker, logging to /home/hadoop/hadoop-1.2.1/libexec/../logs/hadoop-hadoop-tasktracker-hadoop.out
     
    jps
    3749 DataNode
    3629 NameNode
    3972 JobTracker
    4102 TaskTracker
    4149 Jps
    3872 SecondaryNameNode
     
    jps
    3690 Jps
    3607 TaskTracker
    3502 DataNode
     
    --在主服务器上输入stop-all.sh就可以关闭整个集群
    stop-all.sh
    stopping jobtracker
    192.168.92.18: stopping tasktracker
    192.168.92.17: stopping tasktracker
    192.168.92.19: stopping tasktracker
    stopping namenode
    192.168.92.18: stopping datanode
    192.168.92.17: stopping datanode
    192.168.92.19: stopping datanode
    192.168.92.17: stopping secondarynamenode
     
    --如果某些地址看不了,应该是windows下的hosts文件没有配置主机IP和主机名映射导致的,比如windows7下,就修改C:\Windows\System32\drivers\etc\hosts这个文件,加入主机名和IP映射
    192.168.92.17   hadoop
    192.168.92.18   hadoop1
    192.168.92.19   hadoop2
     
    ---------------------------------------------------------------------
    HBase在Hadoop集群下搭建过程:
    在安装HBase集群前,必须先安装zookeeper。
    ZooKeeper是Hadoop的正式子项目,它是一个针对大型分布式系统的可靠协调系统,提供的功能包括:配置维护、名字服务、分布式同步、组服务等。ZooKeeper的目标就是封装好复杂易出错的关键服务,将简单易用的接口和性能高效、功能稳定的系统提供给用户。Zookeeper是Google的Chubby一个开源的实现,是高有效和可靠的协同工作系统,Zookeeper能够用来leader选举,配置信息维护等,在一个分布式的环境中,需要一个Master实例或存储一些配置信息,确保文件写入的一致性等.ZooKeeper是一个分布式的,开放源码的分布式应用程序协调服务,包含一个简单的原语集,是Hadoop和Hbase的重要组件。HBase需要Zookeeper来协调HBase集群,Zookeeper Quorum中除了存储了 HBase的-ROOT-表的地址和HMaster的地址,HRegionServer也会把自己以Ephemeral方式注册到Zookeeper中,使得 HMaster可以随时感知到各个HRegionServer的健康状态。此外,Zookeeper也避免了HBase中HMaster的单点问题。
    一、安装zookeeper
    cd /home/hadoop/zookeeper-3.4.6/conf/
    cp zoo_sample.cfg zoo.cfg
    vim zoo.cfg
     
    #修改此目录
    dataDir=/home/hadoop/zookeeper-3.4.6/data
    #添加如下内容 
    server.1=192.168.92.17:2887:3887
    server.2=192.168.92.18:2888:3888
    server.3=192.168.92.19:2889:3889
     
    输入服务器编号myid,分别为1 2 3
    cd ..
    mkdir data
    cd data
    vim myid
    1
     
    --拷贝数据到其他服务器
    cd ~
    scp -r zookeeper-3.4.6 hadoop@hadoop1:~
    scp -r zookeeper-3.4.6 hadoop@hadoop2:~
    --复制完成,以hadoop用户登录到其他服务器,修改zookeeper-3.4.6/data/myid 文件的内容,hadoop1服务器就改成2,hadoop2服务器就改成3
    vim zookeeper-3.4.6/data/myid
    2
    vim zookeeper-3.4.6/data/myid
    3
     
    --启动验证
    在从hadoop2、hadoop1,hadoop,依次执行zkServer.sh start 来启动 zookeeper,所有服务器启动完成后,就可以通过zkServer.sh status来查看服务器状态。没有报错说明都正常了。输入jps可以看到服务器中多了一个QuorumPeerMain服务。
    zkServer.sh start
    zkServer.sh start
    zkServer.sh start
     
    JMX enabled by default
    Using config: /home/hadoop/zookeeper-3.4.6/bin/../conf/zoo.cfg
    Starting zookeeper ... STARTED
     
    zkServer.sh status
     
    JMX enabled by default
    Using config: /home/hadoop/zookeeper-3.4.6/bin/../conf/zoo.cfg
    Mode: follower
     
    jps
    4870 SecondaryNameNode
    4625 NameNode
    4746 DataNode
    5102 TaskTracker
    6377 Jps
    5744 QuorumPeerMain
    4971 JobTracker
     
    二、 安装hbase
    --配置数据目录、集群模式、Zookeeper服务器地址
    --hbase.rootdir与hadoop的core-site.xml的fs.default.name配置一样:hdfs://hadoop:9000/hbase
    vim hbase-0.94.17/conf/hbase-site.xml
     
    <?xml version="1.0"?>
    <?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
    <configuration>
      <property>
        <name>hbase.rootdir</name>
        <value>hdfs://hadoop:9000/hbase</value>
        <description>区域服务器使用存储HBase数据库数据的目录,服务器名称不能填IP,不然会报错</description>
      </property>
      <property>
         <name>hbase.cluster.distributed</name>
         <value>true</value>
         <description>指定HBase运行的模式:false: 单机模式或者为分布式模式true: 全分布模式 </description>
      </property>
      <property>
        <name>hbase.zookeeper.quorum</name>
        <value>hadoop,hadoop1,hadoop2</value>
        <description>ZooKeeper集群服务器的位置</description>
      </property>
    </configuration>
     
    --配置数据服务器地址
    vim hbase-0.94.17/conf/regionservers
    hadoop
    hadoop1
    hadoop2
     
    --配置HBase中Zookeeper使用方式
    --在hbase-0.94.17/conf/hbase-env.sh文件最尾部,打开注释 export HBASE_MANAGES_ZK=false,修改true为false。意思是使用外部的Zookeeper
    vim hbase-0.94.17/conf/hbase-env.sh
     
    export HBASE_MANAGES_ZK=false
    export JAVA_HOME=/home/hadoop/jdk1.7.0_55
     
    --复制HBase目录到其他服务器
    --在hadoop上以hadoop用户,使用以下命令进行复制:
    scp -r hbase-0.94.17 hadoop@hadoop1:~
    scp -r hbase-0.94.17 hadoop@hadoop2:~
     
    --启动,验证
    拷贝完成后就可以输入:start-hbase.sh启动HBase集群了;启动完成后,hadoop上使用jps命令可以看到多了一个HMaster服务,在子节点输入jps可以看到多了一个HRegionServer服务; 登录HBase可以使用hbase shell命令登录HBase,输入status查看当前状态。输入exit退出HBase服务。
    start-hbase.sh
     
    starting master, logging to /home/hadoop/hbase-0.94.17/logs/hbase-hadoop-master-hadoop.out
    192.168.92.18: starting regionserver, logging to /home/hadoop/hbase-0.94.17/bin/../logs/hbase-hadoop-regionserver-hadoop1.out
    192.168.92.19: starting regionserver, logging to /home/hadoop/hbase-0.94.17/bin/../logs/hbase-hadoop-regionserver-hadoop2.out
    192.168.92.17: starting regionserver, logging to /home/hadoop/hbase-0.94.17/bin/../logs/hbase-hadoop-regionserver-hadoop.out
     
    jps
     
    4870 SecondaryNameNode
    4625 NameNode
    6013 HMaster
    4746 DataNode
    5102 TaskTracker
    6377 Jps
    5744 QuorumPeerMain
    4971 JobTracker
    6171 HRegionServer
     
    hbase shell
     
    HBase Shell; enter 'help<RETURN>' for list of supported commands.
    Type "exit<RETURN>" to leave the HBase Shell
    Version 0.94.17, r1569509, Tue Feb 18 22:25:31 UTC 2014
     
    status
     
    3 servers, 0 dead, 0.6667 average load
     
    --退出hbase
    exit
     
    --通过浏览器查看:在浏览器中输入
    --Master: hadoop
    http://192.168.92.17:60010
     
    --启动集群
    start-all.sh
    --所有节点执行zkServer.sh start
    zkServer.sh start
    --查询zkServer进程状态
    zkServer.sh status
    start-hbase.sh
    --停止集群
    stop-hbase.sh
    zkServer.sh stop
    stop-all.sh
     
    ---------------------------------------------
    --hive客户端script操作的网页页面 对应服务为RunJar
    http://192.168.92.17:9999/hwi
    --整合 hive-0.9.0 hbase-0.94.17
    tar -xzvf hive-0.9.0.tar.gz
    mkdir hive-config
    cd conf
    cp hive-env.sh.template hive-env.sh
    cp hive-default.xml.template hive-default.xml
    cp hive-default.xml.template hive-site.xml
    cp hive-exec-log4j.properties.template hive-exec-log4j.properties
    cp hive-log4j.properties.template hive-log4j.properties
     
    --在hive-log4j.properties中将log4j.appender.EventCounter的值修改为org.apache.hadoop.log.metrics.EventCounter,这样就不会报WARNING: org.apache.hadoop.metrics.jvm.EventCounter is deprecated. Please use org.apache.hadoop.log.metrics.EventCounter in all the log4j.properties files.的警告了。
    vi hive-log4j.properties 修改如下
     
    log4j.appender.EventCounter=org.apache.hadoop.log.metrics.EventCounter
     
    --修改用户的环境变量,加入以下内容。
    vi  ~/.bash_profile
     
    export HIVE_HOME=/home/hadoop/hive-0.9.0
    export PATH=$PATH:$HIVE_HOME/bin
     
    source ~/.bash_profile
     
    --退出hadoop用户重新登录,执行hive命令
    hive
     
    Logging initialized using configuration in file:/hadoop/hive-config/hive-log4j.properties
    Hive history file=/tmp/hadoop/hive_job_log_hadoop_201209171124_1209357583.txt
     
    hive> show tables;
    OK
    Time taken: 4.222 seconds
     
    --示例 hbase测试
    hadoop fs -lsr /
    --表名test 键值名rowkey 字段属性info
    create 'test','rowkey','info';
    list
    put 'test','rowkey1','info:name','zhangsan'
    put 'test','rowkey2','info:address',''
    scan 'test'
    --更新键值rowkey2对应的记录
    put 'test','rowkey2','info:address','shanghai'
    --查询'test'表中的数据
    scan 'test'
    exit
     
    --可以配置hive页面等
    vi hive-0.9.0/conf/hive-env.sh 添加
    export HIVE_HOME=/home/hadoop/hive-0.9.0
    export PATH=$HIVE_HOME/bin:$PATH
    JAVA_HOME=/home/hadoop/jdk1.7.0_55
    export CLASSPATH=.:$JAVA_HOME/lib:$JRE_HOME/lib:$CLASSPATH
    export PATH=.:$JAVA_HOME/bin:$JRE_HOME/bin:$PATH
     
    --编辑hive-site.xml添加
    --hive-hwi-0.9.0.war为hive的页面对应的包
    --无,file:///hadoop/hbase/lib/protobuf-java-2.4.0a.jar
    --自动创建/home/hadoop/hive-0.9.0/logs
    vi hive-site.xml
     
    <property>
       <name>hbase.zookeeper.quorum</name>
       <value>hadoop,hadoop1,hadoop2</value>
    </property>
    <property>
      <name>hive.aux.jars.path</name>
      <value>file:///hadoop/hive/lib/hive-hbase-handler-0.9.0.jar,file:///hadoop/hive/lib/zookeeper-3.4.3.jar,file:///hadoop/hive/lib/hbase-0.92.0.jar</value>
    </property>
    <property>
      <name>hive.querylog.location</name>
      <value>/home/hadoop/hive-0.9.0/logs</value>
    </property>
     
    --当多用户登录时需安装mysql修改如下参数
    --javax.jdo.option.ConnectionURL(当出现字符集问题时在此指定字符集)
    <property>
      <name>javax.jdo.option.ConnectionURL</name>
       <value>jdbc:mysql://hadoop:3306/hive</value>
    </property>
    <property>
      <name>javax.jdo.option.ConnectionDriverName</name>
       <value>com.mysql.jdbc.Driver</value>
    </property>
    <property>
      <name>javax.jdo.option.ConnectionUserName</name>
      <value>hive</value>
    </property>
    <property>
      <name>javax.jdo.option.ConnectionPassword</name>
      <value>oracle</value>
    </property>
     
    --启动hive的web界面 如何停止???
    sh $HIVE_HOME/bin/hive --service hwi &
    --hive连接hbase测试
    hive
    --外部表不能load data数据
    CREATE TABLE hbase_table_1(key int, value string) STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler' WITH SERDEPROPERTIES ("hbase.columns.mapping" = ":key,cf1:val") TBLPROPERTIES ("hbase.table.name" = "xyz");
    select * from hbase_table_1;
    --查询表结构
    desc hbase_table_1;
    quit;
    --转到hbase
    --查询与hive的表hbase_table_1对应的hbase中的表xyz
    hbase shell
    list
    scan 'xyz'
     
    --创建本地表
    --准备测试数据
    cd /home/hadoop/data_hive
    vi test.txt
    1       'a1'
    2       'a2'
     
    CREATE TABLE hbase_table_2(key int, value string) ROW FORMAT DELIMITED fields TERMINATED BY ' ' STORED AS TEXTFILE;
    --load data到表hbase_table_2
    load data local inpath '/home/hadoop/data_hive/test.txt' into table hbase_table_2;
     
    --注意有可能jar包版本不一致需要在hbase和hadoop拷贝对应版本
    --mysql安装
    rpm -ivh mysql-community-common-5.7.18-1.el5.x86_64.rpm --nodeps --force
    rpm -ivh mysql-community-libs-5.7.18-1.el5.x86_64.rpm
    rpm -ivh mysql-community-client-5.7.18-1.el5.x86_64.rpm
    rpm -ivh mysql-community-server-5.7.18-1.el5.x86_64.rpm
     
    --mysql初始化 默认路径和指定路径2个库都进行文件初始化 默认路径初始化要求的密码复杂度高
    mkdir -pv /usr/local/mysql/data
     
    --初始化过程无任何提示
    mysqld --initialize --user=mysql --datadir=/usr/local/mysql/data --innodb_undo_tablespaces=3 --explicit_defaults_for_timestamp
     
    --------------------------
    vi /etc/my.cnf 修改为
     
    --注释datadir
    #datadir=/var/lib/mysql
     
    --增加以下
    datadir=/usr/local/mysql/data
     
    [mysql.server]
    user=mysql
    basedir=/usr/local/mysql
     
    [client]
    socket=/var/lib/mysql/mysql.sock
     
    --------------------------
    --开启mysqld服务 数据文件路径设置问题
    service mysqld start
    --端口是否打开
    lsof -i:3306
    --mysqld服务是否正在运行
    service mysqld status
     
    --查询mysql初始化密码(自定义路径的库) password:t-HsdeRt?8d0
    grep 'temporary password' /var/log/mysqld.log
    --cat /root/.mysql_secret
     
    --设置密码 进入mysql
    mysql -uroot -p
    --输入上一步查询出的初始化密码
    SET PASSWORD = PASSWORD('oracle');
    --或
    alter user root@localhost identified by 'oracle';
    flush privileges;
     
    --mysql创建数据库hive
    create database hive;
    --创建用户
    grant all on *.* to hadoop@'%' identified by 'oracle';
    --grant ALL ON *.* to 'hadoop'@'192.168.0.0/255.255.0.0' identified by 'oracle';
    flush privileges;
    --创建测试数据库 指定字符集和编码
    show databases;
    --创建hive数据库
    create database hive;
     
    ---------------------------------------------------------------------
    --整合mysql到hive后 load data报错 但数据可以load成功
    load data local inpath '/home/hadoop/data_hive/test.txt' into table test;
    Copying data from file:/home/hadoop/data_hive/test.txt
    Copying file: file:/home/hadoop/data_hive/test.txt
    Loading data to table default.test
    Failed with exception Iteration request failed : SELECT `A0`.`BUCKET_COL_NAME`,`A0`.`INTEGER_IDX` AS NUCORDER0 FROM `BUCKETING_COLS` `A0` WHERE `A0`.`SD_ID` = ? AND `A0`.`INTEGER_IDX` >= 0 ORDER BY NUCORDER0
    FAILED: Execution Error, return code 1 from org.apache.hadoop.hive.ql.exec.MoveTask
     
    --去掉hadoop的安全模式
    --确认hbase中conf配置文件hbase-site.xml中的hbase.rootdir值: hdfs://master:54310/hbase 与 hadoop中配置文件core-site.xml中的fs.default.name值:hdfs://master:54310 保持一致(rootdir只是在fs.default.name之后追加/hbase路径)
    --重新执行Hadoop和Hbase进程时,要kill掉当前的HBase,Hadoop进程
    hadoop dfsadmin -safemode leave
     
    常见错误
    error 1:
    ---------------------
    hive> show tables;
    FAILED: Error in metadata: javax.jdo.JDOFatalInternalException: Error creating transactional connection factory
     
    Solution:
    --下载mysql-connector-java-5.1.18.tar.gz Hive不带mysql JDBC驱动
    --wget http://dev.mysql.com/get/Downloads/Connector-J/mysql-connector-java-5.1.18.tar.gz/from/http://mysql.mirror.kangaroot.net/
    tar zxf mysql-connector-java-5.1.18.tar.gz
    cd mysql-connector-java-5.1.18
    cp mysql-connector*.jar $HIVE_HOME/lib
     
    error 2:
    ----------------------
    hive> show tables;
    FAILED: Error in metadata: javax.jdo.JDOException: Couldnt obtain a new sequence (unique id) : Cannot execute statement: impossible to write to binary log since BINLOG_FORMAT = STATEMENT and at least one table uses a storage engine limited to row-based logging. InnoDB is limited to row-logging when transaction isolation level is READ COMMITTED or READ UNCOMMITTED.
     
    Solution:
    在mysql中设置 binlog_format='MIXED'
    mysql> SET SESSION binlog_format = 'MIXED';
     
  • 相关阅读:
    软件开发人员的简历项目经验怎么写?
    mapreduce 多种输入
    lnmp如何实现伪静态,默认目录伪静态
    LNMP环境中WordPress程序伪静态解决方案
    wordpress必装的插件 wp最常用的十个插件
    debian系统下改语言设置
    Centos7 开启端口
    EventProcessor与WorkPool用法--可处理多消费者
    Disruptor入门
    Disruptor初级入门
  • 原文地址:https://www.cnblogs.com/buffercache/p/14238226.html
Copyright © 2011-2022 走看看