zoukankan      html  css  js  c++  java
  • 配置Hive元数据数据库为PostgreSQL

    配置Hive元数据数据库为PostgreSQL
    前提环境:已在Linux服务器上安装好Hadoop集群
    安装hive
    到hive官网下载hive安装包
    解压及重命名
    tar zxvf apache-hive-2.3.6-bin.tar.gz
    mv apache-hive-2.3.6-bin hive
    /etc/profile环境变量配置
    vim /etc/profile
    export JAVA_HOME=/usr/local/jdk1.8.0_261
    PATH=$PATH:$JAVA_HOME/bin
    export CLASSPATH=.:$JAVA_HOME/lib/dt.jar:$JAVA_HOME/lib/tools.jar
    export ZOOKEEPER_HOME=/usr/local/zookeeper-3.4.5
    export PATH=$ZOOKEEPER_HOME/bin:$PATH
    export HADOOP_HOME=/usr/local/hadoop-2.7.3
    export JAVA_LIBRARY_PATH=$HADOOP_HOME/lib/native
    export HIVE_HOME=/usr/local/hive
    export HIVE_CONF_DIR=$HIVE_HOME/conf
    export PATH=$PATH:$JAVA_HOME/bin:$ZOOKEEPER_HOME/bin:$PATH:$HADOOP_HOME/bin:$HADOOP_HOME/sbin

    source /etc/profile
    hive-site.xml配置文件
    /usr/local/hive/conf
    [root@DM03 conf]# cat hive-site.xml
    <?xml version="1.0" encoding="UTF-8" standalone="no"?>
    <?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
    <configuration>
    <property>
    <name>hive.exec.scratchdir</name>
    <value>/tmp/hive2</value> #HDFS DIR
    </property>
    <property>
    <name>hive.metastore.warehouse.dir</name>
    <value>/user/hive2/warehouse</value> #HDFS DIR
    </property>
    <property>
    <name>hive.querylog.location</name>
    <value>/usr/local/hive/log</value> #LOCAL DIR
    </property>
    <property>
    <name>hive.metastore.local</name>
    <value>true</value>
    </property>
    <property>
    <name>javax.jdo.option.ConnectionURL</name>
    <value>jdbc:postgresql://172.25.5.40:5432/metastore_db</value>
    <description>JDBC connect string for a JDBC metastore</description>
    </property>
    <property>
    <name>javax.jdo.option.ConnectionDriverName</name>
    <value>org.postgresql.Driver</value>
    <description>Driver class name for a JDBC metastore</description>
    </property>
    <property>
    <name>javax.jdo.option.ConnectionUserName</name>
    <value>hive_user</value>
    <description>Username to use against metastore database</description>
    </property>
    <property>
    <name>javax.jdo.option.ConnectionPassword</name>
    <value>hive_user</value>
    <description>password to use against metastore database</description>
    </property>

    </configuration>
    创建HDFS目录
    <property>
    <name>hive.exec.scratchdir</name>
    <value>/tmp/hive2</value> #HDFS DIR
    </property>
    <property>
    <name>hive.metastore.warehouse.dir</name>
    <value>/user/hive2/warehouse</value> #HDFS DIR
    </property>
    hdfs dfs -mkdir -p /tmp/hive2
    hdfs dfs -chmod -R 777 /tmp/hive2
    hdfs dfs -mkdir -p /user/hive2/warehouse
    hdfs dfs -chmod -R 777 /user/hive2/warehouse
    创建本地临时目录
    cd /usr/local/hive/
    mkdir tmp
    chmod -R 777 tmp/
    hive 环境变量配置
    hive-env.sh
    export HADOOP_HOME=/usr/local/hadoop-2.7.3
    export HIVE_CONF_DIR=/usr/local/hive/conf
    export HIVE_AUX_JARS_PATH=/usr/local/hive/lib

    1.在Postgres中为元数据增加用户及数据库
    首先在PostgreSQL中为HIVE的元数据建立帐号和DB
    --以管理员身份登入PG:
    psql postgres -U postgres
    --创建用户hive_user:
    create user hive_user;
    --创建DB metastore_db,owner为hive_user:
    create database metastore_db with owner=hive_user;
    --设置hive_user的密码:
    \password hive_user
    在PostgreSQL的pg_hba.conf中增加配置,允许Hive所在机器的IP访问PG
    /usr/local/pgsql/data/pg_hba.conf
    host all all 0.0.0.0/0 trust
    (注意:0.0.0.0/0 对所有IP开放,trust 表示不校验密码,仅适合测试环境;生产环境应将地址限定为Hive所在机器的IP,并使用 md5 等密码认证方式)
    下载Postgresql的jdbc驱动
    https://jdbc.postgresql.org/download.html
    将下载的PostgreSQL JDBC驱动放到hive的lib目录中
    [root@DM03 hive]# pwd
    /usr/local/hive
    [root@DM03 hive]# ll lib/postgresql-42.2.2.jar
    初始化Hive元数据
    [root@DM03 bin]# ./schematool -initSchema -dbType postgres
    SLF4J: Class path contains multiple SLF4J bindings.
    SLF4J: Found binding in [jar:file:/usr/local/hive/lib/log4j-slf4j-impl-2.6.2.jar!/org/slf4j/impl/StaticLoggerBinder.class]
    SLF4J: Found binding in [jar:file:/usr/local/hadoop-2.7.3/share/hadoop/common/lib/slf4j-log4j12-1.7.10.jar!/org/slf4j/impl/StaticLoggerBinder.class]
    SLF4J: See http://www.slf4j.org/codes.html#multiple_bindings for an explanation.
    SLF4J: Actual binding is of type [org.apache.logging.slf4j.Log4jLoggerFactory]
    Metastore connection URL: jdbc:postgresql://172.25.5.40:5432/metastore_db
    Metastore Connection Driver : org.postgresql.Driver
    Metastore connection User: hive_user
    Starting metastore schema initialization to 2.3.0
    Initialization script hive-schema-2.3.0.postgres.sql
    Initialization script completed
    schemaTool completed
    查看初始化的Hive元数据
    [root@DM04 local]# psql -h 172.25.5.40 -Uhive_user -p 5432 -W -d metastore_db
    Password for user hive_user:
    psql (9.2.24, server 11.8)
    WARNING: psql version 9.2, server version 11.0.
    Some psql features might not work.
    Type "help" for help.

    metastore_db=> \d
    List of relations
    Schema | Name | Type | Owner
    --------+---------------------------+----------+-----------
    public | BUCKETING_COLS | table | hive_user
    public | CDS | table | hive_user
    public | COLUMNS_V2 | table | hive_user
    public | DATABASE_PARAMS | table | hive_user
    public | DBS | table | hive_user
    public | DB_PRIVS | table | hive_user
    public | DELEGATION_TOKENS | table | hive_user
    public | FUNCS | table | hive_user
    public | FUNC_RU | table | hive_user
    public | GLOBAL_PRIVS | table | hive_user
    public | IDXS | table | hive_user
    public | INDEX_PARAMS | table | hive_user
    public | KEY_CONSTRAINTS | table | hive_user
    public | MASTER_KEYS | table | hive_user
    public | MASTER_KEYS_KEY_ID_seq | sequence | hive_user
    public | NOTIFICATION_LOG | table | hive_user
    public | NOTIFICATION_SEQUENCE | table | hive_user
    public | NUCLEUS_TABLES | table | hive_user
    public | PARTITIONS | table | hive_user
    public | PARTITION_EVENTS | table | hive_user
    public | PARTITION_KEYS | table | hive_user
    public | PARTITION_KEY_VALS | table | hive_user
    public | PARTITION_PARAMS | table | hive_user
    public | PART_COL_PRIVS | table | hive_user
    public | PART_COL_STATS | table | hive_user
    public | PART_PRIVS | table | hive_user
    public | ROLES | table | hive_user
    public | ROLE_MAP | table | hive_user
    public | SDS | table | hive_user
    public | SD_PARAMS | table | hive_user
    public | SEQUENCE_TABLE | table | hive_user
    public | SERDES | table | hive_user
    public | SERDE_PARAMS | table | hive_user
    public | SKEWED_COL_NAMES | table | hive_user
    public | SKEWED_COL_VALUE_LOC_MAP | table | hive_user
    public | SKEWED_STRING_LIST | table | hive_user
    public | SKEWED_STRING_LIST_VALUES | table | hive_user
    public | SKEWED_VALUES | table | hive_user
    public | SORT_COLS | table | hive_user
    public | TABLE_PARAMS | table | hive_user
    public | TAB_COL_STATS | table | hive_user
    public | TBLS | table | hive_user
    public | TBL_COL_PRIVS | table | hive_user
    public | TBL_PRIVS | table | hive_user
    public | TYPES | table | hive_user
    public | TYPE_FIELDS | table | hive_user
    public | VERSION | table | hive_user
    public | aux_table | table | hive_user
    public | compaction_queue | table | hive_user
    public | completed_compactions | table | hive_user
    public | completed_txn_components | table | hive_user
    public | hive_locks | table | hive_user
    public | next_compaction_queue_id | table | hive_user
    public | next_lock_id | table | hive_user
    public | next_txn_id | table | hive_user
    public | txn_components | table | hive_user
    public | txns | table | hive_user
    public | write_set | table | hive_user
    (58 rows)
    [root@DM03 bin]# ./hive
    which: no hbase in (/root/perl5/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/usr/local/jdk1.8.0_261/bin:/usr/local/hadoop-2.7.3/bin:/usr/local/hadoop-2.7.3/sbin:/usr/local/spark-2.4.3-bin-hadoop2.7/bin:/data1/db/python37/bin:/usr/local/hive/bin:/root/bin)
    SLF4J: Class path contains multiple SLF4J bindings.
    SLF4J: Found binding in [jar:file:/usr/local/hive/lib/log4j-slf4j-impl-2.6.2.jar!/org/slf4j/impl/StaticLoggerBinder.class]
    SLF4J: Found binding in [jar:file:/usr/local/hadoop-2.7.3/share/hadoop/common/lib/slf4j-log4j12-1.7.10.jar!/org/slf4j/impl/StaticLoggerBinder.class]
    SLF4J: See http://www.slf4j.org/codes.html#multiple_bindings for an explanation.
    SLF4J: Actual binding is of type [org.apache.logging.slf4j.Log4jLoggerFactory]

    Logging initialized using configuration in file:/usr/local/hive/conf/hive-log4j2.properties Async: true
    Hive-on-MR is deprecated in Hive 2 and may not be available in the future versions. Consider using a different execution engine (i.e. spark, tez) or using Hive 1.X releases.
    hive> show databases;
    OK
    default
    Time taken: 7.171 seconds, Fetched: 1 row(s)
    hive> create database testdb;
    OK
    Time taken: 0.366 seconds
    hive> use testdb;
    OK
    Time taken: 0.044 seconds
    hive>
    hive> create table user_sample
    > (
    > user_num bigint,
    > user_name string,
    > user_gender string,
    > user_age int
    > ) row format delimited fields terminated by ',';
    OK
    Time taken: 0.985 seconds

  • 相关阅读:
    UI/UE对个性化推荐的影响
    毫秒转换为天、小时、分、秒
    查生字
    探秘推荐引擎之协同过滤算法小综述
    给文献添加上标
    雅可比迭代和高斯赛德尔迭代
    广义二项式定理求解系数
    关于最大流的EdmondsKarp算法详解
    海量数据处理利器之布隆过滤器
    [leetcode] Path sum路径之和
  • 原文地址:https://www.cnblogs.com/songyuejie/p/14252319.html
Copyright © 2011-2022 走看看