  • A brief walkthrough of table creation in Apache Hive

    Connect to Hive with the Beeline client
    [root@bigdata-02 bin]# ./beeline 
    Beeline version 1.2.1 by Apache Hive
    beeline> ! connect jdbc:hive2://bigdata-01:10000
    Connecting to jdbc:hive2://bigdata-01:10000
    Enter username for jdbc:hive2://bigdata-01:10000: root
    Enter password for jdbc:hive2://bigdata-01:10000: ******
    Connected to: Apache Hive (version 1.2.1)
    Driver: Hive JDBC (version 1.2.1)
    Transaction isolation: TRANSACTION_REPEATABLE_READ
    0: jdbc:hive2://bigdata-01:10000>
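
    The same connection can also be scripted in one step with Beeline's -u/-n/-p options (a sketch; substitute the real password):
    [root@bigdata-02 bin]# ./beeline -u jdbc:hive2://bigdata-01:10000 -n root -p '<password>'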
    
    create database hive_test;
    show databases;
    use hive_test;
    
    Create a table
    create table t_a1(id int,name string) row format delimited fields terminated by ',';
    Load the data. Use LOCAL when the source file is on the local filesystem of the HiveServer2 host; omit LOCAL when the file is already in HDFS (it is then moved into the table directory). The LOAD DATA here targets the managed (internal) table:
    load data local inpath '/root/1.txt' into table t_a1;
    
    Alternatively, put the file straight into the table's warehouse directory:
    hadoop fs -put 1.txt /user/hive/warehouse/hive_test.db/t_a1
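
    Either way the file ends up under the table's directory; a quick check (paths assume the default warehouse location):
    hadoop fs -ls /user/hive/warehouse/hive_test.db/t_a1
    describe formatted t_a1;    -- shows the Location and "Table Type: MANAGED_TABLE"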
    
    Contents of 1.txt:
    1,张学友
    2,刘德华
    3,黎明
    4,郭富城
    
    0: jdbc:hive2://bigdata-01:10000> select * from t_a1;
    +----------+------------+--+
    | t_a1.id  | t_a1.name  |
    +----------+------------+--+
    | 1        | 张学友        |
    | 2        | 刘德华        |
    | 3        | 黎明         |
    | 4        | 郭富城        |
    +----------+------------+--+
    4 rows selected (1.358 seconds)
    
    
    
    // Create an external table
    create external table t_a2(id int,name string) row format delimited fields terminated by ',' location '/test/';
    
    
    hadoop fs -mkdir /test
    hadoop fs -put 1.txt /test
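
    The external table simply reads whatever lands under its LOCATION; the table type and location can be confirmed with (a sketch, output omitted):
    describe formatted t_a2;    -- Table Type: EXTERNAL_TABLE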
    
    0: jdbc:hive2://bigdata-01:10000> select * from t_a2;
    +----------+------------+--+
    | t_a2.id  | t_a2.name  |
    +----------+------------+--+
    | 1        | 张学友        |
    | 2        | 刘德华        |
    | 3        | 黎明         |
    | 4        | 郭富城        |
    +----------+------------+--+
    4 rows selected (0.638 seconds)
    
    
    
    Differences
    A managed (internal) table's data files must live under the table's warehouse directory.
    An external table's data files can live at any location you specify.
    After drop table t_a2, an external table's data files remain in HDFS; dropping a managed table deletes the metadata and the data files together, as the sketch below shows.
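
    A quick way to see the difference, reusing the two tables above (a sketch):
    drop table t_a2;
    hadoop fs -ls /test/1.txt                               # the external data file is still there
    drop table t_a1;
    hadoop fs -ls /user/hive/warehouse/hive_test.db/t_a1    # fails: the managed table's directory was deleted with it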
    
    
    // Partitioned table
    create table t_user(id int,name string,area string) partitioned by(region string) row format delimited fields terminated by ',';
    // Load data, one file per partition
     load data local inpath '/root/beijing.txt' into table t_user partition(region='beijing');
     load data local inpath '/root/shanghai.txt' into table t_user partition(region='shanghai');
    
    0: jdbc:hive2://bigdata-01:10000> select * from t_user;
    +------------+--------------+--------------+----------------+--+
    | t_user.id  | t_user.name  | t_user.area  | t_user.region  |
    +------------+--------------+--------------+----------------+--+
    | 1          | 张学友          | 北京           | beijing        |
    | 2          | 刘德华          | 北京           | beijing        |
    | 3          | 黎明           | 北京           | beijing        |
    | 4          | 郭富城          | 北京           | beijing        |
    | 5          | 诸葛亮          | 上海           | shanghai       |
    | 6          | 司马懿          | 上海           | shanghai       |
    | 7          | 周瑜           | 上海           | shanghai       |
    +------------+--------------+--------------+----------------+--+
    7 rows selected (0.445 seconds)
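
    The partition column behaves like a regular column in queries, and filtering on it prunes the scan to a single directory (a sketch):
    show partitions t_user;                        -- region=beijing / region=shanghai
    select * from t_user where region='beijing';   -- reads only the region=beijing directory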
     
     
     
     
    // Multiple partition columns (day and hour)
    create table day_hour_table (id int, content string) partitioned by (dt string, hour string);
    load data local inpath '/root/900101_08.txt' into table day_hour_table PARTITION(dt='1990-01-01', hour='08');
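
    Each partition maps to a nested directory under the table; assuming the default warehouse path, the layout looks like:
    hadoop fs -ls -R /user/hive/warehouse/hive_test.db/day_hour_table
    # .../day_hour_table/dt=1990-01-01/hour=08/900101_08.txt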
    
    
    
    // Bucketed table
    Enable bucketing: set hive.enforce.bucketing = true;
    Make the number of reducers match the number of buckets: set mapreduce.job.reduces=4;
    Create the table
    create table stu_buck(Sno int,Sname string,Sex string,Sage int,Sdept string) clustered by(Sno) into 4 buckets row format delimited fields terminated by ',';
    How to load it:
    1. First create an ordinary staging table and map the data file onto it
    create table student(Sno int,Sname string,Sex string,Sage int,Sdept string) row format delimited fields terminated by ',';
    hadoop fs -put students.txt /user/hive/warehouse/hive_test.db/student
    2. Then populate the bucketed table for real (insert + select)
    insert overwrite table stu_buck select * from student cluster by(Sno);
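
    The insert writes one file per bucket, and the table then supports bucket sampling; a sketch:
    hadoop fs -ls /user/hive/warehouse/hive_test.db/stu_buck          # four files: 000000_0 .. 000003_0
    select * from stu_buck tablesample(bucket 1 out of 4 on Sno);     -- reads only the first bucket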
    
    
    For testing, you can enable local mode so that small jobs run in-process instead of on the cluster:
    set hive.exec.mode.local.auto=true;
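
    Local mode only kicks in for jobs below certain size thresholds; these are tunable (the values shown are the usual defaults, worth verifying on your Hive version):
    set hive.exec.mode.local.auto.inputbytes.max=134217728;    -- max total input bytes for local mode
    set hive.exec.mode.local.auto.input.files.max=4;           -- max number of input files for local mode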