Hive QL - 走看看

zoukankan html css js c++ java

Hive QL

将查询结果写入到本地指定目录(也可以写入hdfs)

insert overwrite local directory '/root/tmpdata/queryout_20180607' select * from gamedw.t_name where id=1

一个源可以同时插入到多个目标表或目标文件，多目标insert可以用一条语句来完成

FROM src
INSERT OVERWRITE TABLE test1 SELECT src.* WHERE sex=1

INSERT OVERWRITE TABLE test3 PARTITION(city='shenzhen') SELECT src.name WHERE src.city='sz'

INSERT OVERWRITE LOCAL DIRECTORY '/tmp/test' SELECT src.* WHERE id>100;

//删除一个非空库：hive默认不允许删除一个包含表的库，需要先删除表，再删除库；加上CASCADE就会在删除数据库前先自动删除其中的表。

hive> drop database if exists test20180917 CASCADE;
OK
Time taken: 0.003 seconds

//创建一个表而不拷贝数据

hive> create table if not exists cust_1 like cust;
OK
Time taken: 0.286 seconds

hive> show tables in sqoopdb;
OK
tab_name
role
t_name
Time taken: 0.112 seconds, Fetched: 2 row(s)

//如果表中的数据量以及分区的个数都非常大的话，执行一个包含所有分区的查询就会生成一个巨大的mapreduce任务。使用严格模式后，对分区表的查询如果没有加分区过滤条件，就会被禁止提交。

hive> show partitions customers;
OK
partition
city=guangzhen
city=kaifeng
city=longhua
city=nanjing
city=qingdao
city=shenzhen
city=wuhan
Time taken: 0.254 seconds, Fetched: 7 row(s)

hive> set hive.mapred.mode=strict;

hive> select * from customers;
FAILED: SemanticException Queries against partitioned tables without a partition filter are disabled for safety reasons. If you know what you are doing, please make sure that hive.strict.checks.large.query is set to false and that hive.mapred.mode is not set to 'strict' to enable them. No partition predicate for Alias "customers" Table "customers"

hive> select * from customers where city='shenzhen';
OK
customers.custname      customers.sex   customers.age   customers.city
tianyt_touch100 1       50      shenzhen
wangwu 1       85      shenzhen
zhangsan        1       20      shenzhen
liuqin 0       56      shenzhen
wangwu 0       47      shenzhen
liuyang 1       32      shenzhen

load data local inpath '.......' into table tablename partition(.......)

//允许分区都是动态的

hive> set hive.exec.dynamic.partition.mode=nonstrict;

//开启动态分区功能
hive> set hive.exec.dynamic.partition=true;

//创建表并导入数据

create table if not exists tb_test1 as select * from ... where ....;

//导出数据到本地目录

hive> insert overwrite local directory '/root/aa.txt' select * from cust;

//设置map端聚合,提高聚合性能

hive> set hive.map.aggr=true;

// . 表示任意单个字符，* 表示将其左边的字符重复0次到无限次

hive> select * from cust where custname rlike '.*(ni|ll).*';
OK
cust.custname   cust.sex        cust.nianling
hello   0       100
nihao   1       5
hello   0       100
nihao   1       5
Time taken: 0.256 seconds, Fetched: 4 row(s)

hive> select * from cust where custname like '%he%';
OK
cust.custname   cust.sex        cust.nianling
hello   0       100
hello   0       100
Time taken: 0.342 seconds, Fetched: 2 row(s)

当对三个或更多的表进行join操作时，如果每个on子句都使用相同的连接键，只会生成一个mapreduce job。

hive 可以显式地标记哪张表是大表，以此告诉查询优化器。下面的查询中，hive将尝试将表cust（别名a）作为驱动表，即使它在查询中的位置不在最后面。

scala> hivecon.sql("select /*+STREAMTABLE(a)*/ a.custname,b.sex from gamedw.cust a inner join gamedw.cust1 b on a.custname=b.custname").show
+---------------+---+
|       custname|sex|
+---------------+---+
|tianyt_touch100| 50|
|         wangwu| 47|
|         wangwu| 85|
|       zhangsan| 20|
|         liuqin| 56|
|         wangwu| 47|
|         wangwu| 85|
|        liuyang| 32|
|          hello|100|
|tianyt_touch100| 50|
|         wangwu| 47|
|         wangwu| 85|
|       zhangsan| 20|
|         liuqin| 56|
|         wangwu| 47|
|         wangwu| 85|
|        liuyang| 32|
|          hello|100|
+---------------+---+

目前semi join只支持left semi join，只返回左边表的记录，比inner join更高效：对于左表的一条记录，一旦在右表中找到匹配的记录，就立刻停止扫描。
scala> hivecon.sql("select * from gamedw.cust a left semi join gamedw.cust1 b on a.custname=b.custname").show
+---------------+---+--------+
|       custname|sex|nianling|
+---------------+---+--------+
|tianyt_touch100| 1|      50|
|         wangwu| 1|      85|
|       zhangsan| 1|      20|
|         liuqin| 0|      56|
|         wangwu| 0|      47|
|        liuyang| 1|      32|
|          hello| 0|     100|
|tianyt_touch100| 1|      50|
|         wangwu| 1|      85|
|       zhangsan| 1|      20|
|         liuqin| 0|      56|
|         wangwu| 0|      47|
|        liuyang| 1|      32|
|          hello| 0|     100|
+---------------+---+--------+

map-side join将小表加载到内存中，在map端执行连接过程，从而省略掉reduce过程。

scala> hivecon.sql("select /*+mapjoin(b)*/ a.custname,b.sex from gamedw.cust a inner join gamedw.cust1 b on a.custname=b.custname").show
+---------------+---+
|       custname|sex|
+---------------+---+
|tianyt_touch100| 50|
|         wangwu| 47|
|         wangwu| 85|
|       zhangsan| 20|
|         liuqin| 56|
|         wangwu| 47|
|         wangwu| 85|
|        liuyang| 32|
|          hello|100|
|tianyt_touch100| 50|
|         wangwu| 47|
|         wangwu| 85|
|       zhangsan| 20|
|         liuqin| 56|
|         wangwu| 47|
|         wangwu| 85|
|        liuyang| 32|
|          hello|100|
+---------------+---+

//设置自动识别较小的表。

hive> set hive.auto.convert.join=true;

//也可以设置小表的大小，单位为字节

hive> set hive.mapjoin.smalltable.filesize=3000000;

查看全文

相关阅读:
第七天安卓 4大组件
 第六天页面跳转和数据传递
 第五天断点续传和下载
 objective-c里的protocol
Cocos2d-x的屏幕适配
 CocosBuilder的Inspector及让Text View实时更新内容＋binding控件到基类成员
 几个输出注意点
 Xcode
Category、Extension
iOS内存管理

原文地址：https://www.cnblogs.com/playforever/p/9150744.html