hiveql basic - 走看看

zoukankan html css js c++ java

hiveql basic

-- Session setup: show the current database name in the Hive CLI prompt.
set hive.cli.print.current.db=true;
-- Strict mode makes Hive reject certain risky queries
-- (for example, scanning a partitioned table without a partition filter).
set hive.mapred.mode=strict;
-- Nonstrict mode lifts those restrictions. If both lines are run in order,
-- this one wins; keep only the mode you actually want.
set hive.mapred.mode=nonstrict;
-- List the partitions of a table ("tablename" is a placeholder).
SHOW PARTITIONS tablename;

-- Dynamic partition insert.
-- The partition columns (country, state) are filled from the LAST columns of the
-- SELECT list, matched by position, not by name.
-- Both partition columns are dynamic here, so dynamic partitioning must be enabled
-- and its mode must be nonstrict (strict mode requires at least one static partition).
-- "..." stands for the non-partition columns of employees; replace it with the real column list.

SET hive.exec.dynamic.partition=true;
SET hive.exec.dynamic.partition.mode=nonstrict;

INSERT OVERWRITE TABLE employees
PARTITION (country, state)
SELECT ..., se.cnty, se.st
FROM staged_employees se;

-- Enable map-side partial aggregation for GROUP BY queries
-- (trades extra mapper memory for less data shuffled to the reducers).
SET hive.map.aggr=true;

-- A nested SELECT in the FROM clause lets the outer query reuse computed columns
-- without first materializing them in a temporary table.
-- Every inner expression that the outer query references needs an explicit alias:
-- an unaliased upper(name) is exposed under a generated name (e.g. _c0), not "name".

FROM (
    SELECT
        upper(name) AS name,
        salary,
        deductions["Federal Taxes"] AS fed_taxes,
        round(salary * (1 - deductions["Federal Taxes"])) AS salary_minus_fed_taxes
    FROM employees
) e
SELECT e.name, e.salary_minus_fed_taxes
WHERE e.salary_minus_fed_taxes > 70000;

-- When Hive can avoid a cluster MapReduce job:
-- with this setting Hive tries to run suitably small jobs in local mode
-- instead of submitting them to the cluster.

set hive.exec.mode.local.auto=true;

-- Hive supports the classic SQL JOIN statement; these notes assume only equi-joins are supported.
-- NOTE(review): newer Hive releases reportedly accept more general ON conditions - confirm for your version.
-- By default Hive assumes the last table in the query is the largest: it tries to buffer
-- the other tables and streams the last one through, so put the largest table last,
-- or name the table to stream explicitly with the STREAMTABLE hint, as done here.

SELECT /*+ STREAMTABLE(s) */
    s.ymd,
    s.symbol,
    s.price_close,
    d.dividend
FROM stocks s
INNER JOIN dividends d
    ON s.ymd = d.ymd
    AND s.symbol = d.symbol
WHERE s.symbol = 'AAPL';

-- Map-side join: the small table is loaded into memory by each mapper,
-- so the join needs no reduce phase.
-- Let Hive convert joins to map joins automatically when one side is small enough.
SET hive.auto.convert.join=true;
-- Size threshold in bytes: tables smaller than this can be joined in the map phase.
SET hive.mapjoin.smalltable.filesize=25000000;
-- Explicit hint form: request dividends (d) as the in-memory side of the join.
SELECT /*+ MAPJOIN(d) */ s.ymd, s.symbol, s.price_close, d.dividend
FROM stocks s JOIN dividends d ON s.ymd = d.ymd AND s.symbol = d.symbol
WHERE s.symbol = 'AAPL';

-- Settings for bucket map joins and sort-merge bucket joins.
-- NOTE(review): these presumably only help when both tables are bucketed
-- (and, for the sort-merge variant, sorted) on the join key - confirm against the table DDL.
set hive.input.format=org.apache.hadoop.hive.ql.io.BucketizedHiveInputFormat;
set hive.optimize.bucketmapjoin=true;
set hive.optimize.bucketmapjoin.sortedmerge=true;

--Using DISTRIBUTE BY ... SORT BY, or the shorthand CLUSTER BY clause, is a way to exploit
--the parallelism of SORT BY, yet achieve a total ordering across the output files.
--This is better than using ORDER BY, which runs with just one reducer.

-- Queries that sample data

-- Bucket sampling: rows are spread over 10 buckets by rand(); return bucket 3.
SELECT *
FROM numbers TABLESAMPLE(BUCKET 3 OUT OF 10 ON rand()) s;

-- Block sampling: read roughly 0.1 percent of the table's data.
SELECT *
FROM numbersflat TABLESAMPLE(0.1 PERCENT) s;

-- Index: create a compact index on the country column of employees.
-- WITH DEFERRED REBUILD creates the index empty; it has to be built afterwards
-- with ALTER INDEX employees_index ON employees REBUILD.
-- NOTE(review): indexes were removed in Hive 3.0 - confirm the target Hive version.

CREATE INDEX employees_index
ON TABLE employees (country)
AS 'org.apache.hadoop.hive.ql.index.compact.CompactIndexHandler'
WITH DEFERRED REBUILD;

Looking for a job working at Home about MSBI

查看全文

相关阅读:
A+B for Input-Output Practice (VIII)
A+B for Input-Output Practice (VI)
A+B for Input-Output Practice (VII)
A+B for Input-Output Practice (IV)
1.1.4 A+B for Input-Output Practice (V)
1.1.3 A+B for Input-Output Practice (III)
基础练习龟兔赛跑预测
 基础练习回形取数
 Python实用黑科技——以某个字段进行分组
 Python黑科技神奇去除马赛克

原文地址：https://www.cnblogs.com/huaxiaoyao/p/4361736.html