0.创建测试表并加载数据
hive>create table hive.test(id int);
hive>load data local inpath '/home/hyxy/test_order.txt' into table hive.test;
1.order by 全局排序
hive>select * from hive.test order by id;
2.sort by 局部排序(每个reduce内部有序,不保证全局有序)
hive>set mapreduce.job.reduces=3;
hive>select * from hive.test sort by id;
索引
1.创建索引
hive>create index test_id_index on table hive.test(id) as 'org.apache.hadoop.hive.ql.index.compact.CompactIndexHandler' with deferred rebuild;
2.默认查询索引表:在hive数据库的INDEX_TABLE下自动生成索引表
hive>select * from hive.hive__test_test_id_index__;
说明:默认生成的索引表无数据,空白状态;
索引表有三个字段:
id:表示index字段
_bucketname:表示数据所在的location位置
_offsets:表示当前数据所处的偏移量
3.重构index表,目的生成index数据
hive>alter index test_id_index on hive.test rebuild;
hive>select * from hive.hive__test_test_id_index__;
4.删除索引
hive>drop index test_id_index on hive.test;