转载于:http://blog.csdn.net/lovelovelovelovelo/article/details/52234971
数据类型
基本数据类型
集合类型,array、map、struct
文件格式,textfile、sequencefile、rcfile
创建表(内部表)
-- Internal (managed) table holding one employee per row.
-- Delimiters match the sample data file: tab between fields, ',' between
-- collection items, ':' between a map key and its value, newline between rows.
-- Hive only accepts '\n' as the line terminator.
create table employee (
    name         string comment 'name',
    salary       float,
    subordinates array<string>,
    deductions   map<string, float>,
    address      struct<street:string, city:string, state:string, zip:int>
)
row format delimited
    fields terminated by '\t'
    collection items terminated by ','
    map keys terminated by ':'
    lines terminated by '\n'
stored as textfile;
从文件加载数据,覆盖源表
-- Load a file from the local filesystem into a table; `overwrite` replaces
-- the data already in the target table.
-- 'path' and table_name are placeholders. The keyword is `inpath` in Hive
-- (`infile` is MySQL syntax), and the table name is an identifier, not a
-- quoted string.
load data local inpath 'path' overwrite into table table_name;
创建外部表
-- External table over the files stored under '/data/'.
-- Delimiters match the sample data file: tab between fields, ',' between
-- collection items, ':' between a map key and its value, newline between rows.
-- Hive only accepts '\n' as the line terminator.
create external table employee (
    name         string comment 'name',
    salary       float,
    subordinates array<string>,
    deductions   map<string, float>,
    address      struct<street:string, city:string, state:string, zip:int>
)
row format delimited
    fields terminated by '\t'
    collection items terminated by ','
    map keys terminated by ':'
    lines terminated by '\n'
stored as textfile
location '/data/';
表中数据
lucy 11000 tom,jack,dave,kate tom:1200,jack:1560 beijing,changanjie,xichengqu,10000
lily 13000 dave,kate dave:1300,kate:1260 beijing,changanjie,xichengqu,10000
和我们熟悉的关系型数据库不一样,早期版本的Hive(0.14之前)不支持在insert语句里面直接给出一组记录的文字形式,也就是说,这些版本并不支持INSERT INTO … VALUES形式的语句。Hive 0.14起已支持该语法,但array、map、struct等复杂类型的列仍然不能通过VALUES子句直接赋值,因此本例仍采用从文件加载数据的方式。
新建employee.txt,将数据存入文件中,注意字段间用tab,行间换行enter
通过hive命令加载数据
hive> load data local inpath '/root/employee.txt' into table employee;
hive> select * from employee;
OK
lucy 11000.0 ["tom","jack","dave","kate"] {"tom":1200.0,"jack":1560.0} {"street":"beijing","city":"changanjie","state":"xichengqu","zip":10000}
lily 13000.0 ["dave","kate"] {"dave":1300.0,"kate":1260.0} {"street":"beijing","city":"changanjie","state":"xichengqu","zip":10000}
Time taken: 0.054 seconds, Fetched: 2 row(s)
像 select * from table 这样不带过滤、聚合的简单查询不走MapReduce,Hive会直接读取表对应的文件并返回结果。
由一个表创建另一个表
-- Create table2 from the definition of table1.
create table table2
    like table1;
从其他表查询创建表
-- Create table2 from the result of a query on table1 (create-table-as-select).
create table table2 as
select
    name,
    age,
    add
from table1;
hive不同文件读取
stored as textfile: hadoop fs -text
stored as sequencefile: hadoop fs -text
stored as rcfile: hive --service rcfilecat path
stored as inputformat 'class' outputformat 'class'
分区表操作
-- Add a partition; `if not exists` makes the statement a no-op when the
-- partition is already there.
-- NOTE(review): the empty country value is a placeholder from the original notes.
alter table employee add if not exists partition (country = '');

-- Drop a partition; `if exists` avoids an error when it is absent.
alter table employee drop if exists partition (country = '');
hive分桶
-- Bucketed table: clustered by id, sorted by name, into 4 buckets.
-- NOTE(review): the field delimiter is reproduced as written (a single space);
-- it may originally have been '\t' -- confirm against the data files.
create table bucket_table (
    id   int,
    name string
)
clustered by (id)
sorted by (name)
into 4 buckets
row format delimited
    fields terminated by ' '
stored as textfile;

-- Session setting given together with the bucketed-table definition.
set hive.enforce.bucketing=true;
创建分区表
-- Partitioned table: data columns name and age, partitioned by the string
-- column dt.
-- Hive only accepts '\n' as the line terminator, so it is spelled out here.
-- NOTE(review): the field delimiter is reproduced as written (a single space);
-- it may originally have been '\t' -- confirm against the data files.
create table partitionTable (
    name string,
    age  int
)
partitioned by (dt string)
row format delimited
    fields terminated by ' '
    lines terminated by '\n'
stored as textfile;