hive> create table f1 (name string);
hive> load data local inpath '/home/hadoop/f11.txt' into table f1;
hive> select * from f1;
hive> drop table f1;
create table q1(id int,name string,addr string)
row format delimited
fields terminated by ','
stored as textfile;
create table q2(id int,age int,sal bigint)
row format delimited
fields terminated by ','
stored as textfile;
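A sketch of loading and joining the two tables (the file names q1.txt and q2.txt are assumptions for illustration):
load data local inpath '/home/hadoop/q1.txt' into table q1;
load data local inpath '/home/hadoop/q2.txt' into table q2;
select a.id,a.name,b.age,b.sal from q1 a join q2 b on a.id=b.id;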
Array:
Temperature.txt
1/2/18 Karnataka 23.2,22.3,20.5,25.5,24.5,20.3
2/5/18 Kerala 25.5,23.1,26.5,27.5,28.5,29.1
create table Temperature(`date` string,city string,MyTemp array<double>)
row format delimited fields terminated by ' '
collection items terminated by ',';
load data local inpath '/home/acadgild/Desktop/Temperature.txt' into table Temperature;
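A few example queries on the array column (a sketch; array indexes are zero-based, and size() and explode() are standard Hive built-ins):
select city,MyTemp[0] from Temperature;      -- first reading of each row
select city,size(MyTemp) from Temperature;   -- number of readings per row
select explode(MyTemp) from Temperature;     -- one output row per reading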
Map:
Schools.txt
Secondary Assam Male 2015:56344,2016:57573,2017:58389
Secondary Assam Female 2015:19747,2016:20284,2017:37727
create table MySchools(schooltype string,state string,gender string,total map<int,int>)
row format delimited fields terminated by ' '
collection items terminated by ','
map keys terminated by ':';
load data local inpath '/home/acadgild/Desktop/Schools.txt' into table MySchools;
Map queries:
select t.total[2016] from MySchools t;
A real-world example that filters on the map's values using map_values and concat_ws:
select * from jr_aggr2_instrument where concat_ws(',',map_values(mfi_clasf)) like '%MifidAssetClassDerived%' and datasource = 'FEED_ESMA' limit 3;
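A few more map built-ins, sketched against MySchools:
select map_keys(total) from MySchools;    -- the years, as an array
select map_values(total) from MySchools;  -- the counts, as an array
select size(total) from MySchools;        -- number of key/value pairs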
Struct:
A struct is a record type that encapsulates a set of named fields, each of which can be of any primitive data type.
Bikes.txt
Yamaha Ray-Z Aircooled,149.0,14.0,0
Hero Maestro Aircooled,155.0,14.8,0
create table MyBikes(name string, BikeFeatures struct<EngineType:string,cc:float,power:float,gears:int>)
row format delimited fields terminated by ' '
collection items terminated by ',';
load data local inpath '/home/acadgild/Desktop/Bikes.txt' into table MyBikes;
Struct queries:
select BikeFeatures.EngineType from MyBikes;
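All fields of the struct can be read in one query (a sketch):
select name,BikeFeatures.EngineType,BikeFeatures.cc,BikeFeatures.power,BikeFeatures.gears from MyBikes;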
2. External tables
hive> create external table ef1(name string);
hive> load data local inpath '/home/hadoop/f11.txt' into table ef1;
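External tables are usually created with an explicit LOCATION, so that DROP TABLE removes only the metadata and leaves the data files in place. A sketch (the HDFS path is an assumption):
create external table ef2(name string)
row format delimited fields terminated by ','
location '/user/hadoop/ef2_data';    -- data under this path survives DROP TABLE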
3. Partitioned tables
create table logs(ts bigint,line string)
partitioned by (dt string,country string)
row format delimited
fields terminated by ','
stored as textfile;
hive> load data local inpath '/home/hadoop/f4.txt' into table logs partition(dt='2016-02-22',country='hk');
hive> select * from logs where country='hk';
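Partitions can be listed, and empty ones added, with standard DDL (a sketch):
hive> show partitions logs;
hive> alter table logs add partition(dt='2016-02-23',country='cn');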
4. Buckets (the source data file is split into multiple bucket files)
hive> set hive.enforce.bucketing = true;
hive> create table st(id int,age int,name string) row format delimited
> fields terminated by ','
> stored as textfile;
hive> load data local inpath '/home/hadoop/f5.txt' into table st;
create table student(id int,age int,name string)
partitioned by (sj string)
clustered by(id) sorted by(age) into 2 buckets    -- clustered by buckets the rows on id; sorted by sorts each bucket by age
row format delimited
fields terminated by ',';
hive> from st insert overwrite table student partition(sj="20160221") select id,age,name;
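One payoff of bucketing is efficient sampling; a sketch using TABLESAMPLE, which reads only the chosen bucket:
hive> select * from student tablesample(bucket 1 out of 2 on id);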
Hive data import and export
1.
Common ways to load data into a Hive table
Four are covered here:
(1) load data from the local filesystem into a Hive table;
(2) load data from HDFS into a Hive table;
(3) query another table and insert the results into a Hive table;
(4) create a table from the results of a query on another table (CTAS).
1. Load data from the local filesystem into a Hive table
1.1
[hadoop@h91 hive-0.9.0-bin]$ bin/hive
Create the ha table:
hive> create table ha(id int,name string)
> row format delimited
> fields terminated by ' '
> stored as textfile;
The ROW FORMAT DELIMITED clause specifies the column delimiter the table expects when data is loaded.
The STORED AS file_format clause specifies the storage format of the loaded data; the default is TEXTFILE. For plain-text data, use STORED AS TEXTFILE: the file is copied to HDFS as-is and Hive can read it directly.
1.2
The source text file on the local filesystem:
[hadoop@h91 ~]$ cat haha.txt
101 zs
102 ls
103 ww
1.3 Load the data
hive> load data local inpath '/home/hadoop/haha.txt' into table ha;
hive> select * from ha;
*****
Unlike the relational databases we are familiar with, Hive (at this version) does not support giving literal rows directly in an INSERT statement; that is, Hive has no INSERT INTO ... VALUES form.
*****
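In old Hive versions a common workaround is to route literal values through a SELECT over an existing table; newer releases (Hive 0.14 and later) do support INSERT ... VALUES directly. A sketch of the workaround (the values are illustrative):
hive> insert into table ha select 104,'zl' from ha limit 1;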
--------------------------------------------------
2.
Load data from HDFS into a Hive table
2.1
[hadoop@h91 hadoop-0.20.2-cdh3u5]$ bin/hadoop fs -mkdir abc
[hadoop@h91 ~]$ cat hehe.txt
1001 aa
1002 bb
1003 cc
[hadoop@h91 hadoop-0.20.2-cdh3u5]$ bin/hadoop fs -put /home/hadoop/hehe.txt abc/.
(upload the file to HDFS)
2.2
hive> create table he(id int,name string)
> row format delimited
> fields terminated by ' '
> stored as textfile;
Load the data:
hive> load data inpath '/user/hadoop/abc/hehe.txt' into table he;
---------------------------------------------------------
3. Query another table and insert the results into a Hive table
3.1
hive> select * from he;
OK
1001 aa
1002 bb
1003 cc
hive> create table heihei(id int,name string)
> row format delimited
> fields terminated by ' '
> stored as textfile;
3.2
hive> insert into table heihei select * from he;
or
hive> insert overwrite table heihei select * from ha;
(insert overwrite replaces the table's existing data; insert into appends to it)
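Hive also supports multi-table INSERT, which scans the source once and writes to several targets; a sketch (the directory path is an assumption):
hive> from he
    > insert into table heihei select *
    > insert overwrite directory '/user/hadoop/he_backup' select *;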
--------------------------------------------------
4. Create a table from the results of a query on another table (CTAS)
hive> create table gaga as select * from he;
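CTAS can also reshape the data at the same time, for example (a sketch):
hive> create table gaga2 as select id,upper(name) as name from he where id>1001;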
================================================================
Exporting data
(1) export to the local filesystem;
(2) export to HDFS;
(3) export to another Hive table.
1. Export to the local filesystem
hive> insert overwrite local directory '/home/hadoop/he1' select * from he;
[hadoop@h91 ~]$ cd he1    (he1 is a directory containing the file 000000_0)
[hadoop@h91 he1]$ cat 000000_0
(note that there is no delimiter between the columns)
A delimiter can be added as follows:
hive> insert overwrite local directory '/home/hadoop/he1' select id,concat(' ',name) from he;
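Since Hive 0.11, the delimiter can instead be declared directly on the export (not available in the 0.9 release used above); a sketch:
hive> insert overwrite local directory '/home/hadoop/he1'
    > row format delimited fields terminated by '\t'
    > select * from he;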
hive -e "select rowkey from hive_hbase.vrecord where mintUptJson is not null and rowkey like '2%'" >> /usr/pkgs/ops/jieranli/work/mm/aa.csv    (the redirect target can also be a .txt file)
hive -f script.sql    (run the HiveQL statements stored in a .sql file)
******
Unlike importing data into Hive, you cannot use insert into to export data.
******
---------------------------------------------------------
2. Export to HDFS
hive> insert overwrite directory '/user/hadoop/abc' select * from he;
(/user/hadoop/abc is a directory on HDFS)
[hadoop@h91 hadoop-0.20.2-cdh3u5]$ bin/hadoop fs -ls abc
[hadoop@h91 hadoop-0.20.2-cdh3u5]$ bin/hadoop fs -cat abc/000000_0
-------------------------------------------------------------
3. Export to another Hive table
hive> insert into table he12 select * from he;
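The target table he12 must already exist; it can be created with the same schema via CREATE TABLE LIKE (a sketch):
hive> create table he12 like he;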