Hive复杂数据类型
1、Array数据类型的使用
1.1、创建数据库表,以array作为数据类型
hive (hive_demo1)> create table stu_test(name array<string>,phone array<string>) > row format delimited fields terminated by' ' > collection items terminated by','; OK
1.2、在/opt/datas/test目录下创建stu_info.txt文件,并将文件内容导入hive的stu_test表中
[liupeng@tonyliu test]$ pwd /opt/datas/test [liupeng@tonyliu test]$ ls person.txt stu_info.txt [liupeng@tonyliu test]$ more stu_info.txt //创建数据并查看 小明,小王,小张 15975319964,18665851264,13278659963 tony, tom,jack 18677549911,15923458765,18665851989 [liupeng@tonyliu test]$
hive (hive_demo1)> load data local inpath'/opt/datas/test/stu_info.txt' into table stu_test; //load数据到stu_test表中 Copying data from file:/opt/datas/test/stu_info.txt Copying file: file:/opt/datas/test/stu_info.txt Loading data to table hive_demo1.stu_test Table hive_demo1.stu_test stats: [numFiles=1, numRows=0, totalSize=108, rawDataSize=0] OK Time taken: 0.439 seconds
1.3、查询stu_info表
hive (hive_demo1)> select * from stu_test; //查看stu_test所有数据 OK stu_test.name stu_test.phone ["小明","小王","小张"] ["15975319964","18665851264","13278659963"] ["tony"," tom","jack"] ["18677549911","15923458765","18665851989"] Time taken: 0.057 seconds, Fetched: 2 row(s)
1.4、查询stu_info表中array数据类型字段的指定列
hive (hive_demo1)> select name[0],phone[0] from stu_test; //显示stu_info中的name,phone arraylist中的第一个元素 OK _c0 _c1 小明 15975319964 tony 18677549911 Time taken: 0.117 seconds, Fetched: 2 row(s)
1.5、查询array数据类型字段的长度
hive (hive_demo1)> select name,size(phone) from stu_test; //size()是用来判断长度的 OK name _c1 ["小明","小王","小张"] 3 ["tony"," tom","jack"] 3 Time taken: 0.071 seconds, Fetched: 2 row(s) hive (hive_demo1)> select size(name),size(phone) from stu_test; OK _c0 _c1 3 3 3 3 Time taken: 0.08 seconds, Fetched: 2 row(s)
1.6、查询包含array数据类型字段指定列的一行数据
hive (hive_demo1)> select name[1],phone[1] from stu_test where array_contains(name,'小王'); //具体指定arraylist中第2个元素并指定符合条件的contains条目。 OK _c0 _c1 小王 18665851264 Time taken: 0.079 seconds, Fetched: 1 row(s)
1.7、查看表结构
hive (hive_demo1)> desc stu_test; OK col_name data_type comment name array<string> phone array<string> Time taken: 0.095 seconds, Fetched: 2 row(s)
2、Map数据类型的使用
2.1、创建表的同时使用Map数据类型
//创建 per_test表 hive (hive_demo1)> create table per_test(name string,info map<string,string>) > row format delimited fields terminated by' ' > collection items terminated by' 73' //因为我的数据字段分隔符中含有';',因为hdfs文件的的格式就是用分号隔开的。因此冲突情况下会报错。为了解决这个问题找到分号的asc码值 : http://blog.csdn.net/lxpbs8851/article/details/11525501
(其他字符有同样问题 也可以这样做) 找到的是073 那么将定义表的语句修改为:row format delimited fields terminated by ' 73'
> map keys terminated by':'; OK Time taken: 0.09 seconds
2.2、在/opt/datas/test中编辑person.txt文件
[liupeng@tonyliu test]$ pwd /opt/datas/test [liupeng@tonyliu test]$ ls person.txt stu_info.txt [liupeng@tonyliu test]$ more person.txt 小明 年龄:18;身高:1米8;地址:北京 小红 年龄:30;身高:1米72;地址:上海 小李 年龄:27;身高:1米90;地址:深圳 [liupeng@tonyliu test]$
2.3、将person.txt文件中的数据导入hive中的per_test表中
hive (hive_demo1)> load data local inpath'/opt/datas/test/person.txt'into table per_test; Copying data from file:/opt/datas/test/person.txt Copying file: file:/opt/datas/test/person.txt Loading data to table hive_demo1.per_test Table hive_demo1.per_test stats: [numFiles=1, numRows=0, totalSize=134, rawDataSize=0] OK Time taken: 0.269 seconds
2.4、查询per_test表中全部数据
hive (hive_demo1)> select * from per_test; OK per_test.name per_test.info 小明 {"年龄":"18","身高":"1米8","地址":"北京"} 小红 {"年龄":"30","身高":"1米72","地址":"上海"} 小李 {"年龄":"27","身高":"1米90","地址":"深圳"} Time taken: 0.049 seconds, Fetched: 3 row(s)
2.5、查询per_test表中数据
//取per_test表中某个字段的值(name) hive (hive_demo1)> select name from per_test; OK name 小明 小红 小李 Time taken: 0.062 seconds, Fetched: 3 row(s) //取per_test表中某个字段的值(info) //因为info在我们数据中有多个字段,中间是通过,号做了分割。因此直接取info的话会把所有字段返回。 hive (hive_demo1)> select info from per_test; OK info {"年龄":"18","身高":"1米8","地址":"北京"} {"年龄":"30","身高":"1米72","地址":"上海"} {"年龄":"27","身高":"1米90","地址":"深圳"} Time taken: 0.039 seconds, Fetched: 3 row(s)
//也可以指定具体字段,以及字段中子字段的value进行输出。子字段是通过指定key的值来识别并输出value的 hive (hive_demo1)> select name,info['年龄']from per_info; OK name _c1 小明 18 小红 30 小李 27 Time taken: 0.049 seconds, Fetched: 3 row(s) //同上,某个字段中也可以输出多个子字段的value值。通过指定key hive (hive_demo1)> select name,info['年龄'],info['身高'],info['地址']from per_info; OK name _c1 _c2 _c3 小明 18 1米8 北京 小红 30 1米72 上海 小李 27 1米90 深圳 Time taken: 0.051 seconds, Fetched: 3 row(s)
3、Struct数据类型的使用
3.1、创建表的同时使用struct数据类型
hive (hive_demo1)> create table struct_info( > id int,info struct<key:string,value:int>) //info为字段标示名,struct<key,value> key指定子字段的键,value指定子字段对应键的值 > row format delimited fields terminated by'.' > collection items terminated by':'; OK Time taken: 0.125 seconds
3.2、创建stu_struct.txt文件,并将文件数据导入到hive的stu_struct表中
[liupeng@tonyliu test]$ pwd /opt/datas/test [liupeng@tonyliu test]$ ls person_map.txt stu_list.txt stu_struct.txt [liupeng@tonyliu test]$ more stu_struct.txt 1.小明:90 2.小红:100 3.小方:70 4.小白:50 5.小兰:60 6.小花:85 [liupeng@tonyliu test]$
hive (hive_demo1)> load data local inpath'/opt/datas/test/stu_struct.txt'into table struct_info; Copying data from file:/opt/datas/test/stu_struct.txt Copying file: file:/opt/datas/test/stu_struct.txt Loading data to table hive_demo1.struct_info Table hive_demo1.struct_info stats: [numFiles=1, numRows=0, totalSize=73, rawDataSize=0] OK Time taken: 0.256 seconds
3.3、查询struct_info表中数据(全部查询,部分查询及子元素的查询)
(1) 显示全表
hive (hive_demo1)> select * from struct_info; OK struct_info.id struct_info.info 1 {"key":"小明","value":90} 2 {"key":"小红","value":100} 3 {"key":"小方","value":70} 4 {"key":"小白","value":50} 5 {"key":"小兰","value":60} 6 {"key":"小花","value":85} Time taken: 0.059 seconds, Fetched: 6 row(s)
(2) 显示表中字段
hive (hive_demo1)> select id from struct_info; //显示id 这个字段的信息 OK id 1 2 3 4 5 6 Time taken: 0.065 seconds, Fetched: 6 row(s) hive (hive_demo1)> select info from struct_info; //显示info这个字段的信息 OK info {"key":"小明","value":90} {"key":"小红","value":100} {"key":"小方","value":70} {"key":"小白","value":50} {"key":"小兰","value":60} {"key":"小花","value":85} Time taken: 0.056 seconds, Fetched: 6 row(s)
(3) 显示子字段key与value的字段信息
hive (hive_demo1)> select info.key from struct_info; //显示key的信息 OK key 小明 小红 小方 小白 小兰 小花 Time taken: 0.063 seconds, Fetched: 6 row(s)
hive (hive_demo1)> select info.value from struct_info; //显示value的信息 OK value 90 100 70 50 60 85 Time taken: 0.056 seconds, Fetched: 6 row(s)
(4) 通过where条件语句过滤出指定显示的语句
hive (hive_demo1)> select id,info from struct_info where id=1; //加上where条件语句显示其中1条指定信息 OK id info 1 {"key":"小明","value":90} Time taken: 0.112 seconds, Fetched: 1 row(s)
(5) 选择value作为范围取指定key的值
hive (hive_demo1)> select info from struct_info where info.key='小明'; OK info {"key":"小明","value":90} Time taken: 0.042 seconds, Fetched: 1 row(s)