zoukankan html css js c++ java

大数据（Hive数据库、表的详解及其Hive数据导入导出）

数据库详解

数据库相关

#默认在HDFS上创建数据库目录 /user/hive/warehouse/db_name.db
create database if not exists db_name
#指定HDFS目录 作为hive的数据库目录
create database if not exists db_name location '/t'
#删除空数据库
drop database db_name
#删除非空的数据库
drop database db_name cascade;
#显示所有数据库
show databases
#应用数据库 
use db_name

表Table详解

#查看表的信息
desc table_name
describe  table_name
#查看表中更为详细的信息
desc extended table_name
#格式化查看表中更为详细的信息
desc formatted table_name

管理表 (内部表, managed table)

基本语法

create table if not exists table_name(
id int,
name string
)row format delimited fields terminated by '\t';

基于as子查询的建表方式

#1. 以后面子查询的查询列,作为表的结构
#2. 同时会把查询结果的数据,插入新表中
create table if not exists table_name as select id from t_user;

基于like关键字建表

#复制t_user的表结构,但不复制数据
create table if not exists table_name like t_user;

指定表格位置[技巧]

#默认建表的位置 在当前数据库中
#/user/hive/warehouse/lhc_140.db/t_user
#/user/hive/warehouse/lhc_140.db/t_user_as
#/user/hive/warehouse/lhc_140.db/t_user_like

#指定表格的创建位置 location

create table if not exists table_name(
id int,
name string
)row format delimited fields terminated by '\t' location '/test1';


#存在目录 套上一张表 [重点]

create table if not exists t_user_suns(
id int,
name string
)row format delimited fields terminated by '\t' location '/suns';

注意[技巧]

hdfs上同一个目录下的所有文件,hive表统一操作处理

外部表

#基本创建语法
create external table if not exists table_name(
id int,
name string
)row format delimited fields terminated by '\t';

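#子查询方式
#注意: Hive的CTAS(create table ... as select)不能用于创建外部表,下面这条语句会报错:
#create external table if not exists table_name as select id from t_user;
#可行做法: 先创建外部表,再通过insert导入查询结果
create external table if not exists table_name(
id int
)row format delimited fields terminated by '\t';
insert into table table_name select id from t_user;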

#like创建方式
create external table if not exists table_name like t_user;

管理表与外部表的区别

#删除管理表,HDFS上的目录及数据同样删除
drop table t_user_as;
#删除外部表,HDFS上的目录及数据保留,只删除metastore中的元数据
drop table t_user_ex;

3.分区表 (优化)
create table t_user_part(
id int,
name string)
partitioned by (time string) row format delimited fields terminated by '\t';

load data local inpath '/root/data3' into table t_user_part  partition (time='18');
load data local inpath '/root/data3' into table t_user_part  partition (time='19');

select * from t_user_part 
#操作具体分区  where 分区条件 
select count(*) from t_user_part where time='18' and id >3 ;

桶表 (抽样,了解)

临时表

Hive中的数据导入和导出

数据的导入 import

load data local[重点]

#linux系统中向hdfs中的hive导入数据
load data local inpath 'local_path' into table table_name

load data

#从hdfs向hive导入数据
load data inpath 'hdfs_path' into table table_name 

#本质上就是把某一个目录下的文件  移动到 新表的目录中
load data inpath '/suns/data3' into table t_user_hdfs

创建表的过程中通过as子查询导入数据 (参见前文"基于as子查询的建表方式")

insert 关键字导入数据[重点]

# 与通过 as 子查询建表导入数据的关键区别: 使用insert导入时,目标表必须已经存在.
insert into table t_user_2 select id,name from t_user;

通过hdfs put文件

bin/hdfs dfs -put /root/data3 /user/hive/warehouse/lhc_140.db/t_user

数据的导出

insert方式【】

insert overwrite local directory '/root/xiaohei' select name from t_user; 
#底层应用mr; overwrite会覆盖目标目录xiaohei中已有的内容,因此应使用不存在或可以被清空的目录; 输出文件名自动生成,如 000000_0

insert overwrite directory 'hdfs_path' select name from t_user;

Hive导入导出命令【】

通过hdfs get文件

bin/hdfs dfs -get  /user/hive/warehouse/lhc_140.db/t_user  /root

通过hive的启动参数导出数据

bin/hive --database 'lhc125' -f /root/hive.sql > /root/result

1. 基本sql 
   select * from t_user       #不启动mr            
   select id,name from t_user #启动mr
2. 条件查询
   select * from t_user where name='sss';
3. 谓词  between and  in  not in  is null is not null
   select * from t_user where id between 1 and 3
   select * from t_user where id in (1,2)
   select * from t_user where id is null 
4. 比较运算  >  <  >= <= !=
5. 逻辑运算 and or 
6. 排序  order by  desc|asc
7. 分页 limit   
   select * from t_user limit 2;
8. sql函数 show functions;
   select substring(name,1,1) from t_user
   select upper(name) from t_user
9. 多表联合查询  inner join  left outer join right outer join full join
   select e.id,e.name,d.id,d.dname
   from t_emp e
   inner join t_dept d
   on e.d_id = d.id;
10. count avg sum max min

查看全文

相关阅读:
1004 Counting Leaves
1003 Emergency (25分)
1002 A+B for Polynomials (25分)
1001 A+B Format
Weekly Contest 139
491. Increasing Subsequences
488. Zuma Game
servlet总结
 firefox插件Firebug的使用教程
 KMP---POJ 3461 Oulipo

原文地址：https://www.cnblogs.com/lhc-hhh/p/10371780.html