  • 【HIVE】(1)建表、导入数据、外部表、导出数据


    1). 本地

    load data local inpath "/root/example/hive/data/dept.txt" into table dept;

    2). HDFS

    load data inpath "/user/hive/warehouse/functiontest.db/dept1/dept.txt" into table dept1;


    create table dept1 like dept;
    load data inpath "/user/hive/warehouse/functiontest.db/dept1/dept.txt" into table dept1;
    0: jdbc:hive2://bigboss3:10000> load data inpath "/user/hive/warehouse/test01.db/emp/emp.txt" into table hdfsload;
    No rows affected (0.74 seconds)
    0: jdbc:hive2://bigboss3:10000> select * from hdfsload;
    | hdfsload.id  | hdfsload.name  | hdfsload.job  | hdfsload.mgr  | hdfsload.hiredate  | hdfsload.salary  | hdfsload.bonus  | hdfsload.deptid  |
    | 7369         | SMITH          | CLERK         | 7902          | 1980-12-17         | 800.0            | NULL            | 20               |
    | 7499         | ALLEN          | SALESMAN      | 7698          | 1981-2-20          | 1600.0           | 300.0           | 30               |
    | 7521         | WARD           | SALESMAN      | 7698          | 1981-2-22          | 1250.0           | 500.0           | 30               |
    | 7566         | JONES          | MANAGER       | 7839          | 1981-4-2           | 2975.0           | NULL            | 20               |
    | 7654         | MARTIN         | SALESMAN      | 7698          | 1981-9-28          | 1250.0           | 1400.0          | 30               |
    | 7698         | BLAKE          | MANAGER       | 7839          | 1981-5-1           | 2850.0           | NULL            | 30               |
    | 7782         | CLARK          | MANAGER       | 7839          | 1981-6-9           | 2450.0           | NULL            | 10               |
    | 7788         | SCOTT          | ANALYST       | 7566          | 1987-4-19          | 3000.0           | NULL            | 20               |
    | 7839         | KING           | PRESIDENT     | NULL          | 1981-11-17         | 5000.0           | NULL            | 10               |
    | 7844         | TURNER         | SALESMAN      | 7698          | 1981-9-8           | 1500.0           | 0.0             | 30               |
    | 7876         | ADAMS          | CLERK         | 7788          | 1987-5-23          | 1100.0           | NULL            | 20               |
    | 7900         | JAMAES         | CLERK         | 7698          | 1981-12-3          | 950.0            | NULL            | 30               |
    | 7902         | FORD           | ANALYST       | 7566          | 1981-12-3          | 3000.0           | NULL            | 20               |
    | 7934         | MILLER         | CLERK         | 7782          | 1982-1-23          | 1300.0           | NULL            | 10               |
    14 rows selected (0.247 seconds)
    select * from dept1;
    drop table dept1;	
    • 直接从本地导入;
      • 将数据文件直接复制(put)到表的默认数据目录:/user/hive/warehouse/<db_name>.db/<table_name>
      • 从hdfs已有目录导入,建表时指定: location "hdfspath";
      • create like: 复制表结构;
      • create as select: 复制表结构和数据;
      • insert into select:表结构已存在,从select插入数据。

    3). truncate table dept1; // 清空表数据,drop是直接删除表

    		insert overwrite / insert into:overwrite 覆盖表中已有数据,into 在已有数据后追加
    		create table dept1 like dept;
    		insert overwrite table dept1 select * from dept;
    		hdfs dfs -ls /user/hive/warehouse/functiontest.db/dept1
    		select * from dept1;
    		truncate table dept1;         // 清空表数据,drop是直接删除表
    		select * from dept1;
    		insert into table dept1 select * from dept;
    		select * from dept1;


    4). CREATE … AS …

    create table dept2 as select * from dept;
    create table dept1 like dept;
    insert overwrite table dept1 select * from dept;

    5). 外部表 LOCATION

    create external table dept3 like dept location "/user/root/input/hive_external_data";
    select * from dept3;
    hdfs dfs -ls /user/hive/warehouse/functiontest.db/				//warehouse中没有数据
    drop table dept3;
    hdfs dfs -ls  /user/root/input/hive_external_data					// 查看外部数据还在

    - 新建时,带external都是外部表,否则为管理(内部)表;
    - 对于外部表,Hive在drop table时,不会删除数据,仅仅删除表结构(元数据);
    - 为什么要这样设计:
    * 原始数据往往不只供 Hive 分析使用,还可能被其他工具共用,因此 Hive 应尽量避免删除原始数据。
    * 工作中,建议优先使用external表;



    1). 常见导出

    -- 使用overwrite关键字导出前,一定要确认目标目录是空目录(或其中内容可以丢弃):如果目标目录不空,该目录下原有的所有文件都会被删除并替换为导出结果。
    		* 导出到本地目录
    			insert overwrite local directory '/path/to/local/export_dir' select * from emp_partition;   -- 路径为占位符,请替换为实际的本地空目录
    		* 导出到HDFS目录
    			insert overwrite directory "/user/root/input/hive_external_data" select * from dept;
    		* hive -e "select * from ..." > /opt/datas/xx.txt

    2). sqoop

    Sqoop:用于在 Hadoop 和关系数据库服务器之间传送数据
    - EXPORT 和 IMPORT -- 数据迁移(注意:这两个是 Hive 自带的命令,不属于 sqoop)
    	* EXPORT命令将hive表数据和元数据导出到外部目录
    	* 其他Hadoop集群可以拷贝该目录,然后使用IMPORT将表结构和数据导入
    			hdfs dfs -mkdir  /user/root/input/hive_export_data/
    		export table dept to "/user/root/input/hive_export_data/";
    			hdfs dfs -ls  /user/root/input/hive_export_data/
    			hdfs dfs -cat  /user/root/input/hive_export_data/_metadata  
    			hdfs dfs -cat  /user/root/input/hive_export_data/data/dept.txt
    		import table dept_import from "/user/root/input/hive_export_data/";
    			select * from dept_import;
