zoukankan      html  css  js  c++  java
  • Hive Data Definition Language

    HiveQL DDL statements are documented here, including:
    
        CREATE DATABASE/SCHEMA, TABLE, VIEW, FUNCTION, INDEX
        DROP DATABASE/SCHEMA, TABLE, VIEW, INDEX
        TRUNCATE TABLE
        ALTER DATABASE/SCHEMA, TABLE, VIEW
        MSCK REPAIR TABLE (or ALTER TABLE RECOVER PARTITIONS)
        SHOW DATABASES/SCHEMAS, TABLES, TBLPROPERTIES, PARTITIONS, FUNCTIONS, INDEX[ES], COLUMNS, CREATE TABLE
        DESCRIBE DATABASE/SCHEMA, table_name, view_name
    
    PARTITION statements are usually options of TABLE statements, except for SHOW PARTITIONS.
    
    
    1.Create/Drop/Alter/Use Database
    CREATE (DATABASE|SCHEMA) [IF NOT EXISTS] database_name
      [COMMENT database_comment]
      [LOCATION hdfs_path]
      [WITH DBPROPERTIES (property_name=property_value, ...)];
      
    2.Drop Database
    DROP (DATABASE|SCHEMA) [IF EXISTS] database_name [RESTRICT|CASCADE];
    
    3.Alter Database
    ALTER (DATABASE|SCHEMA) database_name SET DBPROPERTIES (property_name=property_value, ...);   -- (Note: SCHEMA added in Hive 0.14.0)
     
    ALTER (DATABASE|SCHEMA) database_name SET OWNER [USER|ROLE] user_or_role;   -- (Note: Hive 0.13.0 and later; SCHEMA added in Hive 0.14.0)
    
    4.Use Database
    USE database_name;
    USE DEFAULT;
    To check which database is currently being used: SELECT current_database() (as of Hive 0.13.0).
    
    5.Create Table
    CREATE [TEMPORARY] [EXTERNAL] TABLE [IF NOT EXISTS] [db_name.]table_name    -- (Note: TEMPORARY available in Hive 0.14.0 and later)
      [(col_name data_type [COMMENT col_comment], ...)]
      [COMMENT table_comment]
      [PARTITIONED BY (col_name data_type [COMMENT col_comment], ...)]
      [CLUSTERED BY (col_name, col_name, ...) [SORTED BY (col_name [ASC|DESC], ...)] INTO num_buckets BUCKETS]
      [SKEWED BY (col_name, col_name, ...)                  -- (Note: Available in Hive 0.10.0 and later)]
         ON ((col_value, col_value, ...), (col_value, col_value, ...), ...)
         [STORED AS DIRECTORIES]
      [
       [ROW FORMAT row_format] 
       [STORED AS file_format]
         | STORED BY 'storage.handler.class.name' [WITH SERDEPROPERTIES (...)]  -- (Note: Available in Hive 0.6.0 and later)
      ]
      [LOCATION hdfs_path]
      [TBLPROPERTIES (property_name=property_value, ...)]   -- (Note: Available in Hive 0.6.0 and later)
      [AS select_statement];   -- (Note: Available in Hive 0.5.0 and later; not supported for external tables)
     
    CREATE [TEMPORARY] [EXTERNAL] TABLE [IF NOT EXISTS] [db_name.]table_name
      LIKE existing_table_or_view_name
      [LOCATION hdfs_path];
     
    data_type
      : primitive_type
      | array_type
      | map_type
      | struct_type
      | union_type  -- (Note: Available in Hive 0.7.0 and later)
     
    primitive_type
      : TINYINT
      | SMALLINT
      | INT
      | BIGINT
      | BOOLEAN
      | FLOAT
      | DOUBLE
      | STRING
      | BINARY      -- (Note: Available in Hive 0.8.0 and later)
      | TIMESTAMP   -- (Note: Available in Hive 0.8.0 and later)
      | DECIMAL     -- (Note: Available in Hive 0.11.0 and later)
      | DECIMAL(precision, scale)  -- (Note: Available in Hive 0.13.0 and later)
      | DATE        -- (Note: Available in Hive 0.12.0 and later)
      | VARCHAR     -- (Note: Available in Hive 0.12.0 and later)
      | CHAR        -- (Note: Available in Hive 0.13.0 and later)
     
    array_type
      : ARRAY < data_type >
     
    map_type
      : MAP < primitive_type, data_type >
     
    struct_type
      : STRUCT < col_name : data_type [COMMENT col_comment], ...>
     
    union_type
       : UNIONTYPE < data_type, data_type, ... >  -- (Note: Available in Hive 0.7.0 and later)
     
    row_format
      : DELIMITED [FIELDS TERMINATED BY char [ESCAPED BY char]] [COLLECTION ITEMS TERMINATED BY char]
            [MAP KEYS TERMINATED BY char] [LINES TERMINATED BY char]
            [NULL DEFINED AS char]   -- (Note: Available in Hive 0.13 and later)
      | SERDE serde_name [WITH SERDEPROPERTIES (property_name=property_value, property_name=property_value, ...)]
     
    file_format:
      : SEQUENCEFILE
      | TEXTFILE    -- (Default, depending on hive.default.fileformat configuration)
      | RCFILE      -- (Note: Available in Hive 0.6.0 and later)
      | ORC         -- (Note: Available in Hive 0.11.0 and later)
      | PARQUET     -- (Note: Available in Hive 0.13.0 and later)
      | AVRO        -- (Note: Available in Hive 0.14.0 and later)
      | INPUTFORMAT input_format_classname OUTPUTFORMAT output_format_classname
    
    
    6.Partitioned Tables
    create table table_name (
      id                int,
      dtDontQuery       string,
      name              string
    )
    partitioned by (date string)
    
    CREATE TABLE page_view(viewTime INT, userid BIGINT,
         page_url STRING, referrer_url STRING,
         ip STRING COMMENT 'IP Address of the User')
     COMMENT 'This is the page view table'
     PARTITIONED BY(dt STRING, country STRING)
     ROW FORMAT DELIMITED
       FIELDS TERMINATED BY '01'
    STORED AS SEQUENCEFILE;
    
    
    7.External Tables
    
    CREATE EXTERNAL TABLE page_view(viewTime INT, userid BIGINT,
         page_url STRING, referrer_url STRING,
         ip STRING COMMENT 'IP Address of the User',
         country STRING COMMENT 'country of origination')
     COMMENT 'This is the staging page view table'
     ROW FORMAT DELIMITED FIELDS TERMINATED BY '54'
     STORED AS TEXTFILE
     LOCATION '<hdfs_location>';
     
     
     8.Create Table As Select (CTAS)
     
     CREATE TABLE new_key_value_store
       ROW FORMAT SERDE "org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe"
       STORED AS RCFile
       AS
    SELECT (key % 1024) new_key, concat(key, value) key_value_pair
    FROM key_value_store
    SORT BY new_key, key_value_pair;
    
    
    9.Create Table Like
    CREATE TABLE empty_key_value_store
    LIKE key_value_store;
    
    10.Bucketed Sorted Tables
    
    CREATE TABLE page_view(viewTime INT, userid BIGINT,
         page_url STRING, referrer_url STRING,
         ip STRING COMMENT 'IP Address of the User')
     COMMENT 'This is the page view table'
     PARTITIONED BY(dt STRING, country STRING)
     CLUSTERED BY(userid) SORTED BY(viewTime) INTO 32 BUCKETS
     ROW FORMAT DELIMITED
       FIELDS TERMINATED BY '01'
       COLLECTION ITEMS TERMINATED BY '02'
       MAP KEYS TERMINATED BY '03'
     STORED AS SEQUENCEFILE;
     
     



    More info please refer to page  :   https://cwiki.apache.org/confluence/display/Hive/LanguageManual+DDL

  • 相关阅读:
    函数的命名空间和作用域
    python 各个地方导航(方便查询,持续更新!)
    零基础学虚幻4(UE4):蓝图+VR 丁树凯教程
    UE4打包后的游戏,无法打卡其他关卡的解决办法
    【精辟】进制转换
    Git仓库的初始化
    【编程】杂碎知识点
    MFC制作带界面的DLL库
    StartImage.DLL使用说明
    MFC对话框程序:实现程序启动画面
  • 原文地址:https://www.cnblogs.com/TendToBigData/p/10501407.html
Copyright © 2011-2022 走看看