zoukankan      html  css  js  c++  java
  • tpch-kudu

    1.在impala里建立好文本表:

    -- External text tables over the pipe-delimited TPC-H files.
    -- Every table reads from its own directory under /user/training/tpch_10g,
    -- so all eight tables describe the same data set.

    CREATE EXTERNAL TABLE customer (
      C_CUSTKEY INT,
      C_NAME STRING,
      C_ADDRESS STRING,
      C_NATIONKEY INT,
      C_PHONE STRING,
      C_ACCTBAL DOUBLE,
      C_MKTSEGMENT STRING,
      C_COMMENT STRING
    )
    ROW FORMAT DELIMITED FIELDS TERMINATED BY '|'
    STORED AS TEXTFILE
    LOCATION '/user/training/tpch_10g/customer';

    CREATE EXTERNAL TABLE lineitem (
      L_ORDERKEY INT,
      L_PARTKEY INT,
      L_SUPPKEY INT,
      L_LINENUMBER INT,
      L_QUANTITY DOUBLE,
      L_EXTENDEDPRICE DOUBLE,
      L_DISCOUNT DOUBLE,
      L_TAX DOUBLE,
      L_RETURNFLAG STRING,
      L_LINESTATUS STRING,
      L_SHIPDATE STRING,
      L_COMMITDATE STRING,
      L_RECEIPTDATE STRING,
      L_SHIPINSTRUCT STRING,
      L_SHIPMODE STRING,
      L_COMMENT STRING
    )
    ROW FORMAT DELIMITED FIELDS TERMINATED BY '|'
    STORED AS TEXTFILE
    LOCATION '/user/training/tpch_10g/lineitem';

    CREATE EXTERNAL TABLE nation (
      N_NATIONKEY INT,
      N_NAME STRING,
      N_REGIONKEY INT,
      N_COMMENT STRING
    )
    ROW FORMAT DELIMITED FIELDS TERMINATED BY '|'
    STORED AS TEXTFILE
    LOCATION '/user/training/tpch_10g/nation';

    CREATE EXTERNAL TABLE orders (
      O_ORDERKEY INT,
      O_CUSTKEY INT,
      O_ORDERSTATUS STRING,
      O_TOTALPRICE DOUBLE,
      O_ORDERDATE STRING,
      O_ORDERPRIORITY STRING,
      O_CLERK STRING,
      O_SHIPPRIORITY INT,
      O_COMMENT STRING
    )
    ROW FORMAT DELIMITED FIELDS TERMINATED BY '|'
    STORED AS TEXTFILE
    LOCATION '/user/training/tpch_10g/orders';

    CREATE EXTERNAL TABLE part (
      P_PARTKEY INT,
      P_NAME STRING,
      P_MFGR STRING,
      P_BRAND STRING,
      P_TYPE STRING,
      P_SIZE INT,
      P_CONTAINER STRING,
      P_RETAILPRICE DOUBLE,
      P_COMMENT STRING
    )
    ROW FORMAT DELIMITED FIELDS TERMINATED BY '|'
    STORED AS TEXTFILE
    LOCATION '/user/training/tpch_10g/part';

    CREATE EXTERNAL TABLE partsupp (
      PS_PARTKEY INT,
      PS_SUPPKEY INT,
      PS_AVAILQTY INT,
      PS_SUPPLYCOST DOUBLE,
      PS_COMMENT STRING
    )
    ROW FORMAT DELIMITED FIELDS TERMINATED BY '|'
    STORED AS TEXTFILE
    LOCATION '/user/training/tpch_10g/partsupp';

    CREATE EXTERNAL TABLE region (
      R_REGIONKEY INT,
      R_NAME STRING,
      R_COMMENT STRING
    )
    ROW FORMAT DELIMITED FIELDS TERMINATED BY '|'
    STORED AS TEXTFILE
    LOCATION '/user/training/tpch_10g/region';

    CREATE EXTERNAL TABLE supplier (
      S_SUPPKEY INT,
      S_NAME STRING,
      S_ADDRESS STRING,
      S_NATIONKEY INT,
      S_PHONE STRING,
      S_ACCTBAL DOUBLE,
      S_COMMENT STRING
    )
    ROW FORMAT DELIMITED FIELDS TERMINATED BY '|'
    STORED AS TEXTFILE
    LOCATION '/user/training/tpch_10g/supplier';

    2.建立kudu-impala表:

    -- Kudu-backed copy of customer, keyed on C_CUSTKEY.
    -- NOTE(review): the storage_handler/TBLPROPERTIES form looks like early
    -- Kudu-on-Impala syntax; confirm it matches your Impala/Kudu version.
    CREATE TABLE customer_kudu (
      C_CUSTKEY INT,
      C_NAME STRING,
      C_ADDRESS STRING,
      C_NATIONKEY INT,
      C_PHONE STRING,
      C_ACCTBAL DOUBLE,
      C_MKTSEGMENT STRING,
      C_COMMENT STRING
    )
    TBLPROPERTIES (
      'storage_handler' = 'com.cloudera.kudu.hive.KuduStorageHandler',
      'kudu.table_name' = 'customer_kudu',
      'kudu.master_addresses' = 'node1:7051',
      'kudu.key_columns' = 'C_CUSTKEY'
    );

    -- Explicit select list (not SELECT *) so the load does not silently depend
    -- on the column order of the source text table.
    INSERT INTO customer_kudu
    SELECT
      C_CUSTKEY,
      C_NAME,
      C_ADDRESS,
      C_NATIONKEY,
      C_PHONE,
      C_ACCTBAL,
      C_MKTSEGMENT,
      C_COMMENT
    FROM customer;
    
    -- Kudu-backed copy of nation, keyed on N_NATIONKEY.
    CREATE TABLE nation_kudu (
      N_NATIONKEY INT,
      N_NAME STRING,
      N_REGIONKEY INT,
      N_COMMENT STRING
    )
    TBLPROPERTIES (
      'storage_handler' = 'com.cloudera.kudu.hive.KuduStorageHandler',
      'kudu.table_name' = 'nation_kudu',
      'kudu.master_addresses' = 'node1:7051',
      'kudu.key_columns' = 'N_NATIONKEY'
    );

    -- Explicit select list (not SELECT *) so the load does not silently depend
    -- on the column order of the source text table.
    INSERT INTO nation_kudu
    SELECT
      N_NATIONKEY,
      N_NAME,
      N_REGIONKEY,
      N_COMMENT
    FROM nation;
    
    
    -- Kudu-backed copy of part, keyed on P_PARTKEY.
    CREATE TABLE part_kudu (
      P_PARTKEY INT,
      P_NAME STRING,
      P_MFGR STRING,
      P_BRAND STRING,
      P_TYPE STRING,
      P_SIZE INT,
      P_CONTAINER STRING,
      P_RETAILPRICE DOUBLE,
      P_COMMENT STRING
    )
    TBLPROPERTIES (
      'storage_handler' = 'com.cloudera.kudu.hive.KuduStorageHandler',
      'kudu.table_name' = 'part_kudu',
      'kudu.master_addresses' = 'node1:7051',
      'kudu.key_columns' = 'P_PARTKEY'
    );

    -- Explicit select list (not SELECT *) so the load does not silently depend
    -- on the column order of the source text table.
    INSERT INTO part_kudu
    SELECT
      P_PARTKEY,
      P_NAME,
      P_MFGR,
      P_BRAND,
      P_TYPE,
      P_SIZE,
      P_CONTAINER,
      P_RETAILPRICE,
      P_COMMENT
    FROM part;
    
    
    -- Kudu-backed copy of supplier, keyed on S_SUPPKEY.
    CREATE TABLE supplier_kudu (
      S_SUPPKEY INT,
      S_NAME STRING,
      S_ADDRESS STRING,
      S_NATIONKEY INT,
      S_PHONE STRING,
      S_ACCTBAL DOUBLE,
      S_COMMENT STRING
    )
    TBLPROPERTIES (
      'storage_handler' = 'com.cloudera.kudu.hive.KuduStorageHandler',
      'kudu.table_name' = 'supplier_kudu',
      'kudu.master_addresses' = 'node1:7051',
      'kudu.key_columns' = 'S_SUPPKEY'
    );

    -- Load from the text table `supplier` (not from supplier_kudu itself).
    -- Explicit select list so the load does not depend on source column order.
    INSERT INTO supplier_kudu
    SELECT
      S_SUPPKEY,
      S_NAME,
      S_ADDRESS,
      S_NATIONKEY,
      S_PHONE,
      S_ACCTBAL,
      S_COMMENT
    FROM supplier;
    
    
    
    -- Kudu-backed copy of partsupp with a composite key (PS_PARTKEY, PS_SUPPKEY).
    CREATE TABLE partsupp_kudu (
      PS_PARTKEY INT,
      PS_SUPPKEY INT,
      PS_AVAILQTY INT,
      PS_SUPPLYCOST DOUBLE,
      PS_COMMENT STRING
    )
    TBLPROPERTIES (
      'storage_handler' = 'com.cloudera.kudu.hive.KuduStorageHandler',
      'kudu.table_name' = 'partsupp_kudu',
      'kudu.master_addresses' = 'node1:7051',
      'kudu.key_columns' = 'PS_PARTKEY,PS_SUPPKEY'
    );

    -- Load from the text table `partsupp` (not from partsupp_kudu itself).
    -- Explicit select list so the load does not depend on source column order.
    INSERT INTO partsupp_kudu
    SELECT
      PS_PARTKEY,
      PS_SUPPKEY,
      PS_AVAILQTY,
      PS_SUPPLYCOST,
      PS_COMMENT
    FROM partsupp;
    
    -- Kudu-backed copy of region, keyed on R_REGIONKEY.
    CREATE TABLE region_kudu (
      R_REGIONKEY INT,
      R_NAME STRING,
      R_COMMENT STRING
    )
    TBLPROPERTIES (
      'storage_handler' = 'com.cloudera.kudu.hive.KuduStorageHandler',
      'kudu.table_name' = 'region_kudu',
      'kudu.master_addresses' = 'node1:7051',
      'kudu.key_columns' = 'R_REGIONKEY'
    );

    -- Explicit select list (not SELECT *) so the load does not silently depend
    -- on the column order of the source text table.
    INSERT INTO region_kudu
    SELECT
      R_REGIONKEY,
      R_NAME,
      R_COMMENT
    FROM region;
    
    
    -- Kudu-backed copy of orders, keyed on O_ORDERKEY.
    CREATE TABLE orders_kudu (
      O_ORDERKEY INT,
      O_CUSTKEY INT,
      O_ORDERSTATUS STRING,
      O_TOTALPRICE DOUBLE,
      O_ORDERDATE STRING,
      O_ORDERPRIORITY STRING,
      O_CLERK STRING,
      O_SHIPPRIORITY INT,
      O_COMMENT STRING
    )
    TBLPROPERTIES (
      'storage_handler' = 'com.cloudera.kudu.hive.KuduStorageHandler',
      'kudu.table_name' = 'orders_kudu',
      'kudu.master_addresses' = 'node1:7051',
      'kudu.key_columns' = 'O_ORDERKEY'
    );

    -- Explicit select list (not SELECT *) so the load does not silently depend
    -- on the column order of the source text table.
    INSERT INTO orders_kudu
    SELECT
      O_ORDERKEY,
      O_CUSTKEY,
      O_ORDERSTATUS,
      O_TOTALPRICE,
      O_ORDERDATE,
      O_ORDERPRIORITY,
      O_CLERK,
      O_SHIPPRIORITY,
      O_COMMENT
    FROM orders;
    
    
    -- Kudu-backed copy of lineitem with a composite key (L_ORDERKEY, L_LINENUMBER).
    -- Both key columns are declared first here, whereas the text table has
    -- L_LINENUMBER in fourth position; the INSERT below therefore names the
    -- columns explicitly in the Kudu table's order.
    CREATE TABLE lineitem_kudu (
      L_ORDERKEY INT,
      L_LINENUMBER INT,
      L_PARTKEY INT,
      L_SUPPKEY INT,
      L_QUANTITY DOUBLE,
      L_EXTENDEDPRICE DOUBLE,
      L_DISCOUNT DOUBLE,
      L_TAX DOUBLE,
      L_RETURNFLAG STRING,
      L_LINESTATUS STRING,
      L_SHIPDATE STRING,
      L_COMMITDATE STRING,
      L_RECEIPTDATE STRING,
      L_SHIPINSTRUCT STRING,
      L_SHIPMODE STRING,
      L_COMMENT STRING
    )
    TBLPROPERTIES (
      'storage_handler' = 'com.cloudera.kudu.hive.KuduStorageHandler',
      'kudu.table_name' = 'lineitem_kudu',
      'kudu.master_addresses' = 'node1:7051',
      'kudu.key_columns' = 'L_ORDERKEY,L_LINENUMBER'
    );

    INSERT INTO lineitem_kudu
    SELECT
      L_ORDERKEY,
      L_LINENUMBER,
      L_PARTKEY,
      L_SUPPKEY,
      L_QUANTITY,
      L_EXTENDEDPRICE,
      L_DISCOUNT,
      L_TAX,
      L_RETURNFLAG,
      L_LINESTATUS,
      L_SHIPDATE,
      L_COMMITDATE,
      L_RECEIPTDATE,
      L_SHIPINSTRUCT,
      L_SHIPMODE,
      L_COMMENT
    FROM lineitem;

    下面是日志信息:

    [node2:21000] > use tpch_10g;
    Query: use tpch_10g
    [node2:21000] > show tables;
    Query: show tables
    +----------+
    | name     |
    +----------+
    | customer |
    | lineitem |
    | nation   |
    | orders   |
    | part     |
    | partsupp |
    | region   |
    | supplier |
    +----------+
    Fetched 8 row(s) in 0.01s
    [node2:21000] > create table customer_kudu (C_CUSTKEY INT, C_NAME STRING, C_ADDRESS STRING, C_NATIONKEY INT, C_PHONE STRING, C_ACCTBAL DOUBLE, C_MKTSEGMENT STRING, C_COMMENT STRING)
                  > TBLPROPERTIES(
                  >   'storage_handler' = 'com.cloudera.kudu.hive.KuduStorageHandler',
                  >   'kudu.table_name' = 'customer_kudu',
                  >   'kudu.master_addresses' = 'node1:7051',
                  >   'kudu.key_columns' = 'C_CUSTKEY'
                  > );
    Query: create table customer_kudu (C_CUSTKEY INT, C_NAME STRING, C_ADDRESS STRING, C_NATIONKEY INT, C_PHONE STRING, C_ACCTBAL DOUBLE, C_MKTSEGMENT STRING, C_COMMENT STRING)
    TBLPROPERTIES(
    'storage_handler' = 'com.cloudera.kudu.hive.KuduStorageHandler',
    'kudu.table_name' = 'customer_kudu',
    'kudu.master_addresses' = 'node1:7051',
    'kudu.key_columns' = 'C_CUSTKEY'
    )
    
    Fetched 0 row(s) in 0.68s
    [node2:21000] > insert into customer_kudu select * from customer;
    Query: insert into customer_kudu select * from customer
    Inserted 1500000 row(s) in 19.02s
    [node2:21000] > show tables;
    Query: show tables
    +---------------+
    | name          |
    +---------------+
    | customer      |
    | customer_kudu |
    | lineitem      |
    | nation        |
    | orders        |
    | part          |
    | partsupp      |
    | region        |
    | supplier      |
    +---------------+
    Fetched 9 row(s) in 0.01s
    [node2:21000] > create table nation_kudu (N_NATIONKEY INT, N_NAME STRING, N_REGIONKEY INT, N_COMMENT STRING)
                  > TBLPROPERTIES(
                  >   'storage_handler' = 'com.cloudera.kudu.hive.KuduStorageHandler',
                  >   'kudu.table_name' = 'nation_kudu',
                  >   'kudu.master_addresses' = 'node1:7051',
                  >   'kudu.key_columns' = 'N_NATIONKEY'
                  > );
    Query: create table nation_kudu (N_NATIONKEY INT, N_NAME STRING, N_REGIONKEY INT, N_COMMENT STRING)
    TBLPROPERTIES(
    'storage_handler' = 'com.cloudera.kudu.hive.KuduStorageHandler',
    'kudu.table_name' = 'nation_kudu',
    'kudu.master_addresses' = 'node1:7051',
    'kudu.key_columns' = 'N_NATIONKEY'
    )
    
    Fetched 0 row(s) in 0.72s
    [node2:21000] > insert into nation_kudu select * from nation;
    Query: insert into nation_kudu select * from nation
    Inserted 25 row(s) in 4.26s
    [node2:21000] > create table part_kudu (P_PARTKEY INT, P_NAME STRING, P_MFGR STRING, P_BRAND STRING, P_TYPE STRING, P_SIZE INT, P_CONTAINER STRING, P_RETAILPRICE DOUBLE, P_COMMENT STRING)
                  > TBLPROPERTIES(
                  >   'storage_handler' = 'com.cloudera.kudu.hive.KuduStorageHandler',
                  >   'kudu.table_name' = 'part_kudu',
                  >   'kudu.master_addresses' = 'node1:7051',
                  >   'kudu.key_columns' = 'P_PARTKEY'
                  > );
    Query: create table part_kudu (P_PARTKEY INT, P_NAME STRING, P_MFGR STRING, P_BRAND STRING, P_TYPE STRING, P_SIZE INT, P_CONTAINER STRING, P_RETAILPRICE DOUBLE, P_COMMENT STRING)
    TBLPROPERTIES(
    'storage_handler' = 'com.cloudera.kudu.hive.KuduStorageHandler',
    'kudu.table_name' = 'part_kudu',
    'kudu.master_addresses' = 'node1:7051',
    'kudu.key_columns' = 'P_PARTKEY'
    )
    
    Fetched 0 row(s) in 0.68s
    [node2:21000] > insert into part_kudu select * from part;
    Query: insert into part_kudu select * from part
    Inserted 2000000 row(s) in 22.71s
    [node2:21000] > create table supplier_kudu (S_SUPPKEY INT, S_NAME STRING, S_ADDRESS STRING, S_NATIONKEY INT, S_PHONE STRING, S_ACCTBAL DOUBLE, S_COMMENT STRING)
                  > TBLPROPERTIES(
                  >   'storage_handler' = 'com.cloudera.kudu.hive.KuduStorageHandler',
                  >   'kudu.table_name' = 'supplier_kudu',
                  >   'kudu.master_addresses' = 'node1:7051',
                  >   'kudu.key_columns' = 'S_SUPPKEY'
                  > );
    Query: create table supplier_kudu (S_SUPPKEY INT, S_NAME STRING, S_ADDRESS STRING, S_NATIONKEY INT, S_PHONE STRING, S_ACCTBAL DOUBLE, S_COMMENT STRING)
    TBLPROPERTIES(
    'storage_handler' = 'com.cloudera.kudu.hive.KuduStorageHandler',
    'kudu.table_name' = 'supplier_kudu',
    'kudu.master_addresses' = 'node1:7051',
    'kudu.key_columns' = 'S_SUPPKEY'
    )
    
    Fetched 0 row(s) in 0.73s
    [node2:21000] > insert into supplier_kudu select * from supplier_kudu;
    Query: insert into supplier_kudu select * from supplier_kudu
    Inserted 0 row(s) in 4.67s
    [node2:21000] > create table partsupp_kudu (PS_PARTKEY INT, PS_SUPPKEY INT, PS_AVAILQTY INT, PS_SUPPLYCOST DOUBLE, PS_COMMENT STRING) 
                  > TBLPROPERTIES(
                  >   'storage_handler' = 'com.cloudera.kudu.hive.KuduStorageHandler',
                  >   'kudu.table_name' = 'partsupp_kudu',
                  >   'kudu.master_addresses' = 'node1:7051',
                  >   'kudu.key_columns' = 'PS_PARTKEY,PS_SUPPKEY'
                  > );
    Query: create table partsupp_kudu (PS_PARTKEY INT, PS_SUPPKEY INT, PS_AVAILQTY INT, PS_SUPPLYCOST DOUBLE, PS_COMMENT STRING)
    TBLPROPERTIES(
    'storage_handler' = 'com.cloudera.kudu.hive.KuduStorageHandler',
    'kudu.table_name' = 'partsupp_kudu',
    'kudu.master_addresses' = 'node1:7051',
    'kudu.key_columns' = 'PS_PARTKEY,PS_SUPPKEY'
    )
    
    Fetched 0 row(s) in 0.72s
    [node2:21000] > insert into partsupp_kudu select * from partsupp_kudu;
    Query: insert into partsupp_kudu select * from partsupp_kudu
    Inserted 0 row(s) in 3.07s
    [node2:21000] > insert into supplier_kudu select * from supplier;
    Query: insert into supplier_kudu select * from supplier
    Inserted 100000 row(s) in 1.78s
    [node2:21000] > insert into partsupp_kudu select * from partsupp;
    Query: insert into partsupp_kudu select * from partsupp
    Inserted 8000000 row(s) in 64.44s
    [node2:21000] > create table region_kudu (R_REGIONKEY INT, R_NAME STRING, R_COMMENT STRING) 
                  > TBLPROPERTIES(
                  >   'storage_handler' = 'com.cloudera.kudu.hive.KuduStorageHandler',
                  >   'kudu.table_name' = 'region_kudu',
                  >   'kudu.master_addresses' = 'node1:7051',
                  >   'kudu.key_columns' = 'R_REGIONKEY'
                  > );
    Query: create table region_kudu (R_REGIONKEY INT, R_NAME STRING, R_COMMENT STRING)
    TBLPROPERTIES(
    'storage_handler' = 'com.cloudera.kudu.hive.KuduStorageHandler',
    'kudu.table_name' = 'region_kudu',
    'kudu.master_addresses' = 'node1:7051',
    'kudu.key_columns' = 'R_REGIONKEY'
    )
    
    Fetched 0 row(s) in 0.72s
    [node2:21000] > insert into region_kudu select * from region;
    Query: insert into region_kudu select * from region
    Inserted 5 row(s) in 4.05s
    
    
    [node2:21000] > create table lineitem_kudu (L_ORDERKEY INT, L_LINENUMBER INT, L_PARTKEY INT, L_SUPPKEY INT, L_QUANTITY DOUBLE, L_EXTENDEDPRICE DOUBLE, L_DISCOUNT DOUBLE, L_TAX DOUBLE, L_RETURNFLAG STRING, L_LINESTATUS STRING, L_SHIPDATE STRING, L_COMMITDATE STRING, L_RECEIPTDATE STRING, L_SHIPINSTRUCT STRING, L_SHIPMODE STRING, L_COMMENT STRING)
                  > TBLPROPERTIES(
                  >   'storage_handler' = 'com.cloudera.kudu.hive.KuduStorageHandler',
                  >   'kudu.table_name' = 'lineitem_kudu',
                  >   'kudu.master_addresses' = 'node1:7051',
                  >   'kudu.key_columns' = 'L_ORDERKEY,L_LINENUMBER'
                  > );
    Query: create table lineitem_kudu (L_ORDERKEY INT, L_LINENUMBER INT, L_PARTKEY INT, L_SUPPKEY INT, L_QUANTITY DOUBLE, L_EXTENDEDPRICE DOUBLE, L_DISCOUNT DOUBLE, L_TAX DOUBLE, L_RETURNFLAG STRING, L_LINESTATUS STRING, L_SHIPDATE STRING, L_COMMITDATE STRING, L_RECEIPTDATE STRING, L_SHIPINSTRUCT STRING, L_SHIPMODE STRING, L_COMMENT STRING)
    TBLPROPERTIES(
    'storage_handler' = 'com.cloudera.kudu.hive.KuduStorageHandler',
    'kudu.table_name' = 'lineitem_kudu',
    'kudu.master_addresses' = 'node1:7051',
    'kudu.key_columns' = 'L_ORDERKEY,L_LINENUMBER'
    )
    
    Fetched 0 row(s) in 0.72s
    [node2:21000] > insert into lineitem_kudu select L_ORDERKEY, L_LINENUMBER, L_PARTKEY, L_SUPPKEY, L_QUANTITY, L_EXTENDEDPRICE, L_DISCOUNT, L_TAX, L_RETURNFLAG, L_LINESTATUS, L_SHIPDATE, L_COMMITDATE, L_RECEIPTDATE, L_SHIPINSTRUCT, L_SHIPMODE, L_COMMENT from lineitem;
    Query: insert into lineitem_kudu select L_ORDERKEY, L_LINENUMBER, L_PARTKEY, L_SUPPKEY, L_QUANTITY, L_EXTENDEDPRICE, L_DISCOUNT, L_TAX, L_RETURNFLAG, L_LINESTATUS, L_SHIPDATE, L_COMMITDATE, L_RECEIPTDATE, L_SHIPINSTRUCT, L_SHIPMODE, L_COMMENT from lineitem
    Inserted 59986052 row(s) in 951.42s
    
    [node2:21000] > create table orders_kudu (O_ORDERKEY INT, O_CUSTKEY INT, O_ORDERSTATUS STRING, O_TOTALPRICE DOUBLE, O_ORDERDATE STRING, O_ORDERPRIORITY STRING, O_CLERK STRING, O_SHIPPRIORITY INT, O_COMMENT STRING)
                  > TBLPROPERTIES(
                  >   'storage_handler' = 'com.cloudera.kudu.hive.KuduStorageHandler',
                  >   'kudu.table_name' = 'orders_kudu',
                  >   'kudu.master_addresses' = 'node1:7051',
                  >   'kudu.key_columns' = 'O_ORDERKEY'
                  > );
    Query: create table orders_kudu (O_ORDERKEY INT, O_CUSTKEY INT, O_ORDERSTATUS STRING, O_TOTALPRICE DOUBLE, O_ORDERDATE STRING, O_ORDERPRIORITY STRING, O_CLERK STRING, O_SHIPPRIORITY INT, O_COMMENT STRING)
    TBLPROPERTIES(
    'storage_handler' = 'com.cloudera.kudu.hive.KuduStorageHandler',
    'kudu.table_name' = 'orders_kudu',
    'kudu.master_addresses' = 'node1:7051',
    'kudu.key_columns' = 'O_ORDERKEY'
    )
    
    Fetched 0 row(s) in 0.86s
    [node2:21000] > insert into orders_kudu select * from orders;
    Query: insert into orders_kudu select * from orders
    Inserted 15000000 row(s) in 133.11s
    [node2:21000] > 

    3.计算表的统计信息:

    -- Gather table and column statistics for the eight external text tables.
    COMPUTE STATS customer;
    COMPUTE STATS lineitem;
    COMPUTE STATS nation;
    COMPUTE STATS orders;
    COMPUTE STATS part;
    COMPUTE STATS partsupp;
    COMPUTE STATS region;
    COMPUTE STATS supplier;

     

     -- Gather table and column statistics for the eight Kudu-backed tables.
     COMPUTE STATS customer_kudu;
     COMPUTE STATS lineitem_kudu;
     COMPUTE STATS nation_kudu;
     COMPUTE STATS orders_kudu;
     COMPUTE STATS part_kudu;
     COMPUTE STATS partsupp_kudu;
     COMPUTE STATS region_kudu;
     COMPUTE STATS supplier_kudu;

  • 相关阅读:
    Hadoop伪分布式环境安装
    记一次Xshell配置ssh免密登录时的问题
    HDFS学习笔记二
    HDFS学习笔记一
    十万级百万级数据量的Excel文件导入并写入数据库
    javascript内置对象:Date
    内置对象:Math
    javascript内置对象一:String
    javascript内置对象一: Array数组
    Javascritp 数据结构和二叉树算法
  • 原文地址:https://www.cnblogs.com/littlesuccess/p/4867888.html
Copyright © 2011-2022 走看看