1.在impala里建立好文本表:
create external table customer (C_CUSTKEY INT, C_NAME STRING, C_ADDRESS STRING, C_NATIONKEY INT, C_PHONE STRING, C_ACCTBAL DOUBLE, C_MKTSEGMENT STRING, C_COMMENT STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' STORED AS TEXTFILE LOCATION '/user/training/tpch_10g/customer'; create external table lineitem (L_ORDERKEY INT, L_PARTKEY INT, L_SUPPKEY INT, L_LINENUMBER INT, L_QUANTITY DOUBLE, L_EXTENDEDPRICE DOUBLE, L_DISCOUNT DOUBLE, L_TAX DOUBLE, L_RETURNFLAG STRING, L_LINESTATUS STRING, L_SHIPDATE STRING, L_COMMITDATE STRING, L_RECEIPTDATE STRING, L_SHIPINSTRUCT STRING, L_SHIPMODE STRING, L_COMMENT STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' STORED AS TEXTFILE LOCATION '/user/training/tpch_10g/lineitem'; create external table nation (N_NATIONKEY INT, N_NAME STRING, N_REGIONKEY INT, N_COMMENT STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' STORED AS TEXTFILE LOCATION '/user/training/tpch_10g/nation'; create external table orders (O_ORDERKEY INT, O_CUSTKEY INT, O_ORDERSTATUS STRING, O_TOTALPRICE DOUBLE, O_ORDERDATE STRING, O_ORDERPRIORITY STRING, O_CLERK STRING, O_SHIPPRIORITY INT, O_COMMENT STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' STORED AS TEXTFILE LOCATION '/user/training/tpch_10g/orders'; create external table part (P_PARTKEY INT, P_NAME STRING, P_MFGR STRING, P_BRAND STRING, P_TYPE STRING, P_SIZE INT, P_CONTAINER STRING, P_RETAILPRICE DOUBLE, P_COMMENT STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' STORED AS TEXTFILE LOCATION '/user/training/tpch_10g/part'; create external table partsupp (PS_PARTKEY INT, PS_SUPPKEY INT, PS_AVAILQTY INT, PS_SUPPLYCOST DOUBLE, PS_COMMENT STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' STORED AS TEXTFILE LOCATION'/user/training/tpch_10g/partsupp'; create external table region (R_REGIONKEY INT, R_NAME STRING, R_COMMENT STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' STORED AS TEXTFILE LOCATION '/user/training/tpch_100g/region'; create external table supplier (S_SUPPKEY INT, S_NAME STRING, S_ADDRESS STRING, S_NATIONKEY INT, S_PHONE STRING, S_ACCTBAL DOUBLE, S_COMMENT STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' STORED AS TEXTFILE LOCATION '/user/training/tpch_10g/supplier';
3.建立kudu-impala表
create table customer_kudu (C_CUSTKEY INT, C_NAME STRING, C_ADDRESS STRING, C_NATIONKEY INT, C_PHONE STRING, C_ACCTBAL DOUBLE, C_MKTSEGMENT STRING, C_COMMENT STRING) TBLPROPERTIES( 'storage_handler' = 'com.cloudera.kudu.hive.KuduStorageHandler', 'kudu.table_name' = 'customer_kudu', 'kudu.master_addresses' = 'node1:7051', 'kudu.key_columns' = 'C_CUSTKEY' ); insert into customer_kudu select * from customer; create table nation_kudu (N_NATIONKEY INT, N_NAME STRING, N_REGIONKEY INT, N_COMMENT STRING) TBLPROPERTIES( 'storage_handler' = 'com.cloudera.kudu.hive.KuduStorageHandler', 'kudu.table_name' = 'nation_kudu', 'kudu.master_addresses' = 'node1:7051', 'kudu.key_columns' = 'N_NATIONKEY' ); insert into nation_kudu select * from nation; create table part_kudu (P_PARTKEY INT, P_NAME STRING, P_MFGR STRING, P_BRAND STRING, P_TYPE STRING, P_SIZE INT, P_CONTAINER STRING, P_RETAILPRICE DOUBLE, P_COMMENT STRING) TBLPROPERTIES( 'storage_handler' = 'com.cloudera.kudu.hive.KuduStorageHandler', 'kudu.table_name' = 'part_kudu', 'kudu.master_addresses' = 'node1:7051', 'kudu.key_columns' = 'P_PARTKEY' ); insert into part_kudu select * from part; create table supplier_kudu (S_SUPPKEY INT, S_NAME STRING, S_ADDRESS STRING, S_NATIONKEY INT, S_PHONE STRING, S_ACCTBAL DOUBLE, S_COMMENT STRING) TBLPROPERTIES( 'storage_handler' = 'com.cloudera.kudu.hive.KuduStorageHandler', 'kudu.table_name' = 'supplier_kudu', 'kudu.master_addresses' = 'node1:7051', 'kudu.key_columns' = 'S_SUPPKEY' ); insert into supplier_kudu select * from supplier; create table partsupp_kudu (PS_PARTKEY INT, PS_SUPPKEY INT, PS_AVAILQTY INT, PS_SUPPLYCOST DOUBLE, PS_COMMENT STRING) TBLPROPERTIES( 'storage_handler' = 'com.cloudera.kudu.hive.KuduStorageHandler', 'kudu.table_name' = 'partsupp_kudu', 'kudu.master_addresses' = 'node1:7051', 'kudu.key_columns' = 'PS_PARTKEY,PS_SUPPKEY' ); insert into partsupp_kudu select * from partsupp; create table region_kudu (R_REGIONKEY INT, R_NAME STRING, R_COMMENT STRING) TBLPROPERTIES( 'storage_handler' = 'com.cloudera.kudu.hive.KuduStorageHandler', 'kudu.table_name' = 'region_kudu', 'kudu.master_addresses' = 'node1:7051', 'kudu.key_columns' = 'R_REGIONKEY' ); insert into region_kudu select * from region; create table orders_kudu (O_ORDERKEY INT, O_CUSTKEY INT, O_ORDERSTATUS STRING, O_TOTALPRICE DOUBLE, O_ORDERDATE STRING, O_ORDERPRIORITY STRING, O_CLERK STRING, O_SHIPPRIORITY INT, O_COMMENT STRING) TBLPROPERTIES( 'storage_handler' = 'com.cloudera.kudu.hive.KuduStorageHandler', 'kudu.table_name' = 'orders_kudu', 'kudu.master_addresses' = 'node1:7051', 'kudu.key_columns' = 'O_ORDERKEY' ); insert into orders_kudu select * from orders; create table lineitem_kudu (L_ORDERKEY INT, L_LINENUMBER INT, L_PARTKEY INT, L_SUPPKEY INT, L_QUANTITY DOUBLE, L_EXTENDEDPRICE DOUBLE, L_DISCOUNT DOUBLE, L_TAX DOUBLE, L_RETURNFLAG STRING, L_LINESTATUS STRING, L_SHIPDATE STRING, L_COMMITDATE STRING, L_RECEIPTDATE STRING, L_SHIPINSTRUCT STRING, L_SHIPMODE STRING, L_COMMENT STRING) TBLPROPERTIES( 'storage_handler' = 'com.cloudera.kudu.hive.KuduStorageHandler', 'kudu.table_name' = 'lineitem_kudu', 'kudu.master_addresses' = 'node1:7051', 'kudu.key_columns' = 'L_ORDERKEY,L_LINENUMBER' ); insert into lineitem_kudu select L_ORDERKEY, L_LINENUMBER, L_PARTKEY, L_SUPPKEY, L_QUANTITY, L_EXTENDEDPRICE, L_DISCOUNT, L_TAX, L_RETURNFLAG, L_LINESTATUS, L_SHIPDATE, L_COMMITDATE, L_RECEIPTDATE, L_SHIPINSTRUCT, L_SHIPMODE, L_COMMENT from lineitem;
下面是日志信息:
[node2:21000] > use tpch_10g; Query: use tpch_10g [node2:21000] > show tables; Query: show tables +----------+ | name | +----------+ | customer | | lineitem | | nation | | orders | | part | | partsupp | | region | | supplier | +----------+ Fetched 8 row(s) in 0.01s [node2:21000] > create table customer_kudu (C_CUSTKEY INT, C_NAME STRING, C_ADDRESS STRING, C_NATIONKEY INT, C_PHONE STRING, C_ACCTBAL DOUBLE, C_MKTSEGMENT STRING, C_COMMENT STRING) > TBLPROPERTIES( > 'storage_handler' = 'com.cloudera.kudu.hive.KuduStorageHandler', > 'kudu.table_name' = 'customer_kudu', > 'kudu.master_addresses' = 'node1:7051', > 'kudu.key_columns' = 'C_CUSTKEY' > ); Query: create table customer_kudu (C_CUSTKEY INT, C_NAME STRING, C_ADDRESS STRING, C_NATIONKEY INT, C_PHONE STRING, C_ACCTBAL DOUBLE, C_MKTSEGMENT STRING, C_COMMENT STRING) TBLPROPERTIES( 'storage_handler' = 'com.cloudera.kudu.hive.KuduStorageHandler', 'kudu.table_name' = 'customer_kudu', 'kudu.master_addresses' = 'node1:7051', 'kudu.key_columns' = 'C_CUSTKEY' ) Fetched 0 row(s) in 0.68s [node2:21000] > insert into customer_kudu select * from customer; Query: insert into customer_kudu select * from customer Inserted 1500000 row(s) in 19.02s [node2:21000] > show tables; Query: show tables +---------------+ | name | +---------------+ | customer | | customer_kudu | | lineitem | | nation | | orders | | part | | partsupp | | region | | supplier | +---------------+ Fetched 9 row(s) in 0.01s [node2:21000] > create table nation_kudu (N_NATIONKEY INT, N_NAME STRING, N_REGIONKEY INT, N_COMMENT STRING) > TBLPROPERTIES( > 'storage_handler' = 'com.cloudera.kudu.hive.KuduStorageHandler', > 'kudu.table_name' = 'nation_kudu', > 'kudu.master_addresses' = 'node1:7051', > 'kudu.key_columns' = 'N_NATIONKEY' > ); Query: create table nation_kudu (N_NATIONKEY INT, N_NAME STRING, N_REGIONKEY INT, N_COMMENT STRING) TBLPROPERTIES( 'storage_handler' = 'com.cloudera.kudu.hive.KuduStorageHandler', 'kudu.table_name' = 'nation_kudu', 'kudu.master_addresses' = 'node1:7051', 'kudu.key_columns' = 'N_NATIONKEY' ) Fetched 0 row(s) in 0.72s [node2:21000] > insert into nation_kudu select * from nation; Query: insert into nation_kudu select * from nation Inserted 25 row(s) in 4.26s [node2:21000] > create table part_kudu (P_PARTKEY INT, P_NAME STRING, P_MFGR STRING, P_BRAND STRING, P_TYPE STRING, P_SIZE INT, P_CONTAINER STRING, P_RETAILPRICE DOUBLE, P_COMMENT STRING) > TBLPROPERTIES( > 'storage_handler' = 'com.cloudera.kudu.hive.KuduStorageHandler', > 'kudu.table_name' = 'part_kudu', > 'kudu.master_addresses' = 'node1:7051', > 'kudu.key_columns' = 'P_PARTKEY' > ); Query: create table part_kudu (P_PARTKEY INT, P_NAME STRING, P_MFGR STRING, P_BRAND STRING, P_TYPE STRING, P_SIZE INT, P_CONTAINER STRING, P_RETAILPRICE DOUBLE, P_COMMENT STRING) TBLPROPERTIES( 'storage_handler' = 'com.cloudera.kudu.hive.KuduStorageHandler', 'kudu.table_name' = 'part_kudu', 'kudu.master_addresses' = 'node1:7051', 'kudu.key_columns' = 'P_PARTKEY' ) Fetched 0 row(s) in 0.68s [node2:21000] > insert into part_kudu select * from part; Query: insert into part_kudu select * from part Inserted 2000000 row(s) in 22.71s [node2:21000] > create table supplier_kudu (S_SUPPKEY INT, S_NAME STRING, S_ADDRESS STRING, S_NATIONKEY INT, S_PHONE STRING, S_ACCTBAL DOUBLE, S_COMMENT STRING) > TBLPROPERTIES( > 'storage_handler' = 'com.cloudera.kudu.hive.KuduStorageHandler', > 'kudu.table_name' = 'supplier_kudu', > 'kudu.master_addresses' = 'node1:7051', > 'kudu.key_columns' = 'S_SUPPKEY' > ); Query: create table supplier_kudu (S_SUPPKEY INT, S_NAME STRING, S_ADDRESS STRING, S_NATIONKEY INT, S_PHONE STRING, S_ACCTBAL DOUBLE, S_COMMENT STRING) TBLPROPERTIES( 'storage_handler' = 'com.cloudera.kudu.hive.KuduStorageHandler', 'kudu.table_name' = 'supplier_kudu', 'kudu.master_addresses' = 'node1:7051', 'kudu.key_columns' = 'S_SUPPKEY' ) Fetched 0 row(s) in 0.73s [node2:21000] > insert into supplier_kudu select * from supplier_kudu; Query: insert into supplier_kudu select * from supplier_kudu Inserted 0 row(s) in 4.67s [node2:21000] > create table partsupp_kudu (PS_PARTKEY INT, PS_SUPPKEY INT, PS_AVAILQTY INT, PS_SUPPLYCOST DOUBLE, PS_COMMENT STRING) > TBLPROPERTIES( > 'storage_handler' = 'com.cloudera.kudu.hive.KuduStorageHandler', > 'kudu.table_name' = 'partsupp_kudu', > 'kudu.master_addresses' = 'node1:7051', > 'kudu.key_columns' = 'PS_PARTKEY,PS_SUPPKEY' > ); Query: create table partsupp_kudu (PS_PARTKEY INT, PS_SUPPKEY INT, PS_AVAILQTY INT, PS_SUPPLYCOST DOUBLE, PS_COMMENT STRING) TBLPROPERTIES( 'storage_handler' = 'com.cloudera.kudu.hive.KuduStorageHandler', 'kudu.table_name' = 'partsupp_kudu', 'kudu.master_addresses' = 'node1:7051', 'kudu.key_columns' = 'PS_PARTKEY,PS_SUPPKEY' ) Fetched 0 row(s) in 0.72s [node2:21000] > insert into partsupp_kudu select * from partsupp_kudu; Query: insert into partsupp_kudu select * from partsupp_kudu Inserted 0 row(s) in 3.07s [node2:21000] > insert into supplier_kudu select * from supplier; Query: insert into supplier_kudu select * from supplier Inserted 100000 row(s) in 1.78s [node2:21000] > insert into partsupp_kudu select * from partsupp; Query: insert into partsupp_kudu select * from partsupp Inserted 8000000 row(s) in 64.44s [node2:21000] > create table region_kudu (R_REGIONKEY INT, R_NAME STRING, R_COMMENT STRING) > TBLPROPERTIES( > 'storage_handler' = 'com.cloudera.kudu.hive.KuduStorageHandler', > 'kudu.table_name' = 'region_kudu', > 'kudu.master_addresses' = 'node1:7051', > 'kudu.key_columns' = 'R_REGIONKEY' > ); Query: create table region_kudu (R_REGIONKEY INT, R_NAME STRING, R_COMMENT STRING) TBLPROPERTIES( 'storage_handler' = 'com.cloudera.kudu.hive.KuduStorageHandler', 'kudu.table_name' = 'region_kudu', 'kudu.master_addresses' = 'node1:7051', 'kudu.key_columns' = 'R_REGIONKEY' ) Fetched 0 row(s) in 0.72s [node2:21000] > insert into region_kudu select * from region; Query: insert into region_kudu select * from region Inserted 5 row(s) in 4.05s [node2:21000] > create table lineitem_kudu (L_ORDERKEY INT, L_LINENUMBER INT, L_PARTKEY INT, L_SUPPKEY INT, L_QUANTITY DOUBLE, L_EXTENDEDPRICE DOUBLE, L_DISCOUNT DOUBLE, L_TAX DOUBLE, L_RETURNFLAG STRING, L_LINESTATUS STRING, L_SHIPDATE STRING, L_COMMITDATE STRING, L_RECEIPTDATE STRING, L_SHIPINSTRUCT STRING, L_SHIPMODE STRING, L_COMMENT STRING) > TBLPROPERTIES( > 'storage_handler' = 'com.cloudera.kudu.hive.KuduStorageHandler', > 'kudu.table_name' = 'lineitem_kudu', > 'kudu.master_addresses' = 'node1:7051', > 'kudu.key_columns' = 'L_ORDERKEY,L_LINENUMBER' > ); Query: create table lineitem_kudu (L_ORDERKEY INT, L_LINENUMBER INT, L_PARTKEY INT, L_SUPPKEY INT, L_QUANTITY DOUBLE, L_EXTENDEDPRICE DOUBLE, L_DISCOUNT DOUBLE, L_TAX DOUBLE, L_RETURNFLAG STRING, L_LINESTATUS STRING, L_SHIPDATE STRING, L_COMMITDATE STRING, L_RECEIPTDATE STRING, L_SHIPINSTRUCT STRING, L_SHIPMODE STRING, L_COMMENT STRING) TBLPROPERTIES( 'storage_handler' = 'com.cloudera.kudu.hive.KuduStorageHandler', 'kudu.table_name' = 'lineitem_kudu', 'kudu.master_addresses' = 'node1:7051', 'kudu.key_columns' = 'L_ORDERKEY,L_LINENUMBER' ) Fetched 0 row(s) in 0.72s [node2:21000] > insert into lineitem_kudu select L_ORDERKEY, L_LINENUMBER, L_PARTKEY, L_SUPPKEY, L_QUANTITY, L_EXTENDEDPRICE, L_DISCOUNT, L_TAX, L_RETURNFLAG, L_LINESTATUS, L_SHIPDATE, L_COMMITDATE, L_RECEIPTDATE, L_SHIPINSTRUCT, L_SHIPMODE, L_COMMENT from lineitem; Query: insert into lineitem_kudu select L_ORDERKEY, L_LINENUMBER, L_PARTKEY, L_SUPPKEY, L_QUANTITY, L_EXTENDEDPRICE, L_DISCOUNT, L_TAX, L_RETURNFLAG, L_LINESTATUS, L_SHIPDATE, L_COMMITDATE, L_RECEIPTDATE, L_SHIPINSTRUCT, L_SHIPMODE, L_COMMENT from lineitem Inserted 59986052 row(s) in 951.42s [node2:21000] > create table orders_kudu (O_ORDERKEY INT, O_CUSTKEY INT, O_ORDERSTATUS STRING, O_TOTALPRICE DOUBLE, O_ORDERDATE STRING, O_ORDERPRIORITY STRING, O_CLERK STRING, O_SHIPPRIORITY INT, O_COMMENT STRING) > TBLPROPERTIES( > 'storage_handler' = 'com.cloudera.kudu.hive.KuduStorageHandler', > 'kudu.table_name' = 'orders_kudu', > 'kudu.master_addresses' = 'node1:7051', > 'kudu.key_columns' = 'O_ORDERKEY' > ); Query: create table orders_kudu (O_ORDERKEY INT, O_CUSTKEY INT, O_ORDERSTATUS STRING, O_TOTALPRICE DOUBLE, O_ORDERDATE STRING, O_ORDERPRIORITY STRING, O_CLERK STRING, O_SHIPPRIORITY INT, O_COMMENT STRING) TBLPROPERTIES( 'storage_handler' = 'com.cloudera.kudu.hive.KuduStorageHandler', 'kudu.table_name' = 'orders_kudu', 'kudu.master_addresses' = 'node1:7051', 'kudu.key_columns' = 'O_ORDERKEY' ) Fetched 0 row(s) in 0.86s [node2:21000] > insert into orders_kudu select * from orders; Query: insert into orders_kudu select * from orders Inserted 15000000 row(s) in 133.11s [node2:21000] >
3.计算表的统计信息:
compute stats customer;
compute stats lineitem;
compute stats nation;
compute stats orders;
compute stats part;
compute stats partsupp;
compute stats region;
compute stats supplier;
compute stats customer_kudu;
ompute stats lineitem_kudu;
compute stats nation_kudu;
compute stats orders_kudu;
compute stats part_kudu;
compute stats partsupp_kudu;
compute stats region_kudu;
compute stats supplier_kudu;