  • Hive JSON tables: processing JSON-formatted data

    add jar /home/jasonapp/json-serde-1.3.7-jar-with-dependencies.jar;
    
    create external table if not exists dm_goddog.student(
    student map<string,string> comment "student info",
    class map<string,string> comment "course info",
    teacher map<string,string> comment "teacher info"
    )
    comment "student course info"
    ROW FORMAT SERDE 'org.openx.data.jsonserde.JsonSerDe'
    stored as textfile;
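
    Each backing file must contain one JSON object per line whose keys match the column names. A made-up record for this table would look like:

    {"student":{"name":"zhangsan","age":"18"},"class":{"name":"math","room":"302"},"teacher":{"name":"lisi"}}

    Map columns are then read by key, e.g.:

    select student['name'] as student_name, class['name'] as class_name
    from dm_goddog.student
    limit 10;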

    PS: If a query fails because a JSON record cannot be parsed, delete the malformed lines from the offending JSON file.
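
    Alternatively, the openx SerDe can be told to skip rows it cannot parse instead of failing the query: set ignore.malformed.json, and bad lines come back as all-NULL rows. A sketch of the same DDL with that option (the table name student_tolerant is just for illustration):

    create external table if not exists dm_goddog.student_tolerant(
    student map<string,string> comment "student info",
    class map<string,string> comment "course info",
    teacher map<string,string> comment "teacher info"
    )
    ROW FORMAT SERDE 'org.openx.data.jsonserde.JsonSerDe'
    WITH SERDEPROPERTIES ("ignore.malformed.json" = "true")
    stored as textfile;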

    1. Download the json-serde jar from:

    http://www.congiu.net/hive-json-serde/1.3.7/cdh5/json-serde-1.3.7-jar-with-dependencies.jar
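
    For example (assuming wget is available on the host):

    wget -P /home/jasonapp http://www.congiu.net/hive-json-serde/1.3.7/cdh5/json-serde-1.3.7-jar-with-dependencies.jar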

    2. Create the test tables

    add jar /home/jasonapp/json-serde-1.3.7-jar-with-dependencies.jar;
    
    -------- Waybill base info table -------- dwd_ids_test.dwd_order_base_info_dtl_di
    create external table if not exists dwd_ids_test.dwd_order_base_info_dtl_di(
      order_id   string,
      order_no   string,
      express_flow_code   string,
      source_zone_code   string,
      dest_zone_code   string,
      src_dist_code   string,
      src_city_code   string,
      src_county   string,
      src_division_code   string,
      src_area_code   string,
      src_hq_code   string,
      src_type_code   string,
      dest_dist_code   string,
      dest_city_code   string,
      dest_county   string,
      dest_division_code   string,
      dest_area_code   string,
      dest_hq_code   string,
      dest_type_code   string,
      src_lgt   string,
      src_lat   string,
      dest_lgt   string,
      dest_lat   string,
      meterage_weight_qty   string,
      real_weight_qty   string,
      quantity   double,
      consignee_emp_code   string,
      consigned_tm   string,
      deliver_emp_code   string,
      signer_name   string,
      signin_tm   string,
      cargo_type_code   string,
      limit_type_code   string,
      distance_type_code   string,
      transport_type_code   string,
      express_type_code   string,
      ackbill_type_code   string,
      volume   string,
      bill_long   string,
      bill_width   string,
      bill_high   string,
      version_no   string,
      suff_flag   string,
      unit_weight   string,
      cons_value   string,
      cons_value_currency_code   string,
      product_code   string,
      airport_code   string,
      order_type   string,
      input_tm   string,
      inputer_emp_code   string,
      input_tm_gmt   string,
      modified_tm   string,
      order_remark   string,
      contacts_id   string,
      consignor_comp_name   string,
      consignor_addr   string,
      consignor_phone   string,
      consignor_cont_name   string,
      consignor_mobile   string,
      consignee_comp_name   string,
      consignee_addr   string,
      consignee_phone   string,
      consignee_cont_name   string,
      consignee_mobile   string,
      consignor_addr_native   string,
      consignee_addr_native   string,
      addressee_team_code   string,
      parent_order_no   string,
      order_tag   string,
      all_fee_rmb   double,
      cod_fee_rmb   double,
      freight_rmb   double,
      meterage_weight_qty_kg   double,
      real_weight_qty_kg   double,
      addresseeaoicode   string,
      addresseeaoitype   string,
      service_prod_code   array<string>,
      freight_payment_type_code   string,
      freight_monthly_acct_code   string,
      inc_day   string
    )
    ROW FORMAT SERDE 'org.openx.data.jsonserde.JsonSerDe'
    stored as textfile;
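
    The SerDe matches JSON keys to column names case-insensitively, so a camelCase source key such as addresseeAoiCode (an assumption about the source data, for illustration) already lands in the addresseeaoicode column. For keys that differ from the column name or are not valid Hive identifiers, json-serde also supports an explicit mapping via SERDEPROPERTIES, e.g.:

    ALTER TABLE dwd_ids_test.dwd_order_base_info_dtl_di
    SET SERDEPROPERTIES ("mapping.addresseeaoicode" = "addresseeAoiCode");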
    
    
    desc formatted dwd_ids_test.dwd_order_base_info_dtl_di;
    
    

    dfs -ls hdfs://test-cluster/user/hive/warehouse/dwd_ids_test.db/dwd_order_base_info_dtl_di;
    dfs -rm -f hdfs://test-cluster/user/hive/warehouse/dwd_ids_test.db/dwd_order_base_info_dtl_di/*;

    
    

    hdfs dfs -put /home/jasonapp/dwd_order_base_info_dtl_di*.json hdfs://test-cluster/user/hive/warehouse/dwd_ids_test.db/dwd_order_base_info_dtl_di;

    select * from dwd_ids_test.dwd_order_base_info_dtl_di limit 1;

    select * from dwd_ids_test.dwd_order_base_info_dtl_di where inc_day='20200103' limit 1;

    -- session settings for the Parquet load: dynamic partitioning must be on for PARTITION (inc_day);
    -- the MR codec settings take effect only when hive.exec.compress.output=true, while the Parquet
    -- table's own compression comes from TBLPROPERTIES('parquet.compression'='SNAPPY')
    set hive.exec.dynamic.partition=true;
    set hive.exec.dynamic.partition.mode=nonstrict;
    set hive.fetch.task.conversion=more;
    set hive.exec.parallel=true;
    set mapreduce.output.fileoutputformat.compress.codec=org.apache.hadoop.io.compress.SnappyCodec;
    set mapreduce.output.fileoutputformat.compress.type=BLOCK;
    
    DROP TABLE IF EXISTS dwd_ids_test.dwd_order_base_info_dtl_di_parquet;
    CREATE EXTERNAL TABLE IF NOT EXISTS dwd_ids_test.dwd_order_base_info_dtl_di_parquet(
      order_id   string,
      order_no   string,
      express_flow_code   string,
      source_zone_code   string,
      dest_zone_code   string,
      src_dist_code   string,
      src_city_code   string,
      src_county   string,
      src_division_code   string,
      src_area_code   string,
      src_hq_code   string,
      src_type_code   string,
      dest_dist_code   string,
      dest_city_code   string,
      dest_county   string,
      dest_division_code   string,
      dest_area_code   string,
      dest_hq_code   string,
      dest_type_code   string,
      src_lgt   string,
      src_lat   string,
      dest_lgt   string,
      dest_lat   string,
      meterage_weight_qty   string,
      real_weight_qty   string,
      quantity   double,
      consignee_emp_code   string,
      consigned_tm   string,
      deliver_emp_code   string,
      signer_name   string,
      signin_tm   string,
      cargo_type_code   string,
      limit_type_code   string,
      distance_type_code   string,
      transport_type_code   string,
      express_type_code   string,
      ackbill_type_code   string,
      volume   string,
      bill_long   string,
      bill_width   string,
      bill_high   string,
      version_no   string,
      suff_flag   string,
      unit_weight   string,
      cons_value   string,
      cons_value_currency_code   string,
      product_code   string,
      airport_code   string,
      order_type   string,
      input_tm   string,
      inputer_emp_code   string,
      input_tm_gmt   string,
      modified_tm   string,
      order_remark   string,
      contacts_id   string,
      consignor_comp_name   string,
      consignor_addr   string,
      consignor_phone   string,
      consignor_cont_name   string,
      consignor_mobile   string,
      consignee_comp_name   string,
      consignee_addr   string,
      consignee_phone   string,
      consignee_cont_name   string,
      consignee_mobile   string,
      consignor_addr_native   string,
      consignee_addr_native   string,
      addressee_team_code   string,
      parent_order_no   string,
      order_tag   string,
      all_fee_rmb   double,
      cod_fee_rmb   double,
      freight_rmb   double,
      meterage_weight_qty_kg   double,
      real_weight_qty_kg   double,
      addresseeaoicode   string,
      addresseeaoitype   string,
      service_prod_code   array<string>,
      freight_payment_type_code   string,
      freight_monthly_acct_code   string
    )
    PARTITIONED BY (
      inc_day string COMMENT 'partition column'
    )
    STORED AS parquet
    TBLPROPERTIES('parquet.compression'='SNAPPY');
    
    INSERT OVERWRITE TABLE dwd_ids_test.dwd_order_base_info_dtl_di_parquet PARTITION (inc_day)
    select * from dwd_ids_test.dwd_order_base_info_dtl_di;
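
    A quick sanity check after the dynamic-partition load is to compare per-partition row counts against the source table (illustrative):

    select inc_day, count(*) as cnt
    from dwd_ids_test.dwd_order_base_info_dtl_di_parquet
    group by inc_day;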
    
    dfs -ls  hdfs://test-cluster/user/hive/warehouse/dwd_ids_test.db/dwd_order_base_info_dtl_di_parquet;
    
    
    ALTER TABLE dwd_ids_test.dwd_order_base_info_dtl_di_parquet PARTITION (inc_day='20200101') RENAME TO PARTITION (inc_day='20201125');
    ALTER TABLE dwd_ids_test.dwd_order_base_info_dtl_di_parquet PARTITION (inc_day='20200102') RENAME TO PARTITION (inc_day='20201126');
    ALTER TABLE dwd_ids_test.dwd_order_base_info_dtl_di_parquet PARTITION (inc_day='20200103') RENAME TO PARTITION (inc_day='20201127');
    ALTER TABLE dwd_ids_test.dwd_order_base_info_dtl_di_parquet PARTITION (inc_day='20200104') RENAME TO PARTITION (inc_day='20201128');
    ALTER TABLE dwd_ids_test.dwd_order_base_info_dtl_di_parquet PARTITION (inc_day='20200105') RENAME TO PARTITION (inc_day='20201129');
    -------- Order wide table -------- dwd_ids_test.dwd_pub_order_dtl_di
    add jar /home/jasonapp/json-serde-1.3.7-jar-with-dependencies.jar;
    drop table if exists dwd_ids_test.dwd_pub_order_dtl_di;
    create table if not exists dwd_ids_test.dwd_pub_order_dtl_di(
    inner_order_no string,
    src_order_no string,
    src_sys_type string,
    src_sys_name string,
    src_subsys_code string,
    order_type_code string,
    access_code string,
    online_chnl_code string,
    online_chnl_name string,
    pickup_type_code string,
    order_status_code string,
    pay_type_code string,
    order_no array<string>, -- array<string>; alternatively string
    client_code string,
    client_name string,
    monthly_card_no string,
    sender_name string,
    sender_tel string,
    sender_mobile string,
    sender_company string,
    sender_city_code string,
    sender_province_name string,
    sender_city_name string,
    sender_area_name string,
    sender_address string,
    receiver_name string,
    receiver_tel string,
    receiver_mobile string,
    receiver_company string,
    receiver_city_code string,
    receiver_province_name string,
    receiver_city_name string,
    receiver_area_name string,
    receiver_address string,
    cargo_name string,
    cargo_quantity double,
    declared_value_amt double,
    declared_currency_code string,
    is_insurance_flag int,
    insurance_amt double,
    est_weight double,
    est_price double,
    is_cod_flag int,
    cod_card_no string,
    cod_amt double,
    is_signback_flag int,
    is_under_call int,
    call_tm string,
    appoint_start_tm string,
    appoint_finish_tm string,
    appoint_pickup_tm string,
    latest_pickup_tm string,
    appoint_mobile string,
    appoint_name string,
    appoint_city string,
    pickup_emp_code string,
    dept_code string,
    unitarea_code string,
    aoi_code string,
    is_cancel_flag int,
    cancel_reason string,
    cancel_tm string,
    order_ip string,
    limit_type_code string,
    limit_type_name string,
    create_tm string,
    order_tm string,
    modify_tm string,
    is_photo_flag int,
    member_id string,
    load_tm string,
    cx_order_entry string,
    inc_day string
    )
    ROW FORMAT SERDE 'org.openx.data.jsonserde.JsonSerDe'
    stored as textfile;
    
    
    
    desc formatted dwd_ids_test.dwd_pub_order_dtl_di;
    
    

    dfs -ls hdfs://test-cluster/user/hive/warehouse/dwd_ids_test.db/dwd_pub_order_dtl_di;
    dfs -rm -f hdfs://test-cluster/user/hive/warehouse/dwd_ids_test.db/dwd_pub_order_dtl_di/*;

    
    

    hdfs dfs -put /home/jasonapp/dwd_pub_order_dtl_di*.json hdfs://test-cluster/user/hive/warehouse/dwd_ids_test.db/dwd_pub_order_dtl_di;

    select * from dwd_ids_test.dwd_pub_order_dtl_di limit 1;
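
    Since order_no is declared as array<string> here, elements are read by index and the length with size(); an illustrative query:

    select inner_order_no, order_no[0] as first_order_no, size(order_no) as order_no_cnt
    from dwd_ids_test.dwd_pub_order_dtl_di
    limit 10;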
    set hive.exec.dynamic.partition=true;
    set hive.exec.dynamic.partition.mode=nonstrict;
    set hive.fetch.task.conversion=more;
    set hive.exec.parallel=true;
    set mapreduce.output.fileoutputformat.compress.codec=org.apache.hadoop.io.compress.SnappyCodec;
    set mapreduce.output.fileoutputformat.compress.type=BLOCK;
    
    DROP TABLE IF EXISTS dwd_ids_test.dwd_pub_order_dtl_di_parquet;
    CREATE EXTERNAL TABLE IF NOT EXISTS dwd_ids_test.dwd_pub_order_dtl_di_parquet(
    inner_order_no string,
    src_order_no string,
    src_sys_type string,
    src_sys_name string,
    src_subsys_code string,
    order_type_code string,
    access_code string,
    online_chnl_code string,
    online_chnl_name string,
    pickup_type_code string,
    order_status_code string,
    pay_type_code string,
    order_no array<string>, -- array<string>; alternatively string
    client_code string,
    client_name string,
    monthly_card_no string,
    sender_name string,
    sender_tel string,
    sender_mobile string,
    sender_company string,
    sender_city_code string,
    sender_province_name string,
    sender_city_name string,
    sender_area_name string,
    sender_address string,
    receiver_name string,
    receiver_tel string,
    receiver_mobile string,
    receiver_company string,
    receiver_city_code string,
    receiver_province_name string,
    receiver_city_name string,
    receiver_area_name string,
    receiver_address string,
    cargo_name string,
    cargo_quantity double,
    declared_value_amt double,
    declared_currency_code string,
    is_insurance_flag int,
    insurance_amt double,
    est_weight double,
    est_price double,
    is_cod_flag int,
    cod_card_no string,
    cod_amt double,
    is_signback_flag int,
    is_under_call int,
    call_tm string,
    appoint_start_tm string,
    appoint_finish_tm string,
    appoint_pickup_tm string,
    latest_pickup_tm string,
    appoint_mobile string,
    appoint_name string,
    appoint_city string,
    pickup_emp_code string,
    dept_code string,
    unitarea_code string,
    aoi_code string,
    is_cancel_flag int,
    cancel_reason string,
    cancel_tm string,
    order_ip string,
    limit_type_code string,
    limit_type_name string,
    create_tm string,
    order_tm string,
    modify_tm string,
    is_photo_flag int,
    member_id string,
    load_tm string,
    cx_order_entry string
    )
    PARTITIONED BY (
      inc_day string COMMENT 'partition column'
    )
    STORED AS parquet
    TBLPROPERTIES('parquet.compression'='SNAPPY');
    
    INSERT OVERWRITE TABLE dwd_ids_test.dwd_pub_order_dtl_di_parquet PARTITION (inc_day)
    select * from dwd_ids_test.dwd_pub_order_dtl_di;
    
    dfs -ls hdfs://test-cluster/user/hive/warehouse/dwd_ids_test.db/dwd_pub_order_dtl_di_parquet;
    
    
    ALTER TABLE dwd_ids_test.dwd_pub_order_dtl_di_parquet PARTITION (inc_day='20200101') RENAME TO PARTITION (inc_day='20201125');
    ALTER TABLE dwd_ids_test.dwd_pub_order_dtl_di_parquet PARTITION (inc_day='20200102') RENAME TO PARTITION (inc_day='20201126');
    ALTER TABLE dwd_ids_test.dwd_pub_order_dtl_di_parquet PARTITION (inc_day='20200103') RENAME TO PARTITION (inc_day='20201127');
    ALTER TABLE dwd_ids_test.dwd_pub_order_dtl_di_parquet PARTITION (inc_day='20200104') RENAME TO PARTITION (inc_day='20201128');
    ALTER TABLE dwd_ids_test.dwd_pub_order_dtl_di_parquet PARTITION (inc_day='20200105') RENAME TO PARTITION (inc_day='20201129');
    
    
    select * from dwd_ids_test.dwd_pub_order_dtl_di_parquet limit 11;
    
    select * from dwd_ids_test.dwd_pub_order_dtl_di limit 11;
    ---------- Date dimension table ---------- dim_ids_test.dim_calendar
    add jar /home/jasonapp/json-serde-1.3.7-jar-with-dependencies.jar;
    drop table if exists dim_ids_test.dim_calendar;
    create table if not exists dim_ids_test.dim_calendar(
    day_wid string,
    day_name string,
    day_date_chn string,
    day_date string,
    day_name_of_week string,
    day_of_week string,
    day_of_month string,
    day_of_year string,
    week_wid string,
    week_name string,
    week_no string,
    week_start_date_wid string,
    week_start_date string,
    week_end_date_wid string,
    week_end_date string,
    month_wid string,
    month_name string,
    month_no string,
    month_days string,
    month_start_date_wid string,
    month_start_date string,
    month_end_date_wid string,
    month_end_date string,
    quarter_wid string,
    quarter_name string,
    quarter_no string,
    quarter_start_date_wid string,
    quarter_start_date string,
    quarter_end_date_wid string,
    quarter_end_date string,
    year_wid string,
    year_name string,
    year_start_date_wid string,
    year_start_date string,
    year_end_date_wid string,
    year_end_date string,
    is_last_day_of_week string,
    is_last_day_of_month string,
    is_last_day_of_year string,
    is_weekend string,
    holiday_name string,
    day_ago_date_wid string,
    day_ago_date string,
    week_ago_date_wid string,
    week_ago_date string,
    month_ago_date_wid string,
    month_ago_date string,
    quarter_ago_date_wid string,
    quarter_ago_date string,
    year_ago_date_wid string,
    year_ago_date string,
    language string,
    w_insert_date string,
    w_update_date string,
    is_work string,
    is_business_peak string,
    public_holiday string,
    work_day double,
    inc_year string
    )
    ROW FORMAT SERDE 'org.openx.data.jsonserde.JsonSerDe'
    stored as textfile;
    
    
    desc formatted dim_ids_test.dim_calendar;
    
    dfs -put /home/jasonapp/dim_calendar.json hdfs://test-cluster/user/hive/warehouse/dim_ids_test.db/dim_calendar;
    
    select * from dim_ids_test.dim_calendar limit 1;
    set hive.exec.dynamic.partition=true;
    set hive.exec.dynamic.partition.mode=nonstrict;
    set hive.fetch.task.conversion=more;
    set hive.exec.parallel=true;
    set mapreduce.output.fileoutputformat.compress.codec=org.apache.hadoop.io.compress.SnappyCodec;
    set mapreduce.output.fileoutputformat.compress.type=BLOCK;
    
    DROP TABLE IF EXISTS dim_ids_test.dim_calendar_parquet;
    CREATE EXTERNAL TABLE IF NOT EXISTS dim_ids_test.dim_calendar_parquet(
    day_wid string,
    day_name string,
    day_date_chn string,
    day_date string,
    day_name_of_week string,
    day_of_week string,
    day_of_month string,
    day_of_year string,
    week_wid string,
    week_name string,
    week_no string,
    week_start_date_wid string,
    week_start_date string,
    week_end_date_wid string,
    week_end_date string,
    month_wid string,
    month_name string,
    month_no string,
    month_days string,
    month_start_date_wid string,
    month_start_date string,
    month_end_date_wid string,
    month_end_date string,
    quarter_wid string,
    quarter_name string,
    quarter_no string,
    quarter_start_date_wid string,
    quarter_start_date string,
    quarter_end_date_wid string,
    quarter_end_date string,
    year_wid string,
    year_name string,
    year_start_date_wid string,
    year_start_date string,
    year_end_date_wid string,
    year_end_date string,
    is_last_day_of_week string,
    is_last_day_of_month string,
    is_last_day_of_year string,
    is_weekend string,
    holiday_name string,
    day_ago_date_wid string,
    day_ago_date string,
    week_ago_date_wid string,
    week_ago_date string,
    month_ago_date_wid string,
    month_ago_date string,
    quarter_ago_date_wid string,
    quarter_ago_date string,
    year_ago_date_wid string,
    year_ago_date string,
    language string,
    w_insert_date string,
    w_update_date string,
    is_work string,
    is_business_peak string,
    public_holiday string,
    work_day double
    )
    PARTITIONED BY (
      inc_year string COMMENT 'partition column'
    )
    STORED AS parquet
    TBLPROPERTIES('parquet.compression'='SNAPPY');
    
    INSERT OVERWRITE TABLE dim_ids_test.dim_calendar_parquet PARTITION (inc_year)
    select * from dim_ids_test.dim_calendar;
    ---------- City dimension table ---------- dim_ids_test.dim_city
    add jar /home/jasonapp/json-serde-1.3.7-jar-with-dependencies.jar;
    drop table if exists dim_ids_test.dim_city;
    create table if not exists dim_ids_test.dim_city(
    dist_id string,
    dist_code string,
    dist_name string,
    dist_alias string,
    type_code string,
    type_name string,
    county_code string,
    county_name string,
    city_code string,
    city_name string,
    province_code string,
    province_name string,
    country_code string,
    country_name string,
    fbq_area_code string,
    ywq_area_code string,
    phone_area_code string,
    zip_area_code string,
    city_level string,
    city_desc string,
    currency_code string,
    weight_unit string,
    capital_status string,
    provincial_status string,
    valid_flag string,
    created_emp_code string,
    created_time string,
    modified_emp_code string,
    modified_time string,
    fbq_area_name string,
    ywq_area_name string,
    parent_dist_code string,
    inc_day string
    )
    ROW FORMAT SERDE 'org.openx.data.jsonserde.JsonSerDe'
    stored as textfile;
    
    
    desc formatted dim_ids_test.dim_city;
    
    

    dfs -ls hdfs://test-cluster/user/hive/warehouse/dim_ids_test.db/dim_city;
    dfs -rm -f hdfs://test-cluster/user/hive/warehouse/dim_ids_test.db/dim_city/*;

    
    

    hdfs dfs -put /home/jasonapp/dim_city.*.json hdfs://test-cluster/user/hive/warehouse/dim_ids_test.db/dim_city;

    select * from dim_ids_test.dim_city limit 1;

    select * from dim_ids_test.dim_city where inc_day='20200103' limit 1;

    set hive.exec.dynamic.partition=true;
    set hive.exec.dynamic.partition.mode=nonstrict;
    set hive.fetch.task.conversion=more;
    set hive.exec.parallel=true;
    set mapreduce.output.fileoutputformat.compress.codec=org.apache.hadoop.io.compress.SnappyCodec;
    set mapreduce.output.fileoutputformat.compress.type=BLOCK;
    
    DROP TABLE IF EXISTS dim_ids_test.dim_city_parquet;
    CREATE EXTERNAL TABLE IF NOT EXISTS dim_ids_test.dim_city_parquet(
    dist_id string,
    dist_code string,
    dist_name string,
    dist_alias string,
    type_code string,
    type_name string,
    county_code string,
    county_name string,
    city_code string,
    city_name string,
    province_code string,
    province_name string,
    country_code string,
    country_name string,
    fbq_area_code string,
    ywq_area_code string,
    phone_area_code string,
    zip_area_code string,
    city_level string,
    city_desc string,
    currency_code string,
    weight_unit string,
    capital_status string,
    provincial_status string,
    valid_flag string,
    created_emp_code string,
    created_time string,
    modified_emp_code string,
    modified_time string,
    fbq_area_name string,
    ywq_area_name string,
    parent_dist_code string
    )
    PARTITIONED BY (
      inc_day string COMMENT 'partition column'
    )
    STORED AS parquet
    TBLPROPERTIES('parquet.compression'='SNAPPY');
    
    INSERT OVERWRITE TABLE dim_ids_test.dim_city_parquet PARTITION (inc_day)
    select * from dim_ids_test.dim_city;
    
    
    ALTER TABLE dim_ids_test.dim_city_parquet PARTITION (inc_day='20200101') RENAME TO PARTITION (inc_day='20201125');
    ALTER TABLE dim_ids_test.dim_city_parquet PARTITION (inc_day='20200102') RENAME TO PARTITION (inc_day='20201126');
    ALTER TABLE dim_ids_test.dim_city_parquet PARTITION (inc_day='20200103') RENAME TO PARTITION (inc_day='20201127');
    ALTER TABLE dim_ids_test.dim_city_parquet PARTITION (inc_day='20200104') RENAME TO PARTITION (inc_day='20201128');
    ALTER TABLE dim_ids_test.dim_city_parquet PARTITION (inc_day='20200105') RENAME TO PARTITION (inc_day='20201129');
    ---------- SAP company/branch info dimension table ---------- dim_ids_test.dim_company
    add jar /home/jasonapp/json-serde-1.3.7-jar-with-dependencies.jar;
    drop table if exists dim_ids_test.dim_company;
    create table if not exists dim_ids_test.dim_company(
    comp_code string,
    comp_name string,
    city string,
    area string,
    currency string,
    spras string,
    ktopl string,
    waabw string,
    periv string,
    kokfi string,
    rcomp string,
    adrnr string,
    fikrs string,
    xfmca string,
    fmhrdate string,
    xfdis string,
    xvalv string,
    xgjrv string,
    xfdmm string,
    bukrs_glob string,
    fstva string,
    opvar string,
    xcovr string,
    mwskv string,
    mwska string,
    xnegp string,
    fstvare string,
    offsacct string,
    xcos string,
    xsplt string,
    dw_insert_tm string,
    dw_update_tm string,
    dw_datasource_code string
    )
    ROW FORMAT SERDE 'org.openx.data.jsonserde.JsonSerDe'
    stored as textfile;
    
    
    desc formatted dim_ids_test.dim_company;
    
    dfs -put /home/jasonapp/dim_company.json hdfs://test-cluster/user/hive/warehouse/dim_ids_test.db/dim_company;
    
    select * from dim_ids_test.dim_company limit 1;
    drop table if exists dim_ids_test.dim_company_parquet;
    create table if not exists dim_ids_test.dim_company_parquet(
    comp_code string,
    comp_name string,
    city string,
    area string,
    currency string,
    spras string,
    ktopl string,
    waabw string,
    periv string,
    kokfi string,
    rcomp string,
    adrnr string,
    fikrs string,
    xfmca string,
    fmhrdate string,
    xfdis string,
    xvalv string,
    xgjrv string,
    xfdmm string,
    bukrs_glob string,
    fstva string,
    opvar string,
    xcovr string,
    mwskv string,
    mwska string,
    xnegp string,
    fstvare string,
    offsacct string,
    xcos string,
    xsplt string,
    dw_insert_tm string,
    dw_update_tm string,
    dw_datasource_code string
    )
    STORED AS parquet
    TBLPROPERTIES('parquet.compression'='SNAPPY');
    
    INSERT OVERWRITE TABLE dim_ids_test.dim_company_parquet
    select * from dim_ids_test.dim_company;

    Add the new partitions and copy data into them; on the scheduling platform this runs daily for the incremental partition data.

    ALTER TABLE dim_ids_test.dim_city_parquet PARTITION (inc_day='${dt1}') RENAME TO PARTITION (inc_day='${dt2}');
    ALTER TABLE dwd_ids_test.dwd_pub_order_dtl_di_parquet PARTITION (inc_day='${dt1}') RENAME TO PARTITION (inc_day='${dt2}');
    ALTER TABLE dwd_ids_test.dwd_order_base_info_dtl_di_parquet PARTITION (inc_day='${dt1}') RENAME TO PARTITION (inc_day='${dt2}');
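
    How ${dt1}/${dt2} get substituted is up to the scheduling platform. Outside the platform, one way to approximate it is Hive's own variable substitution; a sketch assuming the statements live in a file rename_partitions.sql and reference ${hiveconf:dt1}/${hiveconf:dt2}:

    -- rename_partitions.sql
    ALTER TABLE dim_ids_test.dim_city_parquet PARTITION (inc_day='${hiveconf:dt1}') RENAME TO PARTITION (inc_day='${hiveconf:dt2}');

    hive -hiveconf dt1=20200101 -hiveconf dt2=20201125 -f rename_partitions.sql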
    
    
    
    ALTER TABLE dim_ids_test.dim_city_parquet ADD PARTITION (inc_day='20201122') PARTITION (inc_day='20201123') PARTITION (inc_day='20201124');
    ALTER TABLE dwd_ids_test.dwd_pub_order_dtl_di_parquet ADD PARTITION (inc_day='20201122') PARTITION (inc_day='20201123') PARTITION (inc_day='20201124');
    ALTER TABLE dwd_ids_test.dwd_order_base_info_dtl_di_parquet ADD PARTITION (inc_day='20201122') PARTITION (inc_day='20201123') PARTITION (inc_day='20201124');
    
    
    dfs -cp hdfs://test-cluster/user/hive/warehouse/dim_ids_test.db/dim_city_parquet/inc_day=20200101/* hdfs://test-cluster/user/hive/warehouse/dim_ids_test.db/dim_city_parquet/inc_day=20201122;
    dfs -cp hdfs://test-cluster/user/hive/warehouse/dim_ids_test.db/dim_city_parquet/inc_day=20200102/* hdfs://test-cluster/user/hive/warehouse/dim_ids_test.db/dim_city_parquet/inc_day=20201123;
    dfs -cp hdfs://test-cluster/user/hive/warehouse/dim_ids_test.db/dim_city_parquet/inc_day=20200103/* hdfs://test-cluster/user/hive/warehouse/dim_ids_test.db/dim_city_parquet/inc_day=20201124;
    dfs -ls  hdfs://test-cluster/user/hive/warehouse/dim_ids_test.db/dim_city_parquet;
    
    
    dfs -cp hdfs://test-cluster/user/hive/warehouse/dwd_ids_test.db/dwd_pub_order_dtl_di_parquet/inc_day=20200101/* hdfs://test-cluster/user/hive/warehouse/dwd_ids_test.db/dwd_pub_order_dtl_di_parquet/inc_day=20201122;
    dfs -cp hdfs://test-cluster/user/hive/warehouse/dwd_ids_test.db/dwd_pub_order_dtl_di_parquet/inc_day=20200102/* hdfs://test-cluster/user/hive/warehouse/dwd_ids_test.db/dwd_pub_order_dtl_di_parquet/inc_day=20201123;
    dfs -cp hdfs://test-cluster/user/hive/warehouse/dwd_ids_test.db/dwd_pub_order_dtl_di_parquet/inc_day=20200103/* hdfs://test-cluster/user/hive/warehouse/dwd_ids_test.db/dwd_pub_order_dtl_di_parquet/inc_day=20201124;
    dfs -ls  hdfs://test-cluster/user/hive/warehouse/dwd_ids_test.db/dwd_pub_order_dtl_di_parquet/*;
    
    
    dfs -cp hdfs://test-cluster/user/hive/warehouse/dwd_ids_test.db/dwd_order_base_info_dtl_di_parquet/inc_day=20200101/* hdfs://test-cluster/user/hive/warehouse/dwd_ids_test.db/dwd_order_base_info_dtl_di_parquet/inc_day=20201122;
    dfs -cp hdfs://test-cluster/user/hive/warehouse/dwd_ids_test.db/dwd_order_base_info_dtl_di_parquet/inc_day=20200102/* hdfs://test-cluster/user/hive/warehouse/dwd_ids_test.db/dwd_order_base_info_dtl_di_parquet/inc_day=20201123;
    dfs -cp hdfs://test-cluster/user/hive/warehouse/dwd_ids_test.db/dwd_order_base_info_dtl_di_parquet/inc_day=20200103/* hdfs://test-cluster/user/hive/warehouse/dwd_ids_test.db/dwd_order_base_info_dtl_di_parquet/inc_day=20201124;
    dfs -ls  hdfs://test-cluster/user/hive/warehouse/dwd_ids_test.db/dwd_order_base_info_dtl_di_parquet/*;
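
    Because these partition directories were created directly on HDFS, the metastore only learns about them through the explicit ADD PARTITION statements above. Since the directories follow the inc_day=... naming convention, letting Hive discover them is an alternative:

    MSCK REPAIR TABLE dwd_ids_test.dwd_order_base_info_dtl_di_parquet;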
    
    
    -- partitioned by year (inc_year)
    show partitions dim_ids_test.dim_calendar_parquet;
    -- not partitioned
    show partitions dim_ids_test.dim_company_parquet;
    -- partitioned by day (inc_day)
    show partitions dim_ids_test.dim_city_parquet;
    
    -- partitioned by day (inc_day)
    show partitions dwd_ids_test.dwd_pub_order_dtl_di_parquet;
    -- partitioned by day (inc_day)
    show partitions dwd_ids_test.dwd_order_base_info_dtl_di_parquet;