zoukankan      html  css  js  c++  java
  • hive不分区增量更新

    insert overwrite table ods.zeg_so 
    select 
    *,
    case when zsm.id is not null then cast(current_timestamp as string) else zs.etl_update end etl_update 
    from ods.zeg_so_mid zsm 
    full join ods.zeg_so zs on zsm.id=zs.id
    ----------------------------------------------------------
    insert overwrite table data_center.test_no_partition 
    select tmp.id,tmp.name,tmp.age
    from tmp.temp_test_no_partition tmp
    full join data_center.test_no_partition org
    on tmp.id=org.id

    #hive全连接


    insert overwrite table data_center.test_no_partition 
    select tmp.id,tmp.name,tmp.age from tmp.temp_test_no_partition tmp 
    union all 
    select org.id,org.name,org.age from data_center.test_no_partition org 
    left outer join tmp.temp_test_no_partition b 
    on org.id=b.id
    -------------------------------------------------------------- 
    insert overwrite table data_center.test_no_partition 
    select tmp.id,tmp.name,tmp.age from tmp.temp_test_no_partition tmp 
    union all 
    select b.id,b.name,b.age from data_center.test_no_partition org 
    left outer join tmp.temp_test_no_partition b 
    on org.id=b.id
    ----------------------------------------------------------------
    insert overwrite table data_center.test_no_partition 
    select tmp.id,tmp.name,tmp.age from tmp.temp_test_no_partition tmp
    full join data_center.test_no_partition org on tmp.id=org.id
    --------------------------------------------------------------------
    insert overwrite table data_center.test_no_partition 
    select org.id,org.name,org.age from tmp.temp_test_no_partition tmp
    full join data_center.test_no_partition org on tmp.id=org.id
    -------------------------------------------------------------------------------- 
    不分区增量更新 OK
    insert overwrite table data_center.test_no_partition 
    select tmp.id,tmp.name,tmp.age from tmp.temp_test_no_partition tmp 
    union all 
    select org.id,org.name,org.age from data_center.test_no_partition org 
    left outer join tmp.temp_test_no_partition b 
    on org.id=b.id
    where b.id is null;

    分区:增量更新
    INSERT OVERWRITE TABLE %s.%s PARTITION(%s) select %s from %s''' % (job_info_map["w_database"], job_info_map["w_table"], job_info_map["w_partition_name"],
    colums_str, "temp." + job_info_map["temp_table"])

    hive_sql = "set hive.exec.dynamic.partition.mode=nonstrict;"
    hive_sql += "set hive.exec.max.dynamic.partitions.pernode=1000;"
    hive_sql += "set mapreduce.reduce.shuffle.input.buffer.percent=0.5;"
    hive_sql += "INSERT OVERWRITE TABLE " + hive_table + " PARTITION(" + job_info_map["w_partition_name"] + ")" + " SELECT " + colums_str + " FROM " + temp_table + " UNION ALL SELECT a.* FROM " + hive_table + " a LEFT OUTER JOIN " + temp_table + " b on "

    #增量取数据条件
    update_time>=subdate(current_date,1)
    COALESCE(update_time,create_Time)>=subdate(current_date,1)

    -------------------------------------------------------------------
    #分区增量更新语句:
    insert overwrite table data_center.test_partition partition(date_id='2017-06-13')
    select tmp.id,tmp.name,tmp.age from tmp.temp_test_partition tmp
    union all select org.id,org.name,org.age from data_center.test_partition org
    left outer join tmp.temp_test_partition b
    on org.id=b.id
    where b.id is null;
    报错:SemanticException [Error 10044]: Line 1:23 Cannot insert into target table because column number/types are different ''2017-06-13'': Table insclause-0 has 3 columns, 
    but query has 4 columns

    CREATE TABLE `tmp.temp_test_partition`( 
    `id` int, 
    `name` 
    string, 
    `age` int) 
    ROW FORMAT SERDE 
    'org.apache.hadoop.hive.serde2.l 
    azy.LazySimpleSerDe' 
    WITH SERDEPROPERTIES ( 'field.delim'=',', 'serialization.format'=',') 
    STORED AS INPUTFORMAT 
    'org.apache.hadoop.mapred.TextInputF 
    ormat' 
    OUTPUTFORMAT 
    'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFor 
    mat' 
    LOCATION 
    'hdfs://master:9000/user/hive/warehouse/tmp.db/temp_test_partiti 
    on' TBLPROPERTIES ( 'transient_lastDdlTime'='1497492633')


    alter table tmp.temp_test_partition set serdeproperties ('field.delim'=',','serialization.format'=',')

    #分区增量更新语句:OK
    set hive.exec.dynamic.partition.mode=nonstrict;
    set hive.exec.max.dynamic.partitions.pernode=1000;
    set mapreduce.reduce.shuffle.input.buffer.percent=0.5;
    insert overwrite table data_center.test_partition partition(date_id) 
    select tmp.id,tmp.name,tmp.age,'2017-06-15' as date_id from tmp.temp_test_partition tmp 
    union all select a.* from data_center.test_partition a
    left outer join tmp.temp_test_partition b on a.id=b.id where b.id is null and a.date_id in ('2017-06-14','2017-06-15','2017-06-16');


    #删除部分数据
    set hive.exec.dynamic.partition.mode=nonstrict;
    set hive.exec.max.dynamic.partitions.pernode=1000;
    set mapreduce.reduce.shuffle.input.buffer.percent=0.5;
    insert overwrite table data_center.test_partition partition (date_id) select id,name,age,date_id from data_center.test_partition limit 2;


    set hive.exec.dynamic.partition.mode=nonstrict;
    set hive.exec.max.dynamic.partitions.pernode=1000;
    set mapreduce.reduce.shuffle.input.buffer.percent=0.5;
    insert overwrite table data_center.test_partition partition (date_id) select id,name,age,date_id from data_center.test_partition where name in ('lisi','ccc')

  • 相关阅读:
    P4611 [COCI2011-2012#7] TRAMPOLIN
    P3119 [USACO15JAN]草鉴定Grass Cownoisseur
    P4417 [COCI2006-2007#2] STOL
    P4645 [COCI2006-2007 Contest#3] BICIKLI
    P1155 双栈排序
    P4610 [COCI2011-2012#7] KAMPANJA
    P4329 [COCI2006-2007#1] Bond
    EZOJ #227
    EZOJ #226
    p4980 polya定理
  • 原文地址:https://www.cnblogs.com/ngy0217/p/10051364.html
Copyright © 2011-2022 走看看