zoukankan      html  css  js  c++  java
  • shell脚本中向hive动态分区插入数据

    在hive上建表与普通分区表创建方法一样;

     1 CREATE  TABLE `dwa_m_user_association_circle`(
     2   `device_number` string, 
     3   `oppo_number` string, 
     4   `prov_id_oppo` string, 
     5   `area_id_oppo` string, 
     6   `dealer_oppo` string, 
     7   `short_call_nums` bigint, 
     8   `long3_call_nums` bigint, 
     9   `long5_call_nums` bigint, 
    10   `long10_call_nums` bigint, 
    11   `short_total_nums` bigint, 
    12   `long3_total_nums` bigint, 
    13   `long5_total_nums` bigint, 
    14   `long10_total_nums` bigint, 
    15   `area_id` string)
    16 PARTITIONED BY ( 
    17   `month_id` string, 
    18   `prov_id` string, 
    19   `dealer` string)
    20 ROW FORMAT DELIMITED 
    21   FIELDS TERMINATED BY '|' 
    22   NULL DEFINED AS '' 
    23 STORED AS INPUTFORMAT 
    24   'org.apache.hadoop.mapred.TextInputFormat' 
    25 OUTPUTFORMAT 
    26   'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
    27 LOCATION
    28   'hdfs://beh/user/hive/warehouse/all_ana_pro.db/dwa_m_user_association_circle'
    29 TBLPROPERTIES (
    30   'numPartitions'='248', 
    31   'numFiles'='648', 
    32   'transient_lastDdlTime'='1542952067', 
    33   'totalSize'='247584222644', 
    34   'numRows'='0', 
    35   'rawDataSize'='0')

    在shell脚本中,需设置的参数: 

    set hive.exec.dynamic.partition=true;                     #开启动态分区,默认是false
    set hive.exec.dynamic.partition.mode=nostrict;      #开启允许所有分区都是动态的,否则必须要有静态分区才能使用。

    set hive.exec.max.created.files=1000000;             #允许创建的最大文件数,当分区是2个或三个分区时,文件会被分成很多小文件,该设置就是将文件的最大数目设成100w;

     1 insert overwrite table ALL_ANA_PRO.dwa_m_user_association_circle  PARTITION (month_id=${v_month},prov_id,dealer) 
     2 select device_number,
     3         oppo_number  ,
     4         prov_id_oppo ,
     5         area_id_oppo ,
     6         dealer_oppo ,
     7         short_call_nums,
     8         long3_call_nums,
     9         long5_call_nums,
    10         long10_call_nums,
    11         sum(short_call_nums) over(distribute by device_number)  short_total_nums,
    12         sum(long3_call_nums ) over(distribute by device_number) long3_total_nums,
    13         sum(long5_call_nums) over(distribute by  device_number) long5_total_nums,
    14         sum(long10_call_nums) over(distribute by device_number) long10_total_nums,
    15         area_id      ,
    16         prov_id      ,
    17         dealer   
    18 from 
    19         (SELECT device_number,
    20                 prov_id      ,
    21                 area_id      ,
    22                 dealer       ,
    23                 oppo_number  ,
    24                 prov_id_oppo ,
    25                 area_id_oppo ,
    26                 dealer_oppo  ,
    27                 sum(case when t.bill_times < 60 then 1 else 0 end ) short_call_nums,
    28                 sum(case when t.bill_times >180 then 1 else 0 end ) long3_call_nums,
    29                 sum(case when t.bill_times >300 then 1 else 0 end ) long5_call_nums,
    30                 sum(case when t.bill_times >600 then 1 else 0 end ) long10_call_nums
    31         FROM ( SELECT prov_id      ,
    32                       area_id      ,
    33                       device_number,
    34                       0 dealer     ,
    35                       prov_id_oppo ,
    36                       area_id_oppo ,
    37                       oppo_number  ,
    38                       dealer_oppo  ,
    39                       bill_times
    40         FROM   ALL_ANA_PRO.DWA_M_CALL_RING_BASE t
    41         WHERE  month_id = '${v_month}'
    42         AND    ticket_type = 1
    43         AND    dealer_oppo > -1
    44         UNION ALL
    45         SELECT prov_id_oppo prov_id      ,
    46               area_id_oppo area_id      ,
    47               oppo_number device_number ,
    48               dealer_oppo dealer        ,
    49               prov_id prov_id_oppo      ,
    50               area_id area_id_oppo      ,
    51               device_number oppo_number ,
    52               0 dealer_oppo             ,
    53               bill_times
    54         FROM   ALL_ANA_PRO.DWA_M_CALL_RING_BASE t
    55         WHERE  month_id = '${v_month}'
    56         AND    ticket_type = 1
    57         AND    dealer_oppo > -1
    58            ) t
    59         GROUP BY
    60               device_number,
    61               prov_id      ,
    62               area_id      ,
    63               dealer       ,
    64               oppo_number  ,
    65               prov_id_oppo ,
    66               area_id_oppo ,
    67               dealer_oppo
    68         ) t;

     这里,需要说明的是,向目标表插数:

    insert overwrite table ALL_ANA_PRO.dwa_m_user_association_circle  PARTITION (month_id=${v_month},prov_id,dealer)
    其中select语句中动态分区prov_id,dealer两个字段一般在最后面;

  • 相关阅读:
    SpringBoot学习历程
    日期和时间库Joda Time
    apache commons validator
    apache commons fileupload
    apache commons io
    apache commons compress
    apache commons codec
    apache commons email
    Http协议介绍
    Java原生Socket API
  • 原文地址:https://www.cnblogs.com/xiao02fang/p/10007008.html
Copyright © 2011-2022 走看看