zoukankan      html  css  js  c++  java
  • 华为云如何建表并创建作业定时调度抽取数据

    首先新建DLI SQL脚本

    DLI SQL是在hive、spark基础上封装的脚本工具

    创建库表并导入数据

    create table if not exists akc_dw.rpt_mkt_xf_promo_merchant_data_1h_share_test_case2
    (
    data_time_key string comment '调度执行计划的时间 yyyy-mm-dd hh:mi:ss',
    promo_activity_no string comment '营销活动编号',
    team_id string comment '分校校长id',
    merchant_id string comment '商家id',
    share_cnt bigint comment '转发次数'
    ) comment '销返看板数据之每个社群群主转发次数-指定商品维度'
    partitioned by (mt string)--定义分区类型
    stored as orc
    ;
    insert overwrite table akc_dw.rpt_mkt_xf_promo_merchant_data_1h_share_test_case2 partition (mt='${cyctime}')


    --计算已报名的分校校长人数,和分校校长转发次数
    SELECT
    to_date1('${cyctime}','yyyymmddhhmiss') as data_time_key--调度执行计划的时间,这个参数是提前在作业管理页面的配置管理中定义好的,当前时间就是计划调度时间
    ,sum(case when t4.team_id is not null then t1.share_cnt end )as share_cnt
    , t1.promo_activity_no
    ,t4.team_id--需要获取流量转发人的校长id
    ,t1.merchant_id
    FROM --获取流量关联的营销活动
    (select
    t1.union_id
    ,t1.buyer_id
    ,t1.distributor_id
    ,coalesce(t1.user_id,t3.user_id) as user_id--每一条转发记录对应的小bid,直接从t3店铺信息表取
    ,t1.user_code
    ,t1.activity_id
    ,t1.activity_name
    ,t1.product_id
    ,t1.product_name
    ,t1.brand_id
    ,t1.brand_name
    ,t1.share_cnt
    ,t2.promo_activity_no
    ,t2.promo_activity_type
    ,t2.promo_rank_type--1是销返2是满返
    ,t2.target_group
    ,t2.start_time
    ,t2.end_time
    ,t2.merchant_id
    ,t1.dt
    ,t1.shop_id
    from ( --获取流量数据
    select
    t1.union_id
    ,t1.buyer_id
    ,t1.distributor_id
    ,t1.user_id
    ,t1.user_code
    ,t1.activity_id
    ,t2.live_name as activity_name
    ,t1.product_id
    ,t3.product_name
    ,t3.brand_id
    ,t3.brand_name
    ,t1.share_cnt
    ,t1.dt
    ,t1.shop_id
    from
    (--获取某idol近30天的流量数据
    select
    t1.union_id
    ,t1.buyer_id
    ,t1.distributor_id
    ,t1.user_id
    ,t1.user_code
    ,if(event_type rlike 'click|share|add_to_cart' , click_activity_id,activity_id ) as activity_id
    ,if(event_type rlike 'click|share|add_to_cart' , click_product_id,product_id ) as product_id
    ,t1.dt
    ,t1.shop_id
    ,count(1) share_cnt--每一天每一个商品/活动用户的转发次数
    from
    akc_dw.edw_trfc_traffic_detail_all_plt_di_15m t1--流量表
    where
    t1.dt between to_char(dateadd(to_date1('${cyctime}','yyyymmddhhmiss'),-20,'dd'),'yyyymmdd') and '${gmtdate}'
    and t1.share_type = '商品'
    and t1.event_type = 'share'
    AND t1.etl_time<=to_date1('${cyctime}','yyyymmddhhmiss')--小于等于调度时间

    group by
    t1.union_id
    ,t1.buyer_id
    ,t1.distributor_id
    ,t1.user_id
    ,t1.user_code
    ,if( event_type rlike 'click|share|add_to_cart' , click_activity_id,activity_id )
    ,if( event_type rlike 'click|share|add_to_cart' , click_product_id,product_id )
    ,t1.dt
    ,t1.shop_id
    ) t1
    join
    (
    select live_id,live_name
    from akc_dw.frt_dim_live_info --活动表获得流量表活动名称
    ) t2
    on t1.activity_id = t2.live_id
    left join
    akdc.dim_product_info t3 --商品信息表获取流量表商品品牌信息
    on t1.product_id = t3.product_id
    )t1
    inner join
    (
    SELECT
    t1.product_skc_id as product_id
    ,t1.activity_id as activity_id
    ,t2.promo_activity_config_id as promo_activity_no --营销活动no
    ,t2.name as promo_activity_name --营销活动名称
    ,t2.type as promo_activity_type --营销活动类型 1排名 2满返
    ,t2.rank_type as promo_rank_type
    ,t2.start_time --营销活动开始时间
    ,t2.end_time --营销活动结束时间
    ,t2.after_sale_deadline --营销活动售后截止时间
    ,t2.target_group --营销活动参与的主体 1社群 2单店店主 3分校
    ,t2.target_gmv --营销活动的目标值
    ,t1.merchant_id
    ,t1.merchant_rate
    ,t1.platform_rate
    FROM
    akc_dw.frt_ods_treasurebox_akc_treasurebox_rule_activity_product_df t1--参与销返的活动的指定商品
    inner join
    akc_dw.frt_ods_treasurebox_akc_treasurebox_config_rule_df t2
    on t1.promo_activity_config_id = t2.promo_activity_config_id
    and t2.status = 1
    and t2.is_deleted = 0
    and t2.dt = max_pt('akc_dw.frt_ods_treasurebox_akc_treasurebox_config_rule_df')
    where
    t1.dt = max_pt('akc_dw.frt_ods_treasurebox_akc_treasurebox_rule_activity_product_df')
    and t1.status = 1--启用状态
    and t1.is_deleted = 0 --未删除
    )t2
    on t1.product_id = t2.product_id
    and t1.activity_id = t2.activity_id
    left join
    akc_dw.dim_shop_info t3--为了获取流量数据是店铺维度下所有人的转发
    on t1.shop_id = t3.shop_id
    where
    coalesce(t1.user_id,t3.user_id) is not null--如果是爱豆自己转发则取爱豆自己的user_id,否则取爱豆店主下的其他人转发
    and t1.dt between to_char(t2.start_time,'yyyymmdd') and to_char(t2.end_time,'yyyymmdd')
    and t2.target_group = 1--营销主体为分校
    and t2.promo_activity_type = 1 --限定是排名的营销活动
    )t1
    left join
    (--获得社群战队的数据
    select
    user_id
    ,depart_id as team_id
    ,depart_name as team_name
    from
    akcdw.user_code_fish_group_channel
    where dt = max_pt('akcdw.user_code_fish_group_channel')
    and channel = '战队'
    ) t4
    on t1.user_id = t4.user_id
    group by
    t1.promo_activity_no
    ,t4.team_id
    ,t1.merchant_id

     提交保存脚本,脚本名字和表名要一致

     

    作业管理创建作业目录

     

    新建作业

     

    然后进入新建作业界面

     点击添加的dli sql编辑作业信息

    ...中搜索脚本名称即表名即可

     配置自动调度

    并发数选100,意思是截止上次失败后多少次内还会继续执行

  • 相关阅读:
    fiddler抓取https请求(android/ios)
    AngularJs_自定义注入对象_笔记1
    freeRTOS中文实用教程3--中断管理之中断嵌套
    freeRTOS中文实用教程3--中断管理之中断服务例程中使用队列
    freeRTOS中文实用教程3--中断管理之计数信号量
    MCS-51单片机存储地址空间划分
    STM32F103X datasheet学习笔记---Flexible static memory controller (FSMC)
    STM32F103X datasheet学习笔记---Interrupts and events
    STM32F103X datasheet学习笔记---DMA
    STM32F103X datasheet学习笔记---RCC(reset and clock control)
  • 原文地址:https://www.cnblogs.com/T-CYP/p/15208136.html
Copyright © 2011-2022 走看看