zoukankan      html  css  js  c++  java
  • dws_user_action_daycount

    每日会员行为

    类似的有:每日商品行为

    因为宽表要关联很多张表,但在hive中join操作会变得很慢

    提供如下写法

    检索多张表,采用union all + group by id 的形式累加数据,速度较快

    drop table if exists dws_user_action_daycount; 
    create external table dws_user_action_daycount 
    ( 
    user_id string comment '用户 id', 
    login_count bigint comment '登录次数', 
    cart_count bigint comment '加入购物车次数', 
    cart_amount double comment '加入购物车金额', 
    order_count bigint comment '下单次数', 
    order_amount decimal(16,2) comment '下单金额', 
    payment_count bigint comment '支付次数', 
    payment_amount decimal(16,2) comment '支付金额' 
    ) 
    COMMENT '每日用户行为' 
    PARTITIONED BY (`dt` string) 
    stored as parquet 
    location '/ecdw/dws/dws_user_action_daycount/' 
    tblproperties ("parquet.compression"="lzo");
    
    
    -- 插入
    with tmp_login as 
    ( 
        select 
            user_id, 
            count(*) login_count 
        from dwd_start_log 
        where dt='2020-03-10' 
        and user_id is not null 
        group by user_id 
    ),
    
    tmp_cart as ( 
        select 
            user_id, 
            count(*) cart_count,
            sum(cart_price*sku_num) cart_amount 
        from dwd_fact_cart_info 
        where dt='2020-03-10' 
        and user_id is not null 
        and date_format(create_time,'yyyy-MM-dd')='2020-03-10' 
        group by user_id 
    ),
    
    tmp_order as 
    (
        select 
            user_id, 
            count(*) order_count, 
            sum(final_total_amount) order_amount 
            from dwd_fact_order_info 
        where dt='2020-03-10' 
        and user_id is not null
        group by user_id 
    ) , 
    tmp_payment as 
    ( 
        select 
            user_id, 
            count(*) payment_count, 
            sum(payment_amount) payment_amount 
        from dwd_fact_payment_info 
        where dt='2020-03-10'
        and user_id is not null
        group by user_id 
    )
    
    insert into table dws_user_action_daycount 
    partition(dt='2020-03-10') 
    select 
        user_actions.user_id, 
        sum(user_actions.login_count), 
        sum(user_actions.cart_count), 
        sum(user_actions.cart_amount), 
        sum(user_actions.order_count), 
        sum(user_actions.order_amount), 
        sum(user_actions.payment_count), 
        sum(user_actions.payment_amount) 
    from 
    ( 
    select 
        user_id, 
        login_count, 
        0 cart_count, 
        0 cart_amount, 
        0 order_count, 
        0 order_amount, 
        0 payment_count, 
        0 payment_amount 
    from tmp_login 
    union all 
    
    select 
        user_id, 
        0 login_count, 
        cart_count, 
        cart_amount, 
        0 order_count, 
        0 order_amount, 
        0 payment_count, 
        0 payment_amount 
    from tmp_cart
    union all 
    
    select 
        user_id, 
        0 login_count, 
        0 cart_count, 
        0 cart_amount, 
        order_count, 
        order_amount, 
        0 payment_count, 
        0 payment_amount 
    from tmp_order 
    union all 
    
    select 
        user_id, 
        0 login_count, 
        0 cart_count, 
        0 cart_amount, 
        0 order_count, 
        0 order_amount, 
        payment_count, 
        payment_amount 
    from tmp_payment 
    ) user_actions 
    group by user_id;
  • 相关阅读:
    初学git(一):创建本地“仓库”
    git常用命令
    Linux学习(一):从图形界面进入命令行及命令行进入图形界面
    继承与派生
    数据类型和表达式
    UDP
    网络与通信
    枚举类型
    结构体
    第一个随笔
  • 原文地址:https://www.cnblogs.com/ldy233/p/14448579.html
Copyright © 2011-2022 走看看