每日会员行为
类似的有:每日商品行为
因为宽表要关联很多张表,但在hive中join操作会变得很慢
提供如下写法
检索多张表,采用union all + group by id 的形式累加数据,速度较快
drop table if exists dws_user_action_daycount; create external table dws_user_action_daycount ( user_id string comment '用户 id', login_count bigint comment '登录次数', cart_count bigint comment '加入购物车次数', cart_amount double comment '加入购物车金额', order_count bigint comment '下单次数', order_amount decimal(16,2) comment '下单金额', payment_count bigint comment '支付次数', payment_amount decimal(16,2) comment '支付金额' ) COMMENT '每日用户行为' PARTITIONED BY (`dt` string) stored as parquet location '/ecdw/dws/dws_user_action_daycount/' tblproperties ("parquet.compression"="lzo"); -- 插入 with tmp_login as ( select user_id, count(*) login_count from dwd_start_log where dt='2020-03-10' and user_id is not null group by user_id ), tmp_cart as ( select user_id, count(*) cart_count, sum(cart_price*sku_num) cart_amount from dwd_fact_cart_info where dt='2020-03-10' and user_id is not null and date_format(create_time,'yyyy-MM-dd')='2020-03-10' group by user_id ), tmp_order as ( select user_id, count(*) order_count, sum(final_total_amount) order_amount from dwd_fact_order_info where dt='2020-03-10' and user_id is not null group by user_id ) , tmp_payment as ( select user_id, count(*) payment_count, sum(payment_amount) payment_amount from dwd_fact_payment_info where dt='2020-03-10' and user_id is not null group by user_id ) insert into table dws_user_action_daycount partition(dt='2020-03-10') select user_actions.user_id, sum(user_actions.login_count), sum(user_actions.cart_count), sum(user_actions.cart_amount), sum(user_actions.order_count), sum(user_actions.order_amount), sum(user_actions.payment_count), sum(user_actions.payment_amount) from ( select user_id, login_count, 0 cart_count, 0 cart_amount, 0 order_count, 0 order_amount, 0 payment_count, 0 payment_amount from tmp_login union all select user_id, 0 login_count, cart_count, cart_amount, 0 order_count, 0 order_amount, 0 payment_count, 0 payment_amount from tmp_cart union all select user_id, 0 login_count, 0 cart_count, 0 cart_amount, order_count, order_amount, 0 payment_count, 0 payment_amount from tmp_order union all select user_id, 0 login_count, 0 cart_count, 0 cart_amount, 0 order_count, 0 order_amount, payment_count, payment_amount from tmp_payment ) user_actions group by user_id;