23.1 DWS层
使用点击日志表作为DWS层数据源
23.2 ADS层
23.2.1 建表语句
drop table if exists ads_goods_display_top10;
create external table ads_goods_display_top10 (
`dt` string COMMENT '日期',
`category` string COMMENT '品类',
`goodsid` string COMMENT '商品id',
`goods_count` string COMMENT '商品点击次数'
)
row format delimited fields terminated by ' '
location '/warehouse/gmall/ads/ads_goods_display_top10';
23.2.2 导入数据
insert into table ads_goods_display_top10
select
'2019-10-03',
category,
goodsid,
count
from(
select
category,
goodsid,
count,
rank() over(partition by category order by count desc) rk
from(
select
category,
goodsid,
count(*) count
from dwd_display_log
where dt='2019-10-03' and action=2
group by category, goodsid
)t1
)t2
where rk<=10;
23.2.3 导入数据脚本
1)创建脚本ads_goods_display_top10.sh
vim ads_goods_display_top10.sh
2)增加脚本执行权限
chmod 777 ads_goods_display_top10.sh
3)脚本使用
ads_goods_display_top10.sh 2019-02-20
4)查询结果
select * from ads_goods_display_top10;
5)脚本执行时间
企业开发中一般在每天凌晨30分~1点
第24章 需求十三:点击次数最多的10个用户点击的商品次数top10
24.1 DWS层
使用日志数据用户行为宽表作为DWS层表
24.2 ADS层
24.2.1 建表语句
drop table if exists ads_goods_user_count;
create external table ads_goods_user_count(
`dt` string COMMENT '统计日期',
`mid_id` string COMMENT '用户id',
`u_ct` string COMMENT '用户总点击次数',
`goodsid` string COMMENT '商品id',
`d_ct` string COMMENT '各个商品点击次数'
)
row format delimited fields terminated by ' '
location '/warehouse/gmall/ads/ads_goods_user_count';
24.2.2 导入数据
insert into table ads_goods_user_count
select
'2019-10-03',
mid_id,
u_ct,
goodsid,
d_ct
from(
select
mid_id,
u_ct,
goodsid,
d_ct,
row_number() over(partition by mid_id order by d_ct desc ) rn
from(
select
dl.mid_id,
u_ct,
dl.goodsid,
count(*) d_ct
from dwd_display_log dl join (
select
mid_id,
count(*) u_ct
from dws_user_action_wide_log
group by mid_id
order by u_ct desc
limit 10
)t1
on dl.mid_id=t1.mid_id
group by dl.mid_id, u_ct, dl.goodsid
) t2
) t3
where rn<=10
24.2.3 导入数据脚本
1)创建脚本ads_goods_user_count.sh
vim ads_goods_user_count.sh
2)增加脚本执行权限
chmod 777 ads_goods_user_count.sh
3)脚本使用
ads_goods_user_count.sh 2019-02-20
4)查询结果
select * from ads_goods_user_count;
5)脚本执行时间 企业开发中一般在每天凌晨30分~1点
第25章 需求十四:月活跃率
月活跃用户与截止到该月累计的用户总和之间的比例
25.1 DWS层
使用DWS层月活表以及ADS新增用户表作为DWS层
25.2 ADS层
25.2.1 建表语句
drop table if exists ads_mn_ratio_count;
create external table ads_mn_ratio_count(
`dt` string COMMENT '统计日期',
`mn` string COMMENT '统计月活跃率的月份',
`ratio` string COMMENT '活跃率'
)
row format delimited fields terminated by ' '
location '/warehouse/gmall/ads/ads_mn_ratio_count';
25.2.2 导入数据
insert into table ads_mn_ratio_count
select
'2019-10-03',
date_format('2019-10-03','yyyy-MM'),
mn_count/sum_user*100 mn_percent
from
(select count(*) mn_count from dws_uv_detail_mn where mn=date_format('2019-10-03','yyyy-MM')) t1,
(select sum(new_mid_count) sum_user from ads_new_mid_count) t2;
25.2.3 导入数据脚本
1)创建脚本ads_mn_ratio_count.sh
vim ads_mn_ratio_count.sh
2)增加脚本执行权限
chmod 777 ads_mn_ratio_count.sh
3)脚本使用
ads_mn_ratio_count 2019-02-20
4)查询结果
select * from ads_mn_ratio_count;
5)脚本执行时间
企业开发中一般在每天凌晨30分~1点