取每个用户最喜欢购买的前三个product
select * FROM (select user_id, product_id, row_number() over(partition BY user_id ORDER BY top_cnt DESC) AS row_num FROM (select ord.user_id AS user_id, pri.product_id AS product_id, count(1) over(partition BY user_id,product_id) AS top_cnt FROM orders ord JOIN priors pri ON ord.order_id=pri.order_id) new_t) new_t2 WHERE row_num<4 LIMIT 10;
结果输出
new_t2.user_id new_t2.product_id new_t2.top_cnt new_t2.row_num 100001 21137 41 1 100001 13176 41 2 100001 13176 41 3 100005 21709 10 1 100005 21709 10 2 100005 21709 10 3 100009 24852 8 1 100009 24852 8 2 100009 24852 8 3 100012 41148 24 1
把每个用户的前三条记录输出为一行, 列转行
select user_id,collect_list(product_id) as collect_list FROM (select user_id, product_id, row_number() over(partition BY user_id ORDER BY top_cnt DESC) AS row_num FROM (select ord.user_id AS user_id, pri.product_id AS product_id, count(1) over(partition BY user_id,product_id) AS top_cnt FROM orders ord JOIN priors pri ON ord.order_id=pri.order_id) new_t) new_t2 WHERE row_num<4 group by user_id LIMIT 10;
结果如下
user_id collect_list 1 ["12427","12427","12427"] 10 ["47526","47526","47526"] 100 ["27344","27344","27344"] 1000 ["49683","49683","49683"] 10000 ["21137","21137","21137"] 100000 ["10151","10151","10151"] 100001 ["21137","13176","13176"] 100002 ["26172","26172","26172"] 100003 ["47598","17616","17616"] 100004 ["19660","19660","19660"]
参考:
https://blog.csdn.net/doveyoung8/article/details/80022579