已知用户表t_user, uid int 用户id
订单表:t_order
oid int 订单id
uid int 用户id
otime date 订单时间
Amount double 订单金额
其中用户表和订单表是一对多的关系
需求:
结果集要求:
计算在2018年1月下过订单,2月份没有下过订单的用户在3月份的订单金额分布。
具体字段如下:
uid,三月份订单金额超过10的订单数,3月份首次下单的金额,3月份最后一次下单的金额
测试数据如下:
create table t_order (oid int , uid int , otime date, oamount int )partitioned by (dt string) ROW FORMAT DELIMITED FIELDS TERMINATED BY ' '; insert into table t_order partition(dt ='2018-01-01') values(1003,2,'2018-01-01',100); insert into table t_order partition(dt ='2018-01-02') values(1004,2,'2018-01-02',20); insert into table t_order partition(dt ='2018-01-02') values(1005,2,'2018-01-02',100); insert into table t_order partition(dt ='2018-01-02') values(1006,4,'2018-01-02',30); insert into table t_order partition(dt ='2018-01-03') values(1007,1,'2018-01-03',130); insert into table t_order partition(dt ='2018-01-03') values(1008,2,'2018-01-03',5); insert into table t_order partition(dt ='2018-01-03') values(1009,2,'2018-01-03',5);
insert into table t_order partition(dt ='2018-02-01') values(1001,5,'2018-02-01',110); insert into table t_order partition(dt ='2018-02-01') values(1002,3,'2018-02-01',110); insert into table t_order partition(dt ='2018-02-03') values(1003,3,'2018-02-03',100); insert into table t_order partition(dt ='2018-02-03') values(1004,3,'2018-02-03',20); insert into table t_order partition(dt ='2018-02-04') values(1005,3,'2018-02-04',30); insert into table t_order partition(dt ='2018-02-04') values(1006,6,'2018-02-04',100); insert into table t_order partition(dt ='2018-02-04') values(1007,6,'2018-02-04',130); insert into table t_order partition(dt ='2018-03-01') values(1001,1,'2018-03-01',120); insert into table t_order partition(dt ='2018-03-03') values(1002,2,'2018-03-03',5); insert into table t_order partition(dt ='2018-03-03') values(1003,2,'2018-03-03',11); insert into table t_order partition(dt ='2018-03-03') values(1004,3,'2018-03-03',1); insert into table t_order partition(dt ='2018-03-04') values(1005,3,'2018-03-04',20); insert into table t_order partition(dt ='2018-03-04') values(1006,4,'2018-03-04',30); insert into table t_order partition(dt ='2018-03-04') values(1007,1,'2018-03-04',50);
实现:
select uid, sum(if(date_format(otime,'yyyy-MM')='2018-01',1,0)) as month1_order_cnt, sum(if(date_format(otime,'yyyy-MM')='2018-02',1,0)) as month2_order_cnt, sum(if(date_format(otime,'yyyy-MM')='2018-03' and oamount>10,oamount,0)) as month3_order_amount, sum(if(date_format(otime,'yyyy-MM')='2018-03' and rk=1,oamount,0)) as month3_first, sum(if(date_format(otime,'yyyy-MM')='2018-03' and rk=cnt,oamount,0)) as month3_last from( select count(*) over(partition by uid,date_format(otime,'yyyy-MM')) as cnt, row_number() over(partition by uid,date_format(otime,'yyyy-MM')) as rk, * from t_order ) as t group by uid having month1_order_cnt>0 and month2_order_cnt=0