说明:环境为oracle
一、需求
二、思路
思路一:
- 需要判断连续发布的两次利率是否一致,如果一致,就只保留最先发布的一次记录
- 需要附带排序后的时间的编号,以便实现错位相减的效果
- 让去重后的数据进行关联,有得取舍
思路二:
-
如果为了能在数据获取后,对其进行序号打标,在mysql等支持procedure的函数中,可以通过如下方式
begin select a.column,(@seq:=@seq+1) seq from table_a,(select @seq:=0 from dual) b end
三、实现
3.1 数据准备
-- 创建表
create table TABLE1
(
publish_date DATE,
rate VARCHAR2(20)
)
-- 插入数据
insert into TABLE1 (PUBLISH_DATE, RATE)
values (to_date('01-01-2010', 'dd-mm-yyyy'), '5.1%');
insert into TABLE1 (PUBLISH_DATE, RATE)
values (to_date('01-10-2010', 'dd-mm-yyyy'), '5.1%');
insert into TABLE1 (PUBLISH_DATE, RATE)
values (to_date('01-01-2011', 'dd-mm-yyyy'), '6.0%');
insert into TABLE1 (PUBLISH_DATE, RATE)
values (to_date('31-10-2012', 'dd-mm-yyyy'), '6.0%');
insert into TABLE1 (PUBLISH_DATE, RATE)
values (to_date('10-11-2012', 'dd-mm-yyyy'), '6.0%');
insert into TABLE1 (PUBLISH_DATE, RATE)
values (to_date('31-12-2012', 'dd-mm-yyyy'), '6.0%');
insert into TABLE1 (PUBLISH_DATE, RATE)
values (to_date('31-03-2013', 'dd-mm-yyyy'), '5.9%');
insert into TABLE1 (PUBLISH_DATE, RATE)
values (to_date('01-09-2013', 'dd-mm-yyyy'), '5.5%');
insert into TABLE1 (PUBLISH_DATE, RATE)
values (to_date('01-05-2014', 'dd-mm-yyyy'), '5.5%');
insert into TABLE1 (PUBLISH_DATE, RATE)
values (to_date('01-01-2015', 'dd-mm-yyyy'), '5.1%');
insert into TABLE1 (PUBLISH_DATE, RATE)
values (to_date('01-06-2016', 'dd-mm-yyyy'), '5.1%');
insert into TABLE1 (PUBLISH_DATE, RATE)
values (to_date('01-09-2017', 'dd-mm-yyyy'), '5.1%');
3.2 求取步骤
-
获取排序后的时间
select t1.publish_date,t1.rate,row_number() over(order by t1.publish_date) seq from table1 t1
-
数据错位实现
select t3.publish_date,t3.rate,row_number() over(order by t3.publish_date) + 1 seq from table1 t3
-
关联
步骤1
和步骤2
获取到的数据,得到关联数据select t2.*,t4.* from ( select t1.publish_date,t1.rate,row_number() over(order by t1.publish_date) seq from table1 t1 ) t2 left join ( select t3.publish_date,t3.rate,row_number() over(order by t3.publish_date) + 1 seq from table1 t3 ) t4 on t4.seq = t2.seq order by t2.publish_date asc
-
对
步骤3
获取到的数据进行处理,排除连续时间下,利率相同的数据select t2.publish_date,t2.rate from ( select t1.publish_date,t1.rate,row_number() over(order by t1.publish_date) seq from table1 t1 ) t2 left join ( select t3.publish_date,t3.rate,row_number() over(order by t3.publish_date) + 1 seq from table1 t3 ) t4 on t4.seq = t2.seq where t2.rate <> NVL(t4.rate,'-1') -- 对于首次的时间,通过这样让其不被排除 order by t2.publish_date asc
-
通过
步骤4
获取到,每次利率变更后的初始时间点,然后通过lead函数进行处理,获取变更节点的前一天作为结束时间select a.publish_date start_date, nvl(to_char(lead(a.publish_date,1) over(order by a.publish_date)-1,'yyyy-mm-dd'),'9999-12-31') end_date, a.rate from ( select t2.publish_date,t2.rate from ( select t1.publish_date,t1.rate,row_number() over(order by t1.publish_date) seq from table1 t1 ) t2 left join ( select t3.publish_date,t3.rate,row_number() over(order by t3.publish_date) + 1 seq from table1 t3 ) t4 on t4.seq = t2.seq where t2.rate <> NVL(t4.rate,'-1') order by t2.publish_date asc ) a