【TransactSQL】统计某字段中的值第一次出现后的2小时内出现的次数

table1
name               createdate
a              2011-03-01 10:00:00
a              2011-03-01 11:00:00
a              2011-03-01 14:00:00
b              2011-03-01 13:00:00
b              2011-03-01 13:20:00
b              2011-03-01 14:00:00
 
查询结果为
name             createdate                    count
a             2011-03-01 10:00:00               2
a             2011-03-01 14:00:00               1
b             2011-03-01 13:00:00               3

就相当于是统计name字段中的值在第一次出现后的2小时内，总共出现了几次？

这个是网上的解答：

declare @table1 table(name nvarchar,createdate smalldatetime)

insert into @table1
select 'a','2011-03-01 10:00:00'
union all select 'a','2011-03-01 11:00:00'
union all select 'a','2011-03-01 14:00:00'
union all select 'b','2011-03-01 13:00:00'
union all select 'b','2011-03-01 13:20:00'
union all select 'b','2011-03-01 14:00:00'

select name,
       createdate,
       (select count(createdate)
        from @table1 b 
        where a.name=b.name and 
              a.createdate<=b.createdate and 
              dateadd(hh,2,a.createdate) >= b.createdate
       ) as count

from @table1 a 
where not exists 
         (select 1 from 
          @table1 b 
          where a.name=b.name and 
                a.createdate>b.createdate and 
                a.createdate<dateadd(hh,2,b.createdate)) 
group by name,createdate

但是这个解答其实是有问题的，当把临时表中的第3条数据的createdate改为'2011-03-01 12:00:00'，那么显示的结果是：

name      createdate                           count
a               2011-03-01 10:00:00        3
b                2011-03-01 13:00:00        3

在其中没有包括createdate为'2011-03-01 12:00:00'的记录，因为这个时间到为'2011-03-01 10:00:00'是超过2个小时了，也就是说为'2011-03-01 10:00:00'是第一个出现时间，到为'2011-03-01 11:59:59'为止，接下来应该是从'2011-03-01 12:00:00'开始的下个区间了，而这里显然是有问题的。

以下是我写的解法，虽然效率不是太高，但是能解决这个问题：

declare @table1 table(name nvarchar,createdate smalldatetime)

insert into @table1
select 'a','2011-03-01 10:00:00'
union all select 'a','2011-03-01 11:00:00'
union all select 'a','2011-03-01 12:00:00'
union all select 'b','2011-03-01 13:10:00'
union all select 'b','2011-03-01 13:20:00'
union all select 'b','2011-03-01 14:30:00'
union all select 'b','2011-03-01 15:15:00'
union all select 'b','2011-03-01 16:00:00'
union all select 'b','2011-03-01 17:00:00'


  
;with aa  --按照name分区，同时按照createdate排序编号
as
(
select name,
       createdate,
       ROW_NUMBER() over(partition by name
                             order by createdate) as k1
from @table1 
),

r
as
(
select v.name,
       starts=v.k1,  --区间开始的编号
       
       ends=isnull(
               min(case when v.k1<a.k1 
                             and DATEADD(hour,2,v.createdate) <= a.createdate 
                             then a.k1 
                         else null 
                   end)-1,

               max(case when v.k1<a.k1 
                             then a.k1 
                        else v.k1 
                   end)
             ),     --区间结尾的编号      
                             
           isnull(
               min(case when v.k1<a.k1 
                             and DATEADD(hour,2,v.createdate) <= a.createdate 
                             then a.k1 
                         else null 
                   end)-1,

               max(case when v.k1<a.k1 
                             then a.k1 
                        else v.k1 
                   end)
             ) - v.k1 as diff     --区间结尾编号与区间开始编号之间的差值
                              
from aa v  
inner join aa a 
        on v.name = a.name  --只关联name相等的
group by v.name,
         v.k1
having isnull(
               min(case when v.k1<a.k1 
                             and DATEADD(hour,2,v.createdate) <= a.createdate 
                             then a.k1 
                         else null 
                   end)-1,

               max(case when v.k1<a.k1 
                             then a.k1 
                        else v.k1 
                   end)
             )  >=v.k1   
        and            
        isnull(
               max(case when v.k1>a.k1 and 
                             DATEADD(hour,-2,v.createdate) >= a.createdate
                             then v.k1 - 1 
                        else null 
                   end) + 1,

               min(case when v.k1>a.k1 
                             then a.k1 
                        else v.k1 
                   end)
              ) = v.k1
)

--select * from r


select aa.name,
       aa.createdate,
       diff + 1
from r
inner join aa 
        on aa.name = r.name
           and aa.k1 =r.starts
where not exists
		(select 1
		 from r rr
		 where rr.name = r.name and
		       rr.starts <> r.starts and
		       rr.starts < r.starts and
		       (rr.ends = r.ends or
		        rr.ends = r.starts)
		)

不过发现我的这个解法也是有问题的，最大的问题在与不能准确的确定上限在那里，还得继续考虑问题的解法。

下面这个也是有问题的：

declare @table1 table(name nvarchar,createdate smalldatetime)

insert into @table1
select 'a','2011-03-01 10:00:00'
union all select 'a','2011-03-01 11:00:00'
union all select 'a','2011-03-01 12:00:00'

--union all select 'b','2011-03-01 13:10:00'
--union all select 'b','2011-03-01 13:20:00'
--union all select 'b','2011-03-01 14:30:00'
--union all select 'b','2011-03-01 15:15:00'
--union all select 'b','2011-03-01 16:00:00'
--union all select 'b','2011-03-01 17:15:00'


  
;with a  --按照name分区，同时按照createdate排序编号
as
(
select name,
       createdate,
       ROW_NUMBER() over(partition by name
                             order by createdate) as k1
from @table1 
),

c
as
(
select a1.name,
       a1.createdate,
       a1.k1,
       MIN(a2.createdate) as nextCreatedate,
       MIN(a2.k1) as nextK1
from a a1
inner join a a2
        on a1.name = a2.name
           and a2.createdate < DATEADD(hour,2,a1.createdate)
           and a2.createdate >= a1.createdate
           and a1.k1 <= a2.k1
group by a1.name,
         a1.createdate,
         a1.k1
),

w
as
(
select name,
       createdate,
       k1
       
       --null,
       --null,
       --null
from a
where k1 = 1

union all

select c.name,
       c.nextCreatedate,
       c.nextK1
       
from W
inner join a
        on a.name = w.name
           and dateadd(hour,2,w.createdate) <= a.createdate
           and w.k1 <= a.k1
           and w.createdate <> '2011-03-01 12:00:00' 
inner join c
        on w.name = c.name 
           and w.createdate = c.createdate
           and w.k1 = c.k1
where w.k1 <=3
)

SELECT * 
FROM w

下面的解法是正确的，不过用的是T-SQL，不是纯sql了：

declare @table1 table(name nvarchar(100),createdate smalldatetime)

declare @table2 table(name nvarchar(100),createdate smalldatetime,rnum bigint)

declare @temp table(name nvarchar(100),createdate smalldatetime,rnum bigint)

declare @i int = 1;

insert into @table1
select 'a','2011-03-01 10:00:00'
union all select 'a','2011-03-01 11:00:00'
union all select 'a','2011-03-01 12:00:00'

union all select 'b','2011-03-01 13:10:00'
union all select 'b','2011-03-01 13:20:00'
union all select 'b','2011-03-01 14:30:00'
union all select 'b','2011-03-01 15:15:00'
union all select 'b','2011-03-01 16:00:00'
union all select 'b','2011-03-01 17:16:00'
union all select 'b','2011-03-01 17:15:00'

  
;with a  --按照name分区，同时按照createdate排序编号
as
(
select name,
       createdate,
       ROW_NUMBER() over(partition by name
                             order by createdate) as k1
from @table1 
)

insert into @table2
select * from a

insert into @temp
select name,
       createdate,
       rnum
from @table2
where rnum = 1

--select * from @temp


while @i <= (select MAX(rnum) from @table2)
begin
    insert into @temp
    
	select t2.name,
	       min(t2.createdate),
	       @i +1	
	from @temp t1
	inner join @table2 t2        
	        on t1.name = t2.name
	           and t2.createdate >= dateadd(hour,2,t1.createdate)	           
	where t1.rnum = @i     
	group by t2.name
	            
	set @i = @i + 1	
end

;with r
as
(
select name,
       createdate
from @temp
group by name,
         createdate
)

select r.name,
       r.createdate,
       COUNT(1)
from r
inner join @table1 t
        on t.name = r.name
           and t.createdate >= r.createdate
           and t.createdate <DATEADD(HOUR,2,r.createdate)
group by r.name,
         r.createdate

其实这个问题是个递归问题，由上一个找到下一个，但是得构造一下：

declare @table1 table(name nvarchar,createdate smalldatetime)

insert into @table1
select 'a','2011-03-01 10:00:00'
union all select 'a','2011-03-01 11:00:00'
union all select 'a','2011-03-01 12:00:00'
union all select 'a','2011-03-01 12:20:00'

union all select 'b','2011-03-01 13:10:00'
union all select 'b','2011-03-01 13:20:00'
union all select 'b','2011-03-01 14:30:00'
union all select 'b','2011-03-01 15:15:00'
union all select 'b','2011-03-01 16:00:00'
union all select 'b','2011-03-01 17:20:00'
union all select 'b','2011-03-01 17:15:00'
union all select 'b','2011-03-01 19:16:00'
union all select 'b','2011-03-01 17:15:00'

  
;with a  --按照name分区，同时按照createdate排序编号
as
(
select name,
       createdate,
       ROW_NUMBER() over(partition by name
                             order by createdate) as k1
from @table1 
),

c   --对于每个时间,找到大于这个时间2小时的时间中最小那个时间
as
(
select a1.name,
       a1.createdate,
       a1.k1,
       MIN(a2.createdate) as nextCreatedate,
       MIN(a2.k1) as nextK1
from a a1
inner join a a2
        on a1.name = a2.name
           and a2.createdate >= DATEADD(hour,2,a1.createdate)
           
group by a1.name,
         a1.createdate,
         a1.k1

union all

select a.name,null,null,a.createdate,1  --构造递归运行时需要的层级
from a
where k1 = 1
),

w   --递归查询
as
(
select c.name,
       c.createdate,
       c.k1,
       c.nextCreatedate,
       c.nextK1,
       1 as lev
from c
where createdate is null
      and k1 is null

union all

select c.name,
       c.createdate,
       c.k1,
       c.nextCreatedate,
       c.nextK1,
       lev + 1
from W
inner join c
        on w.name = c.name
           and w.nextCreatedate = c.createdate

)

SELECT distinct name,
       nextCreatedate,
       nextK1 
FROM w