在项目中,我们经常遇到或用到分页,那么在大数据量(百万级以上)下,哪种分页算法效率最优呢?我们不妨用事实说话。
测试环境
硬件:CPU 酷睿双核T5750 内存:2G
软件:Windows server 2003 + Sql server 2005
OK,我们首先创建一数据库:data_Test,并在此数据库中创建一表:tb_TestTable
1
create database data_Test --创建数据库data_Test
2
GO
3
use data_Test
4
GO
5
create table tb_TestTable --创建表
6
(
7
id int identity(1,1) primary key,
8
userName nvarchar(20) not null,
9
userPWD nvarchar(20) not null,
10
userEmail nvarchar(40) null
11
)
12
GO

2

3

4

5

6

7

8

9

10

11

12

然后我们在数据表中插入2000000条数据:
1
--插入数据
2
set identity_insert tb_TestTable on
3
declare @count int
4
set @count=1
5
while @count<=2000000
6
begin
7
insert into tb_TestTable(id,userName,userPWD,userEmail) values(@count,'admin','admin888','lli0077@yahoo.com.cn')
8
set @count=@count+1
9
end
10
set identity_insert tb_TestTable off

2

3

4

5

6

7

8

9

10

我首先写了五个常用存储过程:
1,利用select top 和select not in进行分页,具体代码如下:
1
create procedure proc_paged_with_notin --利用select top and select not in
2
(
3
@pageIndex int, --页索引
4
@pageSize int --每页记录数
5
)
6
as
7
begin
8
set nocount on;
9
declare @timediff datetime --耗时
10
declare @sql nvarchar(500)
11
select @timediff=Getdate()
12
set @sql='select top '+str(@pageSize)+' * from tb_TestTable where(ID not in(select top '+str(@pageSize*@pageIndex)+' id from tb_TestTable order by ID ASC)) order by ID'
13
execute(@sql) --因select top后不支技直接接参数,所以写成了字符串@sql
14
select datediff(ms,@timediff,GetDate()) as 耗时
15
set nocount off;
16
end

2

3

4

5

6

7

8

9

10

11

12

13

14

15

16

2,利用select top 和 select max(列键)
1
create procedure proc_paged_with_selectMax --利用select top and select max(列)
2
(
3
@pageIndex int, --页索引
4
@pageSize int --页记录数
5
)
6
as
7
begin
8
set nocount on;
9
declare @timediff datetime
10
declare @sql nvarchar(500)
11
select @timediff=Getdate()
12
set @sql='select top '+str(@pageSize)+' * From tb_TestTable where(ID>(select max(id) From (select top '+str(@pageSize*@pageIndex)+' id From tb_TestTable order by ID) as TempTable)) order by ID'
13
execute(@sql)
14
select datediff(ms,@timediff,GetDate()) as 耗时
15
set nocount off;
16
end

2

3

4

5

6

7

8

9

10

11

12

13

14

15

16

3,利用select top和中间变量--此方法因网上有人说效果最佳,所以贴出来一同测试
1
create procedure proc_paged_with_Midvar --利用ID>最大ID值和中间变量
2
(
3
@pageIndex int,
4
@pageSize int
5
)
6
as
7
declare @count int
8
declare @ID int
9
declare @timediff datetime
10
declare @sql nvarchar(500)
11
begin
12
set nocount on;
13
select @count=0,@ID=0,@timediff=getdate()
14
select @count=@count+1,@ID=case when @count<=@pageSize*@pageIndex then ID else @ID end from tb_testTable order by id
15
set @sql='select top '+str(@pageSize)+' * from tb_testTable where ID>'+str(@ID)
16
execute(@sql)
17
select datediff(ms,@timediff,getdate()) as 耗时
18
set nocount off;
19
end
20

2

3

4

5

6

7

8

9

10

11

12

13

14

15

16

17

18

19

20

4,利用Row_number() 此方法为SQL server 2005中新的方法,利用Row_number()给数据行加上索引
1
create procedure proc_paged_with_Rownumber --利用SQL 2005中的Row_number()
2
(
3
@pageIndex int,
4
@pageSize int
5
)
6
as
7
declare @timediff datetime
8
begin
9
set nocount on;
10
select @timediff=getdate()
11
select * from (select *,Row_number() over(order by ID asc) as IDRank from tb_testTable) as IDWithRowNumber where IDRank>@pageSize*@pageIndex and IDRank<@pageSize*(@pageIndex+1)
12
select datediff(ms,@timediff,getdate()) as 耗时
13
set nocount off;
14
end
15

2

3

4

5

6

7

8

9

10

11

12

13

14

15

5,利用临时表及Row_number
1
create procedure proc_CTE --利用临时表及Row_number
2
(
3
@pageIndex int, --页索引
4
@pageSize int --页记录数
5
)
6
as
7
set nocount on;
8
declare @ctestr nvarchar(400)
9
declare @strSql nvarchar(400)
10
declare @datediff datetime
11
begin
12
select @datediff=GetDate()
13
set @ctestr='with Table_CTE as
14
(select ceiling((Row_number() over(order by ID ASC))/'+str(@pageSize)+') as page_num,* from tb_TestTable)';
15
set @strSql=@ctestr+' select * From Table_CTE where page_num='+str(@pageIndex)
16
end
17
begin
18
execute sp_executesql @strSql
19
select datediff(ms,@datediff,GetDate())
20
set nocount off;
21
end
22

2

3

4

5

6

7

8

9

10

11

12

13

14

15

16

17

18

19

20

21

22

OK,至此,存储过程创建完毕,我们分别在每页10条数据的情况下在第2页,第1000页,第10000页,第100000页,第199999页进行测试,耗时单位:ms 每页测试5次取其平均值
存过 | 第2页耗时 | 第1000页耗时 | 第10000页耗时 | 第100000页耗时 | 第199999页耗时 | 效率排行 |
1用not in | 0ms | 16ms | 47ms | 475ms | 953ms | 3 |
2用select max | 5ms | 16ms | 35ms | 325ms | 623ms | 1 |
3中间变量 | 966ms | 970ms | 960ms | 945ms | 933ms | 5 |
4row_number | 0ms | 0ms | 34ms | 365ms | 710ms | 2 |
4临时表 | 780ms | 796ms | 798ms | 780ms | 805ms | 4 |
测试结果显示:select max >row_number>not in>临时表>中间变量
于是我对效率最高的select max方法用2分法进行了扩展,代码取自互联网,我修改了ASC排序时取不到值的BUG,测试结果:
2分法 | 156ms | 156ms | 180ms | 470ms | 156ms | 1* |
从测试结果来看,使用2分法确实可以提高效率并使效率更为稳定,我又增加了第159999页的测试,用时仅296ms,效果相当的不错!
下面是2分法使用select max的代码,已相当完善。
1
--/*-----存储过程 分页处理 孙伟 2005-03-28创建 -------*/
2
--/*-----存储过程 分页处理 浪尘 2008-9-1修改----------*/
3
--/*----- 对数据进行了2分处理使查询前半部分数据与查询后半部分数据性能相同 -------*/
4
5
alter PROCEDURE proc_paged_2part_selectMax
6
(
7
@tblName nvarchar(200), ----要显示的表或多个表的连接
8
@fldName nvarchar(500) = '*', ----要显示的字段列表
9
@pageSize int = 10, ----每页显示的记录个数
10
@page int = 1, ----要显示那一页的记录
11
@fldSort nvarchar(200) = null, ----排序字段列表或条件
12
@Sort bit = 0, ----排序方法,0为升序,1为降序(如果是多字段排列Sort指代最后一个排序字段的排列顺序(最后一个排序字段不加排序标记)--程序传参如:' SortA Asc,SortB Desc,SortC ')
13
@strCondition nvarchar(1000) = null, ----查询条件,不需where
14
@ID nvarchar(150), ----主表的主键
15
@Dist bit = 0, ----是否添加查询字段的 DISTINCT 默认0不添加/1添加
16
@pageCount int = 1 output, ----查询结果分页后的总页数
17
@Counts int = 1 output ----查询到的记录数
18
)
19
AS
20
SET NOCOUNT ON
21
Declare @sqlTmp nvarchar(1000) ----存放动态生成的SQL语句
22
Declare @strTmp nvarchar(1000) ----存放取得查询结果总数的查询语句
23
Declare @strID nvarchar(1000) ----存放取得查询开头或结尾ID的查询语句
24
25
Declare @strSortType nvarchar(10) ----数据排序规则A
26
Declare @strFSortType nvarchar(10) ----数据排序规则B
27
28
Declare @SqlSelect nvarchar(50) ----对含有DISTINCT的查询进行SQL构造
29
Declare @SqlCounts nvarchar(50) ----对含有DISTINCT的总数查询进行SQL构造
30
31
declare @timediff datetime --耗时测试时间差
32
select @timediff=getdate()
33
34
if @Dist = 0
35
begin
36
set @SqlSelect = 'select '
37
set @SqlCounts = 'Count(*)'
38
end
39
else
40
begin
41
set @SqlSelect = 'select distinct '
42
set @SqlCounts = 'Count(DISTINCT '+@ID+')'
43
end
44
45
46
if @Sort=0
47
begin
48
set @strFSortType=' ASC '
49
set @strSortType=' DESC '
50
end
51
else
52
begin
53
set @strFSortType=' DESC '
54
set @strSortType=' ASC '
55
end
56
57
58
59
--------生成查询语句--------
60
--此处@strTmp为取得查询结果数量的语句
61
if @strCondition is null or @strCondition='' --没有设置显示条件
62
begin
63
set @sqlTmp = @fldName + ' From ' + @tblName
64
set @strTmp = @SqlSelect+' @Counts='+@SqlCounts+' FROM '+@tblName
65
set @strID = ' From ' + @tblName
66
end
67
else
68
begin
69
set @sqlTmp = + @fldName + 'From ' + @tblName + ' where (1>0) ' + @strCondition
70
set @strTmp = @SqlSelect+' @Counts='+@SqlCounts+' FROM '+@tblName + ' where (1>0) ' + @strCondition
71
set @strID = ' From ' + @tblName + ' where (1>0) ' + @strCondition
72
end
73
74
----取得查询结果总数量-----
75
exec sp_executesql @strTmp,N'@Counts int out ',@Counts out
76
declare @tmpCounts int
77
if @Counts = 0
78
set @tmpCounts = 1
79
else
80
set @tmpCounts = @Counts
81
82
--取得分页总数
83
set @pageCount=(@tmpCounts+@pageSize-1)/@pageSize
84
85
/**//**当前页大于总页数 取最后一页**/
86
if @page>@pageCount
87
set @page=@pageCount
88
89
--/*-----数据分页2分处理-------*/
90
declare @pageIndex int --总数/页大小
91
declare @lastcount int --总数%页大小
92
93
set @pageIndex = @tmpCounts/@pageSize
94
set @lastcount = @tmpCounts%@pageSize
95
if @lastcount > 0
96
set @pageIndex = @pageIndex + 1
97
else
98
set @lastcount = @pagesize
99
100
--//***显示分页
101
if @strCondition is null or @strCondition='' --没有设置显示条件
102
begin
103
if @pageIndex<2 or @page<=@pageIndex / 2 + @pageIndex % 2 --前半部分数据处理
104
begin
105
if @page=1
106
set @strTmp=@SqlSelect+' top '+ CAST(@pageSize as VARCHAR(4))+' '+ @fldName+' from '+@tblName
107
+' order by '+ @fldSort +' '+ @strFSortType
108
else
109
begin
110
if @Sort=1
111
begin
112
set @strTmp=@SqlSelect+' top '+ CAST(@pageSize as VARCHAR(4))+' '+ @fldName+' from '+@tblName
113
+' where '+@ID+' <(select min('+ @ID +') from ('+ @SqlSelect+' top '+ CAST(@pageSize*(@page-1) as Varchar(20)) +' '+ @ID +' from '+@tblName
114
+' order by '+ @fldSort +' '+ @strFSortType+') AS TBMinID)'
115
+' order by '+ @fldSort +' '+ @strFSortType
116
end
117
else
118
begin
119
set @strTmp=@SqlSelect+' top '+ CAST(@pageSize as VARCHAR(4))+' '+ @fldName+' from '+@tblName
120
+' where '+@ID+' >(select max('+ @ID +') from ('+ @SqlSelect+' top '+ CAST(@pageSize*(@page-1) as Varchar(20)) +' '+ @ID +' from '+@tblName
121
+' order by '+ @fldSort +' '+ @strFSortType+') AS TBMinID)'
122
+' order by '+ @fldSort +' '+ @strFSortType
123
end
124
end
125
end
126
else
127
begin
128
set @page = @pageIndex-@page+1 --后半部分数据处理
129
if @page <= 1 --最后一页数据显示
130
set @strTmp=@SqlSelect+' * from ('+@SqlSelect+' top '+ CAST(@lastcount as VARCHAR(4))+' '+ @fldName+' from '+@tblName
131
+' order by '+ @fldSort +' '+ @strSortType+') AS TempTB'+' order by '+ @fldSort +' '+ @strFSortType
132
else
133
if @Sort=1
134
begin
135
set @strTmp=@SqlSelect+' * from ('+@SqlSelect+' top '+ CAST(@pageSize as VARCHAR(4))+' '+ @fldName+' from '+@tblName
136
+' where '+@ID+' >(select max('+ @ID +') from('+ @SqlSelect+' top '+ CAST(@pageSize*(@page-2)+@lastcount as Varchar(20)) +' '+ @ID +' from '+@tblName
137
+' order by '+ @fldSort +' '+ @strSortType+') AS TBMaxID)'
138
+' order by '+ @fldSort +' '+ @strSortType+') AS TempTB'+' order by '+ @fldSort +' '+ @strFSortType
139
end
140
else
141
begin
142
set @strTmp=@SqlSelect+' * from ('+@SqlSelect+' top '+ CAST(@pageSize as VARCHAR(4))+' '+ @fldName+' from '+@tblName
143
+' where '+@ID+' <(select min('+ @ID +') from('+ @SqlSelect+' top '+ CAST(@pageSize*(@page-2)+@lastcount as Varchar(20)) +' '+ @ID +' from '+@tblName
144
+' order by '+ @fldSort +' '+ @strSortType+') AS TBMaxID)'
145
+' order by '+ @fldSort +' '+ @strSortType+') AS TempTB'+' order by '+ @fldSort +' '+ @strFSortType
146
end
147
end
148
end
149
150
else --有查询条件
151
begin
152
if @pageIndex<2 or @page<=@pageIndex / 2 + @pageIndex % 2 --前半部分数据处理
153
begin
154
if @page=1
155
set @strTmp=@SqlSelect+' top '+ CAST(@pageSize as VARCHAR(4))+' '+ @fldName+' from '+@tblName
156
+' where 1=1 ' + @strCondition + ' order by '+ @fldSort +' '+ @strFSortType
157
else if(@Sort=1)
158
begin
159
set @strTmp=@SqlSelect+' top '+ CAST(@pageSize as VARCHAR(4))+' '+ @fldName+' from '+@tblName
160
+' where '+@ID+' <(select min('+ @ID +') from ('+ @SqlSelect+' top '+ CAST(@pageSize*(@page-1) as Varchar(20)) +' '+ @ID +' from '+@tblName
161
+' where (1=1) ' + @strCondition +' order by '+ @fldSort +' '+ @strFSortType+') AS TBMinID)'
162
+' '+ @strCondition +' order by '+ @fldSort +' '+ @strFSortType
163
end
164
else
165
begin
166
set @strTmp=@SqlSelect+' top '+ CAST(@pageSize as VARCHAR(4))+' '+ @fldName+' from '+@tblName
167
+' where '+@ID+' >(select max('+ @ID +') from ('+ @SqlSelect+' top '+ CAST(@pageSize*(@page-1) as Varchar(20)) +' '+ @ID +' from '+@tblName
168
+' where (1=1) ' + @strCondition +' order by '+ @fldSort +' '+ @strFSortType+') AS TBMinID)'
169
+' '+ @strCondition +' order by '+ @fldSort +' '+ @strFSortType
170
end
171
end
172
else
173
begin
174
set @page = @pageIndex-@page+1 --后半部分数据处理
175
if @page <= 1 --最后一页数据显示
176
set @strTmp=@SqlSelect+' * from ('+@SqlSelect+' top '+ CAST(@lastcount as VARCHAR(4))+' '+ @fldName+' from '+@tblName
177
+' where (1=1) '+ @strCondition +' order by '+ @fldSort +' '+ @strSortType+') AS TempTB'+' order by '+ @fldSort +' '+ @strFSortType
178
else if(@Sort=1)
179
set @strTmp=@SqlSelect+' * from ('+@SqlSelect+' top '+ CAST(@pageSize as VARCHAR(4))+' '+ @fldName+' from '+@tblName
180
+' where '+@ID+' >(select max('+ @ID +') from('+ @SqlSelect+' top '+ CAST(@pageSize*(@page-2)+@lastcount as Varchar(20)) +' '+ @ID +' from '+@tblName
181
+' where (1=1) '+ @strCondition +' order by '+ @fldSort +' '+ @strSortType+') AS TBMaxID)'
182
+' '+ @strCondition+' order by '+ @fldSort +' '+ @strSortType+') AS TempTB'+' order by '+ @fldSort +' '+ @strFSortType
183
else
184
set @strTmp=@SqlSelect+' * from ('+@SqlSelect+' top '+ CAST(@pageSize as VARCHAR(4))+' '+ @fldName+' from '+@tblName
185
+' where '+@ID+' <(select min('+ @ID +') from('+ @SqlSelect+' top '+ CAST(@pageSize*(@page-2)+@lastcount as Varchar(20)) +' '+ @ID +' from '+@tblName
186
+' where (1=1) '+ @strCondition +' order by '+ @fldSort +' '+ @strSortType+') AS TBMaxID)'
187
+' '+ @strCondition+' order by '+ @fldSort +' '+ @strSortType+') AS TempTB'+' order by '+ @fldSort +' '+ @strFSortType
188
end
189
end
190
191
------返回查询结果-----
192
exec sp_executesql @strTmp
193
select datediff(ms,@timediff,getdate()) as 耗时
194
--print @strTmp
195
SET NOCOUNT OFF
196
GO
197

2

3

4

5

6

7

8

9

10

11

12

13

14

15

16

17

18

19

20

21

22

23

24

25

26

27

28

29

30

31

32

33

34

35

36

37

38

39

40

41

42

43

44

45

46

47

48

49

50

51

52

53

54

55

56

57

58

59

60

61

62

63

64

65

66

67

68

69

70

71

72

73

74

75

76

77

78

79

80

81

82

83

84

85

86

87

88

89

90

91

92

93

94

95

96

97

98

99

100

101

102

103

104

105

106

107

108

109

110

111

112

113

114

115

116

117

118

119

120

121

122

123

124

125

126

127

128

129

130

131

132

133

134

135

136

137

138

139

140

141

142

143

144

145

146

147

148

149

150

151

152

153

154

155

156

157

158

159

160

161

162

163

164

165

166

167

168

169

170

171

172

173

174

175

176

177

178

179

180

181

182

183

184

185

186

187

188

189

190

191

192

193

194

195

196

197
