摘要:
下文将分享三种不同的数据去重方法
数据去重:需根据某一字段来界定,当此字段出现大于一行记录时,我们就界定为此行数据存在重复。
数据去重方法1:
当表中最在最大流水号时候,我们可以通过关联的方式为每条重复的记录获取唯一值
数据去重方法2:
为表中记录,按照指定字段进行群组,并获取最大流水号,然后再进行去重操作
数据去重方法3:
采用分组后,重复数据组内排名,如果排名大于1代表是重复数据行数据
三种去重方法效率对比:
方法3 > 方法2 > 方法1
create table test(keyId int identity,sort varchar(10), info varchar(20)) go ---方法1 truncate table test ; insert into test(sort,info)values('A','maomao365.com')--1 insert into test(sort,info)values('A','猫猫小屋') --2 insert into test(sort,info)values('B','mssql_blog') --3 insert into test(sort,info)values('B','优秀的sql——blog') --4 insert into test(sort,info)values('B','maomao365') --5 insert into test(sort,info)values('C','sql优化blog') --6 go delete from test where test.keyId = (select max(b.keyId) from test b where test.sort=b.sort); select * from test ---方法2: truncate table test ; insert into test(sort,info)values('A','maomao365.com') insert into test(sort,info)values('A','猫猫小屋') insert into test(sort,info)values('B','mssql_blog') insert into test(sort,info)values('B','优秀的sql——blog') insert into test(sort,info)values('B','maomao365') insert into test(sort,info)values('C','sql优化blog') go delete from test where keyid not in(select min(keyId) from test group by sort having count(sort)>=1); select * from test ---方法3: truncate table test ; insert into test(sort,info)values('A','maomao365.com') insert into test(sort,info)values('A','猫猫小屋') insert into test(sort,info)values('B','mssql_blog') insert into test(sort,info)values('B','优秀的sql——blog') insert into test(sort,info)values('B','maomao365') insert into test(sort,info)values('C','sql优化blog') go delete A2 from ( select row_Number() over(partition by sort order by keyid) as keyId_e,* from test ) as A2 where A2.keyId_e >1 select * from test go drop table test
<img src="http://www.maomao365.com/wp-content/uploads/2018/07/mssql_sqlserver_数据表数据去重的三种方法分享.png" alt="mssql_sqlserver_数据表数据去重的三种方法分享" width="813" height="749" class="size-full wp-image-6767" />