zoukankan      html  css  js  c++  java
  • 【Oracle】删除重复记录

    --复习autotrace:
    SET AUTOTRACE OFF               --不生成AUTOTRACE 报告,这是缺省模式
    SET AUTOTRACE ON EXPLAIN        --AUTOTRACE只显示优化器执行路径报告
    SET AUTOTRACE ON STATISTICS     --只显示执行统计信息
    SET AUTOTRACE ON                --包含执行计划和统计信息
    SET AUTOTRACE TRACEONLY         --同set autotrace on,但是不显示查询输出
    
    --案例模拟
    --查询原始表:
    23:13:36 SQL> select * from emp;
    
         EMPNO ENAME      JOB              MGR HIREDATE            SAL       COMM     DEPTNO
    ---------- ---------- --------- ---------- ------------ ---------- ---------- ----------
          7698 BLAKE      MANAGER         7839 01-MAY-81          2850                    30
          7782 CLARK      MANAGER         7839 09-JUN-81          2450                    10
          7788 SCOTT      ANALYST         7566 19-APR-87          3000                    20
          7839 KING       PRESIDENT            17-NOV-81          5000                    10
          7844 TURNER     SALESMAN        7698 08-SEP-81          1500          0         30
          7876 ADAMS      CLERK           7788 23-MAY-87          1100                    20
          7900 JAMES      CLERK           7698 03-DEC-81           950                    30
          7902 FORD       ANALYST         7566 03-DEC-81          3000                    20
          7934 MILLER     CLERK           7782 23-JAN-82          1300                    10
          7369 SMITH      CLERK           7902 17-DEC-80           800                    20
          7499 ALLEN      SALESMAN        7698 20-FEB-81          1600        300         30
          7521 WARD       SALESMAN        7698 22-FEB-81          1250        500         30
          7566 JONES      MANAGER         7839 02-APR-81          2975                    20
          7654 MARTIN     SALESMAN        7698 28-SEP-81          1250       1400         30
    
    14 rows selected.
    
    Elapsed: 00:00:00.09
    
    --模拟:把emp表的全部记录原样再插入一次,使每条记录各重复一份(前提是empno上没有主键/唯一约束,否则插入会报错)
    INSERT INTO emp select * from emp;
    
    --检查重复记录
    23:13:39 SQL> SELECT COUNT(empno), empno FROM emp GROUP BY empno HAVING COUNT(empno) > 1;
    
    COUNT(EMPNO)      EMPNO
    ------------ ----------
               2       7782
               2       7839
               2       7844
               2       7698
               2       7902
               2       7521
               2       7788
               2       7934
               2       7566
               2       7654
               2       7499
               2       7876
               2       7900
               2       7369
    
    14 rows selected.
    
    Elapsed: 00:00:00.04
    
    --方法一(只保留ROWID最小的记录):
    DELETE FROM emp
    WHERE  empno IN
           (SELECT empno FROM emp GROUP BY empno HAVING COUNT(empno) > 1)
    AND    ROWID NOT IN
           (SELECT MIN(ROWID) FROM emp GROUP BY empno HAVING COUNT(empno) > 1);
    
    [lubinsu@200-168-46-3 shell_monitor]$ sqlplus lubinsu/lubinsu
    
    SQL*Plus: Release 10.2.0.1.0 - Production on Sun Aug 18 23:09:34 2013
    
    Copyright (c) 1982, 2005, Oracle.  All rights reserved.
    
    
    Connected to:
    Oracle Database 10g Enterprise Edition Release 10.2.0.1.0 - Production
    With the Partitioning, OLAP and Data Mining options
    
    23:09:46 SQL> set timing on
    23:09:51 SQL> set autotrace traceonly
    23:09:58 SQL> set linesize 200
    23:17:32 SQL> DELETE FROM emp
    23:17:38   2  WHERE  empno IN
    23:17:40   3         (SELECT empno FROM emp GROUP BY empno HAVING COUNT(empno) > 1)
    23:17:45   4  AND    ROWID NOT IN
    23:17:49   5         (SELECT MIN(ROWID) FROM emp GROUP BY empno HAVING COUNT(empno) > 1);
    
    14 rows deleted.
    
    Elapsed: 00:00:00.07
    
    Execution Plan
    ----------------------------------------------------------
    Plan hash value: 319293636
    
    ------------------------------------------------------------------------------------
    | Id  | Operation               | Name     | Rows  | Bytes | Cost (%CPU)| Time     |
    ------------------------------------------------------------------------------------
    |   0 | DELETE STATEMENT        |          |     1 |    41 |     9  (34)| 00:00:01 |
    |   1 |  DELETE                 | EMP      |       |       |            |          |
    |*  2 |   HASH JOIN ANTI        |          |     1 |    41 |     9  (34)| 00:00:01 |
    |*  3 |    HASH JOIN SEMI       |          |     1 |    29 |     6  (34)| 00:00:01 |
    |   4 |     TABLE ACCESS FULL   | EMP      |    14 |   224 |     2   (0)| 00:00:01 |
    |   5 |     VIEW                | VW_NSO_1 |     1 |    13 |     3  (34)| 00:00:01 |
    |*  6 |      FILTER             |          |       |       |            |          |
    |   7 |       SORT GROUP BY     |          |     1 |     4 |     3  (34)| 00:00:01 |
    |   8 |        TABLE ACCESS FULL| EMP      |    14 |    56 |     2   (0)| 00:00:01 |
    |   9 |    VIEW                 | VW_NSO_2 |     1 |    12 |     3  (34)| 00:00:01 |
    |* 10 |     FILTER              |          |       |       |            |          |
    |  11 |      SORT GROUP BY      |          |     1 |    16 |     3  (34)| 00:00:01 |
    |  12 |       TABLE ACCESS FULL | EMP      |    14 |   224 |     2   (0)| 00:00:01 |
    ------------------------------------------------------------------------------------
    
    Predicate Information (identified by operation id):
    ---------------------------------------------------
    
       2 - access(ROWID="$nso_col_1")
       3 - access("EMPNO"="$nso_col_1")
       6 - filter(COUNT("EMPNO")>1)
      10 - filter(COUNT("EMPNO")>1)
    
    
    Statistics
    ----------------------------------------------------------
              1  recursive calls
             15  db block gets
          16254  consistent gets
              0  physical reads
           4256  redo size
            925  bytes sent via SQL*Net to client
           1116  bytes received via SQL*Net from client
              6  SQL*Net roundtrips to/from client
              3  sorts (memory)
              0  sorts (disk)
             14  rows processed
             
    --方法二、使用分析函数(只保留ROWID最小的记录):
    --注意:下面的语句在子查询中直接写的是 SELECT ROWID,并没有用到内层视图里定义的别名 row_id;建议写成 SELECT v.row_id,含义更明确。
    23:17:57 SQL> rollback;
    
    Rollback complete.
    
    Elapsed: 00:00:00.02
    23:19:11 SQL> DELETE FROM emp
    23:22:22   2  WHERE  ROWID IN (SELECT ROWID
    23:22:28   3                   FROM   (SELECT ROWID row_id,
    23:22:32   4                                  row_number() over(PARTITION BY empno ORDER BY ROWID ASC) row_num
    23:22:37   5                           FROM   emp e) v
    23:22:41   6                   WHERE  v.row_num >= 2);
    
    14 rows deleted.
    
    Elapsed: 00:00:00.04
    
    Execution Plan
    ----------------------------------------------------------
    Plan hash value: 3671588261
    
    -----------------------------------------------------------------------------------------
    | Id  | Operation                    | Name     | Rows  | Bytes | Cost (%CPU)| Time     |
    -----------------------------------------------------------------------------------------
    |   0 | DELETE STATEMENT             |          |     1 |    24 |     5  (40)| 00:00:01 |
    |   1 |  DELETE                      | EMP      |       |       |            |          |
    |   2 |   NESTED LOOPS               |          |     1 |    24 |     5  (40)| 00:00:01 |
    |   3 |    VIEW                      | VW_NSO_1 |    14 |   168 |     3  (34)| 00:00:01 |
    |   4 |     SORT UNIQUE              |          |     1 |   350 |            |          |
    |*  5 |      VIEW                    |          |    14 |   350 |     3  (34)| 00:00:01 |
    |   6 |       WINDOW SORT            |          |    14 |   224 |     3  (34)| 00:00:01 |
    |   7 |        TABLE ACCESS FULL     | EMP      |    14 |   224 |     2   (0)| 00:00:01 |
    |   8 |    TABLE ACCESS BY USER ROWID| EMP      |     1 |    12 |     1   (0)| 00:00:01 |
    -----------------------------------------------------------------------------------------
    
    Predicate Information (identified by operation id):
    ---------------------------------------------------
    
       5 - filter("V"."ROW_NUM">=2)
    
    
    Statistics
    ----------------------------------------------------------
              1  recursive calls
             18  db block gets
           5434  consistent gets
              0  physical reads
           4448  redo size
            925  bytes sent via SQL*Net to client
           1190  bytes received via SQL*Net from client
              6  SQL*Net roundtrips to/from client
              3  sorts (memory)
              0  sorts (disk)
             14  rows processed
    
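    --方法三、使用关联子查询配合NOT IN(只保留ROWID最大的记录):
    --注意:从记录看,本次执行前没有回滚方法二的删除,表中已无重复记录,所以下面显示"0 rows deleted"、redo size为0,其统计信息不能与其他方法直接比较。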
    23:22:45 SQL> DELETE FROM emp p WHERE ROWID NOT IN(SELECT MAX(ROWID) FROM emp i WHERE i.empno = p.empno);
    
    0 rows deleted.
    
    Elapsed: 00:00:00.13
    
    Execution Plan
    ----------------------------------------------------------
    Plan hash value: 1987023575
    
    ---------------------------------------------------------------------------------
    | Id  | Operation             | Name    | Rows  | Bytes | Cost (%CPU)| Time     |
    ---------------------------------------------------------------------------------
    |   0 | DELETE STATEMENT      |         |    13 |   533 |     6  (34)| 00:00:01 |
    |   1 |  DELETE               | EMP     |       |       |            |          |
    |*  2 |   HASH JOIN           |         |    13 |   533 |     6  (34)| 00:00:01 |
    |   3 |    VIEW               | VW_SQ_1 |    14 |   350 |     3  (34)| 00:00:01 |
    |   4 |     SORT GROUP BY     |         |    14 |   224 |     3  (34)| 00:00:01 |
    |   5 |      TABLE ACCESS FULL| EMP     |    14 |   224 |     2   (0)| 00:00:01 |
    |   6 |    TABLE ACCESS FULL  | EMP     |    14 |   224 |     2   (0)| 00:00:01 |
    ---------------------------------------------------------------------------------
    
    Predicate Information (identified by operation id):
    ---------------------------------------------------
    
       2 - access("EMPNO"="P"."EMPNO")
           filter(ROWID<>"VW_COL_1")
    
    
    Statistics
    ----------------------------------------------------------
            168  recursive calls
              0  db block gets
          10954  consistent gets
              0  physical reads
              0  redo size
            925  bytes sent via SQL*Net to client
           1008  bytes received via SQL*Net from client
              6  SQL*Net roundtrips to/from client
              2  sorts (memory)
              0  sorts (disk)
              0  rows processed
              
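    --方法四、和方法三类似,只是把NOT IN换成了"<"比较(同样只保留ROWID最大的记录):
    --(执行前需先rollback恢复重复记录;下面这行Elapsed推测是rollback的输出残留,rollback命令本身未贴出)
    Elapsed: 00:00:00.03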
    23:28:25 SQL> DELETE FROM emp p WHERE ROWID < (SELECT MAX(ROWID) FROM emp i WHERE i.empno = p.empno);
    
    14 rows deleted.
    
    Elapsed: 00:00:00.06
    
    Execution Plan
    ----------------------------------------------------------
    Plan hash value: 1987023575
    
    ---------------------------------------------------------------------------------
    | Id  | Operation             | Name    | Rows  | Bytes | Cost (%CPU)| Time     |
    ---------------------------------------------------------------------------------
    |   0 | DELETE STATEMENT      |         |     1 |    41 |     6  (34)| 00:00:01 |
    |   1 |  DELETE               | EMP     |       |       |            |          |
    |*  2 |   HASH JOIN           |         |     1 |    41 |     6  (34)| 00:00:01 |
    |   3 |    VIEW               | VW_SQ_1 |    14 |   350 |     3  (34)| 00:00:01 |
    |   4 |     SORT GROUP BY     |         |    14 |   224 |     3  (34)| 00:00:01 |
    |   5 |      TABLE ACCESS FULL| EMP     |    14 |   224 |     2   (0)| 00:00:01 |
    |   6 |    TABLE ACCESS FULL  | EMP     |    14 |   224 |     2   (0)| 00:00:01 |
    ---------------------------------------------------------------------------------
    
    --方法五、使用GROUP BY的非关联子查询,与方法三、四类似(只保留ROWID最大的记录;执行前同样需要先rollback):
    23:30:01 SQL> DELETE FROM emp p WHERE ROWID NOT IN (SELECT MAX(ROWID) FROM emp i GROUP BY i.empno);
    
    14 rows deleted.
    
    Elapsed: 00:00:00.06
    
    Execution Plan
    ----------------------------------------------------------
    Plan hash value: 3239269824
    
    ----------------------------------------------------------------------------------
    | Id  | Operation             | Name     | Rows  | Bytes | Cost (%CPU)| Time     |
    ----------------------------------------------------------------------------------
    |   0 | DELETE STATEMENT      |          |     1 |    24 |     6  (34)| 00:00:01 |
    |   1 |  DELETE               | EMP      |       |       |            |          |
    |*  2 |   HASH JOIN ANTI      |          |     1 |    24 |     6  (34)| 00:00:01 |
    |   3 |    TABLE ACCESS FULL  | EMP      |    14 |   168 |     2   (0)| 00:00:01 |
    |   4 |    VIEW               | VW_NSO_1 |    14 |   168 |     3  (34)| 00:00:01 |
    |   5 |     SORT GROUP BY     |          |    14 |   224 |     3  (34)| 00:00:01 |
    |   6 |      TABLE ACCESS FULL| EMP      |    14 |   224 |     2   (0)| 00:00:01 |
    ----------------------------------------------------------------------------------
    
    Predicate Information (identified by operation id):
    ---------------------------------------------------
    
       2 - access(ROWID="$nso_col_1")
    
    
    Statistics
    ----------------------------------------------------------
              0  recursive calls
             17  db block gets
          10836  consistent gets
              0  physical reads
           4324  redo size
            926  bytes sent via SQL*Net to client
           1002  bytes received via SQL*Net from client
              6  SQL*Net roundtrips to/from client
              2  sorts (memory)
              0  sorts (disk)
             14  rows processed
    
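    --总结:按consistent gets比较,方法二(5434)低于方法一(16254)和方法五(10836),还是自己想出来的方法二效率高,:)
    --(方法三实际删除了0行、方法四未贴出统计信息,均不参与比较;另外测试表只有28行,结论仅供参考)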



  • 相关阅读:
    关于网购心态
    c++ In STL maps, is it better to use map::insert than []? Stack Overflow
    小工具:sshcopyid_老王的技术手册 ( 我的新博客:http://huoding.com )_百度空间
    djangoqbe
    C++ STL map的使用
    容器find_if函数定义和其第三个参数重载的疑问
    ArchLinux的安装与配置
    使用Grub进行Linux的硬盘安装与修复
    MySQL数据类型简介
    ArchLinux下Alsa的简单配置
  • 原文地址:https://www.cnblogs.com/pangblog/p/3268687.html
Copyright © 2011-2022 走看看