全表对比增量抽取
1 概述
在做数据仓库系统时,增量数据抽取是必不可少的环节,目前实现增量数据的方式有很多,如:时间戳、触发器,CDC,归档日志,全表删除后全量抽取以及全表对比增量抽取等,本次案例只对全表对比增量抽取讲解。
2 创建源表和目标表
-- 新建源表
create table increasea
(
ID VARCHAR2(30) NOT NULL,
DATEID VARCHAR2(50),
DEPARTMENTID VARCHAR2(50),
EMPID VARCHAR2(50),
EMPNAME VARCHAR2(100),
SALARY NUMBER(18,2),
STATUS VARCHAR2(50)
);
ALTER TABLE increasea ADD CONSTRAINT PKINCREASE PRIMARY KEY (ID);
-- 新建目标表
create table increaseb
(
ID VARCHAR2(30) NOT NULL,
DATEID VARCHAR2(50),
DEPARTMENTID VARCHAR2(50),
EMPID VARCHAR2(50),
EMPNAME VARCHAR2(100),
SALARY NUMBER(18,2),
STATUS VARCHAR2(50)
);
ALTER TABLE increaseb ADD CONSTRAINT PKINCREASEB PRIMARY KEY (ID);
3 监控源表中新增、修改、删除的数据
-- 本条SQL可以监控源表中的新增和修改的数据
SELECT * FROM INCREASEA MINUS SELECT * FROM INCREASEB;
-- 本条SQL可以监控源表中删除的数据
SELECT * FROM INCREASEB MINUS SELECT * FROM INCREASEA;
4 存储过程实现增量抽取
CREATE OR REPLACE PROCEDURE INCREASEETL AS
BEGIN
-- 找出所有新增、修改的数据,然后根据ID主键删除目标表中未修改的数据,之后插入所有新增、修改的数据
DELETE FROM INCREASEB
WHERE ID IN
(SELECT ID
FROM (SELECT * FROM INCREASEA MINUS SELECT * FROM INCERASEB) a);
COMMIT;
INSERT INTO INCREASEB
SELECT *
FROM (SELECT * FROM INCREASEA MINUS SELECT * FROM INCREASEB) b;
COMMIT;
-- 找出所有源表中删除的数据,删除目标表中相关删除的数据
DELETE FROM INCREASEB
WHERE ID IN
(SELECT ID
FROM (SELECT * FROM INCREASEB MINUS SELECT * FROM INCREASEA) c);
COMMIT;
END;
5 测试
1)源表新增数据
INSERT INTO INCREASEA(ID,EMPNAME) VALUES(3,'wangwu');
commit;
调用一次存储过程INCREASEETL
begin
-- Call the procedure
increaseetl;
end;
查看目标表
SELECT * FROM INCREASEB;
2)源表修改数据
UPDATE INCREASEA SET EMPNAME = 'ZHAOLIU' WHERE ID = '3';
COMMIT;
调用一次存储过程INCREASEETL
begin
-- Call the procedure
increaseetl;
end;
查看目标表
SELECT * FROM INCREASEB;
3)源表删除数据
DELETE FROM INCREASEA WHERE ID = '3';
COMMIT;
调用一次存储过程INCREASEETL
begin
-- Call the procedure
increaseetl;
end;
查看目标表
SELECT * FROM INCREASEB;
6 备注
在测试过程中,每次执行存储过程的原因是:此存储过程用于每天调度执行增量抽取,实现ETL每天定时抽取数据。
本案例适用于数据量较小的业务。