zoukankan      html  css  js  c++  java
  • SQL进阶系列之5外连接的用法

    写在前面

    SQL本身是作为一种数据提取工具而出现的,生成各种定制化和非定制化报表并不是它的本来用途,但这并不意味着SQL无法实现这些功能。

    用外连接进行行列转换(1)(行 → 列):制作交叉表

    -- Table definition
    /* Row-to-column conversion with outer joins (1): building a cross table */
    CREATE TABLE Courses (
        name   VARCHAR(32),
        course VARCHAR(32),
        PRIMARY KEY (name, course)
    );

    -- Sample data: the primary key allows one row per (name, course) pair.
    INSERT INTO Courses (name, course)
    VALUES ('赤井', 'SQL入门'),
           ('赤井', 'UNIX基础'),
           ('铃木', 'SQL入门'),
           ('工藤', 'SQL入门'),
           ('工藤', 'Java中级'),
           ('吉田', 'UNIX基础'),
           ('渡边', 'SQL入门');
    
    -- Cross table by horizontal expansion (1): outer joins.
    -- C0 is the list of distinct names; C1..C3 each hold the names that have a
    -- row for one specific course. The LEFT JOIN leaves Cn.name NULL where no
    -- such row exists, and the CASE turns that into an empty (NULL) cell.
    -- Fix: the first column alias previously opened with a typographic quote,
    -- which made the whole statement a syntax error.
    SELECT C0.name,
           CASE WHEN C1.name IS NOT NULL THEN '○' ELSE NULL END AS "SQL入门",
           CASE WHEN C2.name IS NOT NULL THEN '○' ELSE NULL END AS "UNIX基础",
           CASE WHEN C3.name IS NOT NULL THEN '○' ELSE NULL END AS "Java中级"
    FROM (SELECT DISTINCT name FROM Courses) AS C0
    LEFT JOIN (SELECT name FROM Courses WHERE course = 'SQL入门') AS C1
        ON C0.name = C1.name
    LEFT JOIN (SELECT name FROM Courses WHERE course = 'UNIX基础') AS C2
        ON C0.name = C2.name
    LEFT JOIN (SELECT name FROM Courses WHERE course = 'Java中级') AS C3
        ON C0.name = C3.name;
    -- Assessment (author's): intuitive and easy to follow, but it leans heavily
    -- on inline views and joins; as columns are added the code bloats and
    -- performance degrades.
    
    -- Cross table by horizontal expansion (2): scalar subqueries.
    -- Each subquery yields at most one row because (name, course) is the
    -- primary key of Courses; when no row matches, the cell is NULL.
    SELECT P.name,
           (SELECT '○'
              FROM Courses AS E
             WHERE E.name = P.name
               AND E.course = 'SQL入门')  AS "SQL入门",
           (SELECT '○'
              FROM Courses AS E
             WHERE E.name = P.name
               AND E.course = 'UNIX基础') AS "UNIX基础",
           (SELECT '○'
              FROM Courses AS E
             WHERE E.name = P.name
               AND E.course = 'Java中级') AS "Java中级"
    FROM (SELECT DISTINCT name FROM Courses) AS P;
    -- Assessment (author's): adding a course only touches the SELECT list, so
    -- the query adapts well to changing requirements. The drawback is cost:
    -- correlated scalar subqueries in the SELECT list are expensive to evaluate.
    
    -- Cross table by horizontal expansion (3): nested CASE expressions.
    -- The inner CASE flags rows of one course with 1; the per-name SUM is 1
    -- when such a row exists. A CASE without ELSE yields NULL otherwise.
    SELECT name,
           CASE WHEN SUM(CASE WHEN course = 'SQL入门'  THEN 1 ELSE 0 END) = 1
                THEN '○'
           END AS "SQL入门",
           CASE WHEN SUM(CASE WHEN course = 'UNIX基础' THEN 1 ELSE 0 END) = 1
                THEN '○'
           END AS "UNIX基础",
           CASE WHEN SUM(CASE WHEN course = 'Java中级' THEN 1 ELSE 0 END) = 1
                THEN '○'
           END AS "Java中级"
    FROM Courses
    GROUP BY name;
    

    用外连接进行行列转换(2)(列 → 行):汇总重复项于一列

    -- Table definition
    /* Row/column conversion with outer joins (2) (columns to rows): collapsing repeated columns into one */
    CREATE TABLE Personnel (
        employee VARCHAR(32),
        child_1  VARCHAR(32),
        child_2  VARCHAR(32),
        child_3  VARCHAR(32),
        PRIMARY KEY (employee)
    );

    -- Sample data: unused child slots are NULL.
    INSERT INTO Personnel (employee, child_1, child_2, child_3)
    VALUES ('赤井', '一郎', '二郎', '三郎'),
           ('工藤', '春子', '夏子', NULL),
           ('铃木', '夏子', NULL,   NULL),
           ('吉田', NULL,   NULL,   NULL);
    
    -- Columns to rows, keeping rows whose child value is NULL.
    -- The result column takes its name (child_1) from the first branch.
    SELECT employee, child_1
    FROM Personnel
    UNION ALL
    SELECT employee, child_2
    FROM Personnel
    UNION ALL
    SELECT employee, child_3
    FROM Personnel;

    -- Columns to rows, dropping rows whose child value is NULL.
    SELECT employee, child_1
    FROM Personnel
    WHERE child_1 IS NOT NULL
    UNION ALL
    SELECT employee, child_2
    FROM Personnel
    WHERE child_2 IS NOT NULL
    UNION ALL
    SELECT employee, child_3
    FROM Personnel
    WHERE child_3 IS NOT NULL;
    
    -- Keeping a custom selection of rows (not every empty row is removed)

    -- View of all children. UNION (not UNION ALL) removes duplicates, so each
    -- distinct value from the three child columns appears once; NULL, if
    -- present in any column, also appears as a single row.
    CREATE VIEW Children AS
    SELECT child_1 AS child
    FROM Personnel
    UNION
    SELECT child_2
    FROM Personnel
    UNION
    SELECT child_3
    FROM Personnel;
    
    -- List each employee's children; employees without children still appear.
    -- A NULL child slot never satisfies IN, so an employee whose slots are all
    -- NULL matches nothing and the LEFT JOIN returns one row with a NULL child.
    SELECT P.employee,
           C.child
    FROM Personnel AS P
    LEFT JOIN Children AS C
        ON C.child IN (P.child_1, P.child_2, P.child_3);
    

    在交叉表里制作嵌套式表侧栏

    /* Building a nested side header in a cross table */
    -- Lookup table: sex code and its label.
    CREATE TABLE TblSex (
        sex_cd CHAR(1),
        sex    VARCHAR(5),
        PRIMARY KEY (sex_cd)
    );

    -- Lookup table: age class code and its label.
    CREATE TABLE TblAge (
        age_class CHAR(1),
        age_range VARCHAR(30),
        PRIMARY KEY (age_class)
    );

    -- Population figures, one row per (prefecture, age class, sex).
    CREATE TABLE TblPop (
        pref_name  VARCHAR(30),
        age_class  CHAR(1),
        sex_cd     CHAR(1),
        population INTEGER,
        PRIMARY KEY (pref_name, age_class, sex_cd)
    );

    INSERT INTO TblSex (sex_cd, sex)
    VALUES ('m', '男'),
           ('f', '女');

    INSERT INTO TblAge (age_class, age_range)
    VALUES ('1', '21岁~30岁'),
           ('2', '31岁~40岁'),
           ('3', '41岁~50岁');

    -- Note: no row uses age class '2', and some (prefecture, class, sex)
    -- combinations are absent.
    INSERT INTO TblPop (pref_name, age_class, sex_cd, population)
    VALUES ('秋田', '1', 'm',  400),
           ('秋田', '3', 'm', 1000),
           ('秋田', '1', 'f',  800),
           ('秋田', '3', 'f', 1000),
           ('青森', '1', 'm',  700),
           ('青森', '1', 'f',  500),
           ('青森', '3', 'f',  800),
           ('东京', '1', 'm',  900),
           ('东京', '1', 'f', 1500),
           ('东京', '3', 'f', 1200),
           ('千叶', '1', 'm',  900),
           ('千叶', '1', 'f', 1000),
           ('千叶', '3', 'f',  900);
    
    -- Build the sidebar: every age class paired with every sex.
    SELECT age_class,
           age_range,
           sex_cd,
           sex
    FROM TblAge
    CROSS JOIN TblSex;

    -- Aggregate TblPop per (age_class, sex_cd), splitting the population into
    -- two region columns according to the prefecture name.
    SELECT age_class,
           sex_cd,
           SUM(CASE WHEN pref_name IN ('秋田','青森') THEN population ELSE 0 END) AS "东北",
           SUM(CASE WHEN pref_name IN ('东京','千叶') THEN population ELSE 0 END) AS "关东"
    FROM TblPop
    GROUP BY age_class, sex_cd;
    -- Join the two results. The sidebar drives the output, so every
    -- (age range, sex) combination appears; combinations with no TblPop rows
    -- get NULL in both region columns.
    SELECT side.age_range,
           side.sex,
           pop.东北,
           pop.关东
    FROM (SELECT age_class, age_range, sex_cd, sex
          FROM TblAge
          CROSS JOIN TblSex) AS side
    LEFT JOIN (SELECT age_class,
                      sex_cd,
                      SUM(CASE WHEN pref_name IN ('秋田','青森') THEN population ELSE 0 END) AS "东北",
                      SUM(CASE WHEN pref_name IN ('东京','千叶') THEN population ELSE 0 END) AS "关东"
               FROM TblPop
               GROUP BY age_class, sex_cd) AS pop
        ON side.age_class = pop.age_class
       AND side.sex_cd = pop.sex_cd;
    

    作为乘法运算的连接

    -- Table definitions
    /* Joins as multiplication */
    CREATE TABLE Items (
        item_no INTEGER     PRIMARY KEY,
        item    VARCHAR(32) NOT NULL
    );

    INSERT INTO Items (item_no, item)
    VALUES (10, 'FD'),
           (20, 'CD-R'),
           (30, 'MO'),
           (40, 'DVD');

    -- At most one row per item per sale date.
    CREATE TABLE SalesHistory (
        sale_date DATE    NOT NULL,
        item_no   INTEGER NOT NULL,
        quantity  INTEGER NOT NULL,
        PRIMARY KEY (sale_date, item_no)
    );

    -- Note: item 40 has no sales rows.
    INSERT INTO SalesHistory (sale_date, item_no, quantity)
    VALUES ('2007-10-01', 10,  4),
           ('2007-10-01', 20, 10),
           ('2007-10-01', 30,  3),
           ('2007-10-03', 10, 32),
           ('2007-10-03', 30, 12),
           ('2007-10-04', 20, 22),
           ('2007-10-04', 30,  7);
    
    -- Goal: total quantity for every item, including items with no sales rows.
    -- Method 1: aggregate first, then join one-to-one.
    SELECT I.item_no,
           totals.quantity
    FROM Items AS I
    LEFT JOIN (SELECT item_no,
                      SUM(quantity) AS quantity
               FROM SalesHistory
               GROUP BY item_no) AS totals
        ON I.item_no = totals.item_no;
    -- Assessment (author's): aggregation makes item_no non-null and unique in
    -- the derived table, but the derived table has no primary-key index, so
    -- the join cannot benefit from one.
    
    -- Method 2: join one-to-many, then aggregate.
    -- Items.item_no is a primary key, so each SalesHistory row matches at most
    -- one item and the join does not multiply sales rows. SUM over an item
    -- with no sales rows yields NULL.
    SELECT I.item_no,
           SUM(SH.quantity) AS quantity
    FROM Items AS I
    LEFT JOIN SalesHistory AS SH
        ON I.item_no = SH.item_no
    GROUP BY I.item_no;
    -- Assessment (author's): no intermediate derived table is used, which
    -- should improve performance somewhat.
    

    全外连接

    面向集合的角度理解连接类型

    • 左外连接 LEFT OUTER JOIN
    • 右外连接 RIGHT OUTER JOIN
    • 全外连接 FULL OUTER JOIN
    /* Full outer join */
    CREATE TABLE Class_A (
        id   CHAR(1),
        name VARCHAR(30),
        PRIMARY KEY (id)
    );

    CREATE TABLE Class_B (
        id   CHAR(1),
        name VARCHAR(30),
        PRIMARY KEY (id)
    );

    -- ids 1 and 2 exist in both tables; id 3 only in Class_A, id 4 only in Class_B.
    INSERT INTO Class_A (id, name)
    VALUES ('1', '田中'),
           ('2', '铃木'),
           ('3', '伊集院');

    INSERT INTO Class_B (id, name)
    VALUES ('1', '田中'),
           ('2', '铃木'),
           ('4', '西园寺');
    
    -- A full outer join keeps the rows of both tables. COALESCE picks
    -- whichever side supplied the id, since the other side is NULL when a row
    -- has no partner.
    SELECT COALESCE(A.id, B.id) AS id,
           A.name               AS a_name,
           B.name               AS b_name
    FROM Class_A AS A
    FULL OUTER JOIN Class_B AS B
        ON A.id = B.id;
    
    -- Substitute for engines without FULL JOIN: the UNION of a left and a
    -- right outer join. UNION (not UNION ALL) is required: rows matched on id
    -- come back identically from both branches and must appear only once.
    -- The name columns are aliased a_name / b_name so the result has no
    -- duplicate column names and lines up with the FULL JOIN version.
    SELECT A.id AS id, A.name AS a_name, B.name AS b_name
    FROM Class_A AS A
    LEFT JOIN Class_B AS B
        ON A.id = B.id
    UNION
    SELECT B.id AS id, A.name AS a_name, B.name AS b_name
    FROM Class_A AS A
    RIGHT JOIN Class_B AS B
        ON A.id = B.id;
    

    INNER JOIN(或者INTERSECT)相当于求交集;FULL JOIN(或者用UNION合并LEFT JOIN与RIGHT JOIN的结果)相当于求并集。

    用外连接进行集合运算

    用外连接求差集:A-B

    -- Difference A - B: rows of Class_A with no partner in Class_B.
    -- The filter tests B.id, the join key and primary key, which is NULL only
    -- when the LEFT JOIN found no match. B.name is nullable, so testing it
    -- would also keep a matched row whose name happens to be NULL.
    SELECT A.id, A.name AS A_name, B.name AS B_name
    FROM Class_A AS A
    LEFT JOIN Class_B AS B
        ON A.id = B.id
    WHERE B.id IS NULL;
    

    用外连接求差集:B-A

    -- Difference B - A: rows of Class_B with no partner in Class_A.
    -- The id must come from B: for the rows kept here every A column is NULL,
    -- so selecting A.id would always return NULL. The filter tests A.id (the
    -- join key) rather than the nullable A.name, which could be NULL for a
    -- matched row as well.
    SELECT B.id, A.name AS A_name, B.name AS B_name
    FROM Class_A AS A
    RIGHT JOIN Class_B AS B
        ON A.id = B.id
    WHERE A.id IS NULL;
    

    用全外连接求异或集

    -- Symmetric difference (XOR): rows present in exactly one of the tables.
    -- A side is missing exactly when its join key is NULL, so the filter tests
    -- the id columns; the name columns are nullable and could be NULL for a
    -- matched row, which would wrongly report that row.
    SELECT COALESCE(A.id, B.id)     AS id,
           COALESCE(A.name, B.name) AS name
    FROM Class_A AS A
    FULL JOIN Class_B AS B
        ON A.id = B.id
    WHERE A.id IS NULL
       OR B.id IS NULL;
    

    本节小结

    • SQL不是用来生成报表的语言,不建议用其进行格式转换
    • 必要时可以考虑外连接和CASE表达式来解决问题
    • 生成嵌套表侧栏时,如果先生成主表的笛卡尔积再进行连接,很容易就可以完成
    • 从行数来看,表连接可以看成乘法。因此,当表之间是一对多的关系时,连接后行数不会增加
    • 外连接的思想和集合运算很像,使用外连接可以实现各种集合运算

    练习题

    -- Exercise 1-5-1: join first or aggregate first?
    -- Here the join runs first and the aggregation second. The sidebar is
    -- written as an ordinary derived table: putting an alias on a
    -- parenthesised join is an extension that not every engine accepts.
    -- ELSE NULL (rather than 0) leaves a cell NULL when no row contributes.
    -- The key columns are part of GROUP BY so that two classes sharing the
    -- same label would not be merged into one row.
    SELECT master.age_range,
           master.sex,
           SUM(CASE WHEN data.pref_name IN ('青森','秋田') THEN data.population ELSE NULL END) AS "东北",
           SUM(CASE WHEN data.pref_name IN ('东京','千叶') THEN data.population ELSE NULL END) AS "关东"
    FROM (SELECT age_class, age_range, sex_cd, sex
          FROM TblAge
          CROSS JOIN TblSex) AS master
    LEFT JOIN TblPop AS data
        ON master.age_class = data.age_class
       AND master.sex_cd = data.sex_cd
    GROUP BY master.age_class, master.age_range, master.sex_cd, master.sex;
    
    -- Exercise 1-5-2: mind the number of children.
    -- COUNT(child) skips NULLs, so an employee whose LEFT JOIN row carries a
    -- NULL child is counted as 0 rather than 1.
    SELECT P.employee,
           COUNT(C.child)
    FROM Personnel AS P
    LEFT JOIN Children AS C
        ON C.child IN (P.child_1, P.child_2, P.child_3)
    GROUP BY P.employee;
    
    -- Exercise 1-5-3: full outer join and the MERGE statement.
    -- Folds Class_B into Class_A: an id present in both gets its name
    -- overwritten with B's value; an id found only in B is inserted.
    -- The source lists its columns explicitly instead of SELECT *, and the
    -- SET target is left unqualified: it always refers to the target table,
    -- and PostgreSQL rejects a table-qualified target column here.
    MERGE INTO Class_A A
        USING (SELECT id, name
                 FROM Class_B) B
          ON (A.id = B.id)
        WHEN MATCHED THEN
            UPDATE SET name = B.name
        WHEN NOT MATCHED THEN
            INSERT (id, name) VALUES (B.id, B.name);
    
  • 相关阅读:
    初学Python,对于开发工具不是很了解?一文带你选择适合你的开发工具
    Python文学家为Python写的一首词?(附中英文版)
    大数据到底怎么学: 数据科学概论与大数据学习误区
    Python写代码的时候为什么要注释?Sun因此被Oracle收购
    大数据分析:大数据时代如何发现身边的大数据?
    大数据经典学习路线(及供参考)之 一
    关于如何获取移动端 touchmove 事件中真正触摸点下方的元素
    webservice
    VS文件后缀名大全详解
    string 转 char* (C#)
  • 原文地址:https://www.cnblogs.com/evian-jeff/p/11544220.html
Copyright © 2011-2022 走看看