一、建立需要查询的表
CREATE TABLE Graduates (name VARCHAR(16) PRIMARY KEY, income INTEGER NOT NULL); -- 桑普森是个离群值,会拉高平均数 INSERT INTO Graduates VALUES('桑普森', 400000); INSERT INTO Graduates VALUES('迈克', 30000); INSERT INTO Graduates VALUES('怀特', 20000); INSERT INTO Graduates VALUES('阿诺德', 20000); INSERT INTO Graduates VALUES('史密斯', 20000); INSERT INTO Graduates VALUES('劳伦斯', 15000); INSERT INTO Graduates VALUES('哈德逊', 15000); INSERT INTO Graduates VALUES('肯特', 10000); INSERT INTO Graduates VALUES('贝克', 10000); INSERT INTO Graduates VALUES('斯科特', 10000);
1、求众数
(1)在having子句中用包含谓词all 的子查询
SELECT income, COUNT(*) AS cnt
FROM Graduates
GROUP BY income HAVING COUNT(*) >= ALL ( SELECT COUNT(*) FROM Graduates GROUP BY income);
缺点:谓词all 用于null和 空集时会出现问题
(2)在having子句中 用 包含极值函数的子查询
select income ,count(*) as cnt
from graduates
group by income
having cnt >= (select max(cnt)
from (select count(*) as cnt
from graduates
group by income) as tmp));
2、求中位数
用having子句进行自连接求中位数
第一步-- 将集合里的元素按照大小分为上半部分、下班部分 两个子集,求其交集(无论聚合数据的数目是 奇数 偶数)
select t1.income
from gradutes t1 , gradutes t2
group by t1.income
having sum(case when t2.income >=t1.income then 1 else 0 end) >= count(*)/2
and sum(case when t2.income <=t1.income then 1 else 0 end) >= count(*)/2;
第二步 -- 将上下部分集合求得的交集,去重,然后求平均,得到中值
select avg(distinct income)
from ( select t1.income
from gradutes t1,gradutes t2
group by t1.income
having sum(case when t2.income >= t1.income then 1 else 0) >= count(*)/2
and sum (case when t2.incomme <= t1.income then 1 else 0 ) >= count(*)/2) as tmp