写在前面
HAVING子句的处理对象是集合而不是记录
各队,全体点名
-- Roll call for every team: one row per member, holding the member's team and current status.
CREATE TABLE Teams (
    member  CHAR(12) NOT NULL PRIMARY KEY,
    team_id INTEGER  NOT NULL,
    status  CHAR(8)  NOT NULL
);

-- Sample data, listed team by team.
INSERT INTO Teams (member, team_id, status) VALUES ('乔', 1, '待命');
INSERT INTO Teams (member, team_id, status) VALUES ('肯', 1, '出勤中');
INSERT INTO Teams (member, team_id, status) VALUES ('米克', 1, '待命');

INSERT INTO Teams (member, team_id, status) VALUES ('卡伦', 2, '出勤中');
INSERT INTO Teams (member, team_id, status) VALUES ('凯斯', 2, '休息');

INSERT INTO Teams (member, team_id, status) VALUES ('简', 3, '待命');
INSERT INTO Teams (member, team_id, status) VALUES ('哈特', 3, '待命');
INSERT INTO Teams (member, team_id, status) VALUES ('迪克', 3, '待命');

INSERT INTO Teams (member, team_id, status) VALUES ('贝斯', 4, '待命');

INSERT INTO Teams (member, team_id, status) VALUES ('阿伦', 5, '出勤中');
INSERT INTO Teams (member, team_id, status) VALUES ('罗伯特', 5, '休息');
INSERT INTO Teams (member, team_id, status) VALUES ('卡根', 5, '待命');
-- Find the teams whose members are all on standby ('待命').
-- NOT EXISTS approach: a member row is kept only when its team contains
-- no row whose status differs from '待命'.
SELECT
    T1.team_id,
    T1.member
FROM Teams AS T1
WHERE NOT EXISTS (
    SELECT *
    FROM Teams AS T2
    WHERE T2.team_id = T1.team_id
      AND T2.status <> '待命'
);
-- HAVING approach: a team qualifies when the number of its rows with
-- status '待命' equals the total number of its rows.
SELECT team_id
FROM Teams
GROUP BY team_id
HAVING SUM(CASE WHEN status = '待命' THEN 1 ELSE 0 END) = COUNT(*);
-- Variation 1: if both the smallest and the largest status in a team are '待命',
-- every status in that team is '待命'.
SELECT team_id
FROM Teams
GROUP BY team_id
HAVING MIN(status) = '待命'
   AND MAX(status) = '待命';
-- Variation 2: return every team, labelled by whether all of its members are on standby.
SELECT
    team_id,
    CASE
        WHEN MIN(status) = '待命' AND MAX(status) = '待命' THEN '全都在待命'
        ELSE '队长!人手不够'
    END AS status
FROM Teams
GROUP BY team_id;
单重集合与多重集合
-- Simple sets vs. multisets: materials received at each center, one row per (center, receive_date).
-- Date literals use zero-padded ISO 8601 form (YYYY-MM-DD), matching the Materials2 fixture;
-- unpadded forms such as '2007-4-01' are not accepted, or not compared correctly, by every engine.
CREATE TABLE Materials (
    center       CHAR(12) NOT NULL,
    receive_date DATE     NOT NULL,
    material     CHAR(12) NOT NULL,
    PRIMARY KEY (center, receive_date)
);

INSERT INTO Materials (center, receive_date, material) VALUES ('东京', '2007-04-01', '锡');
INSERT INTO Materials (center, receive_date, material) VALUES ('东京', '2007-04-12', '锌');
INSERT INTO Materials (center, receive_date, material) VALUES ('东京', '2007-05-17', '铝');
INSERT INTO Materials (center, receive_date, material) VALUES ('东京', '2007-05-20', '锌');

INSERT INTO Materials (center, receive_date, material) VALUES ('大阪', '2007-04-20', '铜');
INSERT INTO Materials (center, receive_date, material) VALUES ('大阪', '2007-04-22', '镍');
INSERT INTO Materials (center, receive_date, material) VALUES ('大阪', '2007-04-29', '铅');

INSERT INTO Materials (center, receive_date, material) VALUES ('名古屋', '2007-03-15', '钛');
INSERT INTO Materials (center, receive_date, material) VALUES ('名古屋', '2007-04-01', '钢');
INSERT INTO Materials (center, receive_date, material) VALUES ('名古屋', '2007-04-24', '钢');
INSERT INTO Materials (center, receive_date, material) VALUES ('名古屋', '2007-05-02', '镁');
INSERT INTO Materials (center, receive_date, material) VALUES ('名古屋', '2007-05-10', '钛');

INSERT INTO Materials (center, receive_date, material) VALUES ('福冈', '2007-05-10', '锌');
INSERT INTO Materials (center, receive_date, material) VALUES ('福冈', '2007-05-28', '锡');
-- Find the centers that received the same material more than once:
-- the row count exceeds the count of distinct materials exactly when some material repeats.
SELECT center
FROM Materials
GROUP BY center
HAVING COUNT(DISTINCT material) <> COUNT(*);
-- Alternative: list every center, labelled by whether any of its materials repeats.
SELECT
    center,
    CASE
        WHEN COUNT(DISTINCT material) <> COUNT(*) THEN '存在重复'
        ELSE '不存在重复'
    END AS status
FROM Materials
GROUP BY center;
-- EXISTS approach: return each row for which the same center received
-- the same material on a different date.
SELECT
    M1.center,
    M1.material
FROM Materials AS M1
WHERE EXISTS (
    SELECT *
    FROM Materials AS M2
    WHERE M2.center = M1.center
      AND M2.material = M1.material
      AND M2.receive_date <> M1.receive_date
);
寻找缺失的编号:升级版
-- Finding missing numbers, advanced version: a table holding a sequence of integers.
CREATE TABLE SeqTbl (
    seq INTEGER NOT NULL PRIMARY KEY
);

-- Fixture with no missing numbers (starting value = 1).
-- The unfiltered DELETE is intentional: it empties the table before loading the fixture.
DELETE FROM SeqTbl;
INSERT INTO SeqTbl (seq) VALUES (1);
INSERT INTO SeqTbl (seq) VALUES (2);
INSERT INTO SeqTbl (seq) VALUES (3);
INSERT INTO SeqTbl (seq) VALUES (4);
INSERT INTO SeqTbl (seq) VALUES (5);
-- If this query returns a row, the sequence has a missing number.
-- Only valid for sequences that start at 1: with no gaps, the row count equals the largest value.
-- The whole table is treated as a single group, so HAVING is used without GROUP BY.
SELECT '存在缺失的编号' AS gap
FROM SeqTbl
HAVING COUNT(*) <> MAX(seq);
-- If this query returns a row, the sequence has a missing number.
-- Checks continuity only, whatever the starting value: a gap-free sequence
-- holds exactly MAX - MIN + 1 rows.
SELECT '存在缺失的编号' AS gap
FROM SeqTbl
HAVING MAX(seq) - MIN(seq) + 1 <> COUNT(*);
-- Always return exactly one row, whether or not a gap exists.
-- The empty-table case is tested first, before MAX and MIN are compared.
SELECT
    CASE
        WHEN COUNT(*) = 0                       THEN '空表'
        WHEN MAX(seq) - MIN(seq) + 1 = COUNT(*) THEN '不存在缺失编号'
        ELSE '存在缺失编号'
    END AS gap
FROM SeqTbl;
-- Find the smallest missing number.
-- An empty table, or one whose smallest value is above 1, yields 1;
-- otherwise take the smallest seq + 1 that is not itself present in the table.
SELECT
    CASE
        WHEN MIN(seq) > 1 OR COUNT(*) = 0 THEN 1
        ELSE (
            SELECT MIN(S1.seq + 1)
            FROM SeqTbl AS S1
            WHERE NOT EXISTS (
                SELECT *
                FROM SeqTbl AS S2
                WHERE S2.seq = S1.seq + 1
            )
        )
    END
FROM SeqTbl;
为集合设置详细的条件
-- Setting detailed conditions on sets: one test score per student, with class and sex.
CREATE TABLE TestResults (
    student CHAR(12) NOT NULL PRIMARY KEY,
    class   CHAR(1)  NOT NULL,
    sex     CHAR(1)  NOT NULL,
    score   INTEGER  NOT NULL
);

-- Class A
INSERT INTO TestResults (student, class, sex, score) VALUES ('001', 'A', '男', 100);
INSERT INTO TestResults (student, class, sex, score) VALUES ('002', 'A', '女', 100);
INSERT INTO TestResults (student, class, sex, score) VALUES ('003', 'A', '女', 49);
INSERT INTO TestResults (student, class, sex, score) VALUES ('004', 'A', '男', 30);

-- Class B
INSERT INTO TestResults (student, class, sex, score) VALUES ('005', 'B', '女', 100);
INSERT INTO TestResults (student, class, sex, score) VALUES ('006', 'B', '男', 92);
INSERT INTO TestResults (student, class, sex, score) VALUES ('007', 'B', '男', 80);
INSERT INTO TestResults (student, class, sex, score) VALUES ('008', 'B', '男', 80);
INSERT INTO TestResults (student, class, sex, score) VALUES ('009', 'B', '女', 10);

-- Class C
INSERT INTO TestResults (student, class, sex, score) VALUES ('010', 'C', '男', 92);
INSERT INTO TestResults (student, class, sex, score) VALUES ('011', 'C', '男', 80);
INSERT INTO TestResults (student, class, sex, score) VALUES ('012', 'C', '女', 21);

-- Class D
INSERT INTO TestResults (student, class, sex, score) VALUES ('013', 'D', '女', 100);
INSERT INTO TestResults (student, class, sex, score) VALUES ('014', 'D', '女', 0);
INSERT INTO TestResults (student, class, sex, score) VALUES ('015', 'D', '女', 0);
-- Find the classes in which at least 75% of the students scored 80 or higher.
SELECT class
FROM TestResults
GROUP BY class
HAVING SUM(CASE WHEN score >= 80 THEN 1 ELSE 0 END) >= COUNT(*) * 0.75;
-- Find the classes in which more boys than girls scored 50 or higher.
SELECT class
FROM TestResults
GROUP BY class
HAVING SUM(CASE WHEN sex = '女' AND score >= 50 THEN 1 ELSE 0 END)
     < SUM(CASE WHEN sex = '男' AND score >= 50 THEN 1 ELSE 0 END);
-- Compare the boys' and girls' average scores (version 2).
-- A CASE with no ELSE yields NULL, so the other sex is ignored by AVG; the average of an
-- empty set is NULL, so a class lacking either sex makes the comparison unknown and is left out.
SELECT class
FROM TestResults
GROUP BY class
HAVING AVG(CASE WHEN sex = '女' THEN score END)
     > AVG(CASE WHEN sex = '男' THEN score END);
小结
常用的调查集合性质的条件及其用途
No | 条件表达式 | 用途 |
---|---|---|
1 | COUNT(DISTINCT col)=COUNT(col) | col列没有重复值 |
2 | COUNT(*)=COUNT(col) | col列没有空值 |
3 | COUNT(*)=MAX(col) | col列没有缺失(起始是1) |
4 | COUNT(*)=MAX(col)-MIN(col)+1 | col列是连续的编号 |
5 | MIN(col)=MAX(col) | col列是相同值或NULL |
6 | MIN(col)*MAX(col)>0 | col列全是正数或全是负数 |
7 | MIN(col)*MAX(col)<0 | col列的最大值是正数,最小值是负数 |
8 | MIN(ABS(col))=0 | col列至少有一个0 |
9 | MIN(col-常量) = -MAX(col-常量) | col列的最大值和最小值与指定常量等距 |
- SQL指定搜索条件时,最重要的是搞清楚搜索的实体是集合还是集合的元素
- 如果一个实体对应着一行数据,那么就是元素,所以使用WHERE子句
- 如果一个实体对应着多行数据,那么就是集合,所以使用HAVING子句
- HAVING子句 可以通过聚合函数针对集合指定各种条件
- 如果通过CASE表达式生成特征函数,那么无论多么复杂的条件都可以描述
- HAVING子句很强大
练习题
/* Exercise 1-10-1: generalizing simple sets and multisets */
-- Materials received at each center, now with each material's country of origin.
CREATE TABLE Materials2 (
    center       VARCHAR(32) NOT NULL,
    receive_date DATE        NOT NULL,
    material     VARCHAR(32) NOT NULL,
    orgland      VARCHAR(32) NOT NULL,
    PRIMARY KEY (center, receive_date, material)
);

INSERT INTO Materials2 (center, receive_date, material, orgland) VALUES ('东京', '2007-04-01', '锡', '智利');
INSERT INTO Materials2 (center, receive_date, material, orgland) VALUES ('东京', '2007-04-12', '锌', '泰国');
INSERT INTO Materials2 (center, receive_date, material, orgland) VALUES ('东京', '2007-05-17', '铝', '巴西');
INSERT INTO Materials2 (center, receive_date, material, orgland) VALUES ('东京', '2007-05-20', '锌', '泰国');

INSERT INTO Materials2 (center, receive_date, material, orgland) VALUES ('大阪', '2007-04-20', '铜', '澳大利亚');
INSERT INTO Materials2 (center, receive_date, material, orgland) VALUES ('大阪', '2007-04-22', '镍', '南非');
INSERT INTO Materials2 (center, receive_date, material, orgland) VALUES ('大阪', '2007-04-29', '铅', '印度');

INSERT INTO Materials2 (center, receive_date, material, orgland) VALUES ('名古屋', '2007-03-15', '钛', '玻利维亚');
INSERT INTO Materials2 (center, receive_date, material, orgland) VALUES ('名古屋', '2007-04-01', '钢', '智利');
INSERT INTO Materials2 (center, receive_date, material, orgland) VALUES ('名古屋', '2007-04-24', '钢', '阿根廷');
INSERT INTO Materials2 (center, receive_date, material, orgland) VALUES ('名古屋', '2007-05-02', '镁', '智利');
INSERT INTO Materials2 (center, receive_date, material, orgland) VALUES ('名古屋', '2007-05-10', '钛', '泰国');

INSERT INTO Materials2 (center, receive_date, material, orgland) VALUES ('福冈', '2007-05-10', '锌', '美国');
INSERT INTO Materials2 (center, receive_date, material, orgland) VALUES ('福冈', '2007-05-28', '锡', '俄罗斯');
/* Exercise 1-10-1: generalizing simple sets and multisets */
-- Find the centers that received the same material from the same country of origin
-- on more than one date. GROUP BY collapses the matching rows to one row per center.
SELECT M1.center
FROM Materials2 AS M1
WHERE EXISTS (
    SELECT *
    FROM Materials2 AS M2
    WHERE M2.center = M1.center
      AND M2.receive_date <> M1.receive_date
      AND M2.material = M1.material
      AND M2.orgland = M1.orgland
)
GROUP BY M1.center;
/* Exercise 1-10-1: generalizing simple sets and multisets.
   Select the centers that have a repeated (material, country of origin) combination. */
-- Group on both columns instead of comparing counts of the concatenation material || orgland.
-- Without a delimiter, different pairs can concatenate to the same string, and || is not
-- string concatenation in every engine. A group of more than one row is a repeated pair;
-- DISTINCT reports each center once even if several of its pairs repeat.
SELECT DISTINCT center
FROM Materials2
GROUP BY center, material, orgland
HAVING COUNT(*) > 1;
/* Exercise 1-10-2: a characteristic function over several conditions */
-- One row per (student, subject) holding that student's score in the subject.
CREATE TABLE TestScores (
    student_id INTEGER     NOT NULL,
    subject    VARCHAR(16) NOT NULL,
    score      INTEGER     NOT NULL,
    PRIMARY KEY (student_id, subject)
);

INSERT INTO TestScores (student_id, subject, score) VALUES (100, '数学', 100);
INSERT INTO TestScores (student_id, subject, score) VALUES (100, '语文', 80);
INSERT INTO TestScores (student_id, subject, score) VALUES (100, '理化', 80);

INSERT INTO TestScores (student_id, subject, score) VALUES (200, '数学', 80);
INSERT INTO TestScores (student_id, subject, score) VALUES (200, '语文', 95);

INSERT INTO TestScores (student_id, subject, score) VALUES (300, '数学', 40);
INSERT INTO TestScores (student_id, subject, score) VALUES (300, '语文', 50);
INSERT INTO TestScores (student_id, subject, score) VALUES (300, '社会', 55);

INSERT INTO TestScores (student_id, subject, score) VALUES (400, '数学', 80);
-- Find the students who scored 80 or higher in math ('数学') AND 50 or higher in Chinese ('语文').
-- Both thresholds are inclusive, so the comparisons are >= rather than >.
-- WHERE narrows each student's rows to the two subjects of interest; the CASE expression
-- scores 1 for each subject that meets its threshold, so a sum of 2 means both conditions hold.
-- A student missing either subject can reach at most 1 and is excluded.
SELECT student_id
FROM TestScores
WHERE subject IN ('数学', '语文')
GROUP BY student_id
HAVING SUM(CASE WHEN subject = '数学' AND score >= 80 THEN 1
                WHEN subject = '语文' AND score >= 50 THEN 1
                ELSE 0 END) = 2;