单表查询
一、语法顺序
select distinct 查询字段1,查询字段2,。。。 from 表名
where 分组之前的过滤条件
group by 分组依据
having 分组之后的过滤条件
order by 排序字段
limit 显示的条数;
二、执行顺序
def from(dir,file):
f=open('%s\%s' %(dir,file),'r')
return f
def where(f,pattern):
for line in f:
if pattern:
yield line
def group_by():
pass
def having():
pass
def distinct():
pass
def order_by():
pass
def limit():
pass
def select():
res1=from()
#在硬盘中找到表
res2=where(res1,pattern)
#拿着where指定的约束条件,去文件/表中取出一条条记录,在内存中得到一张虚拟的表, 如果没有where,默认全True
res3=group_by(res2,)
#将取出的一条条记录进行分组group by,如果没有group by,默认整体作为一组
res4=having(res3)
#将分组的结果进行having过滤,如果没有having,默认全True
res5=distinct(res4)
#去重, 如果没有distinct,默认不去重
res6=order_by(res5)
#将结果按条件排序
limit(res6)
#限制结果的显示条数
三、按照优先级的级别写SQL语句
a、先确定是哪张表 from db39.emp
b、是否有过滤条件 where name like '%i%'
。。。
z、放功能 select
四、where过滤
where子句中可以使用:
1. 比较运算符:> < >= <= <> != #不等于可以用 != 或 <>,二者在MySQL中等价(<> 是SQL标准写法)
select id,name from db39.emp where id >= 3 and id <= 6
2. between 80 and 100
select * from db39.emp where id between 3 and 6; # >=3 and <=6
3. in(80,90,100) 值是80或90或100
select * from emp where salary in (20000,18000,17000); # select * from emp where salary = 20000 or salary = 18000 or salary = 17000;
4. like 'egon%', pattern可以是%或_, %表示任意多字符, _表示一个字符
select name,salary from db39.emp where name like '%i%' #要求:查询员工姓名中包含i字母的员工姓名与其薪资
select name,salary from db39.emp where name like '____'; #要求:查询员工姓名是由四个字符组成的员工姓名与其薪资
select name,salary from db39.emp where char_length(name) = 4; #结果与上一条一致
5. 逻辑运算符:在多个条件之间可以使用逻辑运算符 and or not
select * from db39.emp where id not between 3 and 6;
select * from emp where salary not in (20000,18000,17000);
要求:查询岗位描述为空的员工名与岗位名
select name,post from db39.emp where post_comment is NULL; #针对NULL必须用is,不能用=
select name,post from db39.emp where post_comment is not NULL;
#NULL指的是不占任何存储空间,在mysql中空字符串也是占存储空间的,即不为空(NULL)
五、group by分组
如果不设置成only_full_group_by模式,分完组后用*取出的是组内某一条记录的数据(通常是第一条,但MySQL并不保证是哪一条)。分完组后单独取组内的某个元素是没有意义的,所以,分组前,一般会对模式做如下处理
#设置sql_mode为only_full_group_by,意味着以后但凡分组,只能取到分组的依据或聚合函数的结果(set global只对之后新建的连接生效,设置后需要重新连接)
mysql> set global sql_mode="strict_trans_tables,only_full_group_by";
#聚合函数 group function(一般与分组连用)
select post,max(salary) from emp group by post; #取不出组内的元素name, age..,只能取组名(分组依据)或用聚合函数
select post,min(salary) from emp group by post;
select post,avg(salary) from emp group by post;
select post,sum(salary) from emp group by post;
select post,count(id) from emp group by post;
#group_concat(分组之后用):把想要用的信息取出;字符串拼接操作
select post,group_concat(name) from emp group by post;
select post,group_concat(name,"_SB") from emp group by post;
select post,group_concat(name,": ",salary) from emp group by post;
select post,group_concat(salary) from emp group by post;
# 补充concat(不分组时用):字符串拼接操作
select concat("NAME: ",name) as 姓名,concat("SAL: ",salary) as 薪资 from emp;
# 补充as语法:为字段或表取别名
select name as 姓名,salary as 薪资 from emp; # as可省略
mysql> select emp.id,emp.name from emp as t1; # 报错
mysql> select t1.id,t1.name from emp as t1; # 同 mysql> select id,name from emp as t1;
# 查询四则运算
select name,salary*12 as annual_salary from emp;
#分组练习
select post,group_concat(name) from emp group by post; #查询岗位名以及岗位包含的所有员工名字
select post,count(id) from emp group by post; #查询岗位名以及各岗位内包含的员工个数
select sex,count(id) from emp group by sex; #查询公司内男员工和女员工的个数
select post,avg(salary) from emp group by post; #查询岗位名以及各岗位的平均薪资
select sex,avg(salary) from emp group by sex; #查询男员工的平均薪资与女员工的平均薪资
select post,avg(salary) from emp where age >= 30 group by post; #统计各部门年龄在30岁以上的员工平均工资
六、having过滤 (一定要用组名(分组依据)或聚合函数)
having的语法格式与where一模一样,只不过having是在分组之后进行的进一步过滤
即where不能用聚合函数,而having是可以用聚合函数,这也是他们俩最大的区别
#统计各部门年龄在30岁以上的员工平均工资,并且保留平均工资大于10000的部门
select post,avg(salary) from emp where age >= 30 group by post having avg(salary) > 10000;
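#强调:having通常跟在group by后面使用;如果没有group by,整张表被当作一组,此时select后面同样只能放聚合函数(或分组依据)
select * from emp having avg(salary) > 10000; #报错:only_full_group_by模式下,没有group by的聚合查询不能用*取非聚合字段;改成 select avg(salary) from emp having avg(salary) > 10000; 可以执行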
七、distinct去重 (在having之后执行,和post,name等属于同一执行级别)
select distinct post,avg(salary) from emp where age >= 30 group by post having avg(salary) > 10000;
八、order by 排序 (默认升序)
select * from emp order by salary asc; #默认升序排
select * from emp order by salary desc; #降序排
select * from emp order by age desc; #降序排
select * from emp order by age desc,salary asc; #先按照age降序排,再按照薪资升序排
# 统计各部门年龄在10岁以上的员工平均工资,并且保留平均工资大于1000的部门,然后对平均工资进行排序
select post,avg(salary) from emp where age > 10 group by post having avg(salary) > 1000 order by avg(salary);
九、limit 限制显示条数;分页
select * from emp limit 3;
select * from emp order by salary desc limit 1; #显示薪资最高人的信息
select * from emp limit 0,5; #分页, 从0开始,取5条(1-5)
select * from emp limit 5,5; #分页, 从5开始,取5条(6-10)
十、正则表达式
select * from emp where name regexp '^jin.*(n|g)$'; #调正则;正则表达式通用
多表连接查询
一、笛卡尔积
from emp,dep,dep2,...
二、内连接:把两张表有对应关系的记录连接成一张虚拟表
select * from emp inner join dep on emp.dep_id = dep.id;
#应用:
select * from emp,dep where emp.dep_id = dep.id and dep.name = "技术"; # 不推荐;不要用where做连表的活
select * from emp inner join dep on emp.dep_id = dep.id where dep.name = "技术"; #逻辑与上一条一致
三、左连接:在内连接的基础上,保留左边没有对应关系的记录
select * from emp left join dep on emp.dep_id = dep.id;
四、右连接:在内连接的基础上,保留右边没有对应关系的记录
select * from emp right join dep on emp.dep_id = dep.id;
五、全连接:在内连接的基础上,保留左、右边没有对应关系的记录
select * from emp left join dep on emp.dep_id = dep.id
union #去重
select * from emp right join dep on emp.dep_id = dep.id;
六、多表连接可以是单表不断地与虚拟表连接
#查找各部门最高工资
select t1.* from emp as t1
inner join
(select post,max(salary) as ms from emp group by post) as t2 #把虚拟表提成t2
on t1.post = t2.post
where t1.salary = t2.ms
;
select t1.* from emp as t1
inner join
(select post,max(salary) as ms from emp group by post) as t2
on t1.post = t2.post and t1.salary = t2.ms #必须同时按post关联,否则会匹配到薪资恰好等于其他部门最高工资的员工
;
子查询(一个问题一个问题解决)
把一个查询语句用括号括起来,当做另外一条查询语句的条件去用,称为子查询
select name from emp where dep_id = (select id from dep where name="技术"); #子查询
select emp.name from emp inner join dep on emp.dep_id = dep.id where dep.name="技术"; #连表
#查询平均年龄在25岁以上的部门名
select name from dep where id in (select dep_id from emp group by dep_id having avg(age) > 25); #子查询
select dep.name from emp inner join dep on emp.dep_id = dep.id group by dep.name having avg(age) > 25; #连表
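#查看不足2人的部门名(子查询得到的是有人的部门id)
select name from dep where id not in (select dep_id from emp where dep_id is not null group by dep_id having count(id) >= 2); #子查询取出人数>=2的部门id,not in之后得到不足2人(含0人)的部门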
select * from emp where exists (select id from dep where id > 3); #exists用法,当()返回True时,外层查询语句将进行查询;当返回值为False时,外层查询语句不进行查询(empty set)
#查询每个部门最新入职的那位员工
select t1.id,t1.name,t1.post,t1.hire_date,t2.post,t2.max_date from emp as t1 inner join (select post,max(hire_date) as max_date from emp group by post) as t2 on t1.post = t2.post where t1.hire_date = t2.max_date;