一、去重,去除多个字段相同的记录,保留一个
场景一:去除表中所有重复数据,均只保留一个
delete from tbl_dept where dept_id not in ( select id from ( select min(dept_id) as id from tbl_dept group by dept_name,rm_flag ) as temp )
场景二:只去除表中市声部的重复数据,且只保留一个
delete from tbl_dept where dept_id not in ( select id from ( select min(dept_id) as id from tbl_dept group by dept_name,rm_flag ) as temp ) and dept_name like '%市声部%'
二、查询 名称为'%user%'的表 和 有'user'字段的表
名称中有'%user%'的表
select table_name from information_schema.tables where table_schema='neimenggu_new2' and table_name like '%user%'
有'user'字段的表
select TABLE_SCHEMA,TABLE_NAME,COLUMN_NAME from information_schema.columns where column_name like 'user'
三、存在一个表而不在另一个表中的数据
A、B两表,找出ID字段中,存在A表,但是不存在B表的数据。A表总共13w数据,去重后大约3W条数据,B表有2W条数据,且B表的ID字段有索引。
方法一
使用 not in ,容易理解,效率低~执行时间为:1.395秒~
select distinct A.ID from A where A.ID not in (select ID from B)
方法二
使用 left join...on... , "B.ID isnull" 表示左连接之后在B.ID字段为 null的记录~执行时间:0.739秒~
select A.ID from A left join B on A.ID=B.ID where B.ID is null
方法三
逻辑相对复杂,但是速度最快~执行时间: 0.570秒~
select * from B where (select count(1) as num from A where A.ID = B.ID) = 0
四、行转列、合并字段、列转行
-- 行转列(最大值)
select user_name,
MAX(CASE WHEN course='数学' THEN score ELSE 0 END) 数学,
MAX(CASE WHEN course='语文' THEN score ELSE 0 END) 语文,
MAX(CASE WHEN course='英语' THEN score ELSE 0 END) 英语
from TEST_TB_GRADE GROUP BY user_name
-- 行转列(合计)
select user_name,
SUM(CASE WHEN course='数学' THEN score ELSE 0 END) 数学,
SUM(CASE WHEN course='语文' THEN score ELSE 0 END) 语文,
SUM(CASE WHEN course='英语' THEN score ELSE 0 END) 英语
from TEST_TB_GRADE GROUP BY user_name
-- 合并字段
select user_name,GROUP_CONCAT(course,":",score) from TEST_TB_GRADE GROUP BY user_name
-- 列转行
select user_name, '语文' COURSE , CN_SCORE as SCORE from test_tb_grade2
union select user_name, '数学' COURSE, MATH_SCORE as SCORE from test_tb_grade2
union select user_name, '英语' COURSE, EN_SCORE as SCORE from test_tb_grade2
order by user_name,COURSE;