核心概念
SQL 编写质量直接影响数据库性能。优秀的 SQL 应该:
- 正确性:结果准确,符合业务逻辑
- 性能:执行快速,资源消耗少
- 可读性:结构清晰,易于理解和维护
- 安全性:防止 SQL 注入,数据访问安全
SELECT 语句优化
1. 避免 SELECT *
-- 错误示例
SELECT * FROM users WHERE id = 1;
-- 正确示例
SELECT id, name, email FROM users WHERE id = 1;
原因:
- 增加网络传输开销
- 无法使用覆盖索引
- 表结构变更可能影响应用
- 读取不必要的数据
2. 使用覆盖索引
-- 创建覆盖索引
CREATE INDEX idx_email_name ON users(email, name);
-- 查询只使用索引列
SELECT email, name FROM users WHERE email = 'test@example.com';
-- Extra: Using index(覆盖索引,不回表)
3. 小表驱动大表
-- 错误:大表驱动小表
SELECT * FROM orders o
INNER JOIN users u ON o.user_id = u.id
WHERE o.status = 1; -- orders 表 1000 万行,users 表 10 万行
-- 正确:小表驱动大表
SELECT * FROM users u
INNER JOIN orders o ON u.id = o.user_id
WHERE o.status = 1;
4. EXISTS 优于 IN
-- IN:先执行子查询,可能产生大结果集
SELECT * FROM users
WHERE id IN (SELECT user_id FROM orders WHERE amount > 100);
-- EXISTS:先执行外层查询,效率更高
SELECT * FROM users u
WHERE EXISTS (SELECT 1 FROM orders o WHERE o.user_id = u.id AND o.amount > 100);
-- 更优:使用 JOIN
SELECT DISTINCT u.* FROM users u
INNER JOIN orders o ON u.id = o.user_id
WHERE o.amount > 100;
WHERE 条件优化
1. 避免在索引列上使用函数
-- 错误:索引失效,全表扫描
SELECT * FROM orders WHERE DATE(create_time) = '2024-01-01';
SELECT * FROM users WHERE YEAR(birth_date) = 1990;
SELECT * FROM products WHERE UPPER(name) = 'IPHONE';
-- 正确:使用范围查询
SELECT * FROM orders
WHERE create_time >= '2024-01-01' AND create_time < '2024-01-02';
SELECT * FROM users
WHERE birth_date >= '1990-01-01' AND birth_date < '1991-01-01';
SELECT * FROM products
WHERE BINARY name = 'IPHONE'; -- 或应用层转换
2. 避免隐式类型转换
-- phone 是 VARCHAR 类型
-- 错误:数字不加引号,索引失效
SELECT * FROM users WHERE phone = 13800138000;
-- 正确:加引号
SELECT * FROM users WHERE phone = '13800138000';
-- status 是 INT 类型
-- 错误:字符串加引号,可能类型转换
SELECT * FROM orders WHERE status = '1';
-- 正确:数字不加引号
SELECT * FROM orders WHERE status = 1;
3. 避免 != 或 <> 操作符
-- 错误:索引失效
SELECT * FROM users WHERE status != 1;
SELECT * FROM users WHERE status <> 1;
-- 正确:使用 IN 或 OR
SELECT * FROM users WHERE status IN (0, 2, 3);
SELECT * FROM users WHERE status = 0 OR status = 2 OR status = 3;
4. IS NULL / IS NOT NULL 优化
-- IS NULL 可能使用索引(取决于数据分布)
SELECT * FROM users WHERE deleted_at IS NULL;
-- IS NOT NULL 通常不使用索引
SELECT * FROM users WHERE deleted_at IS NOT NULL;
-- 优化:使用默认值代替 NULL
ALTER TABLE users MODIFY deleted_at TINYINT DEFAULT 0;
SELECT * FROM users WHERE deleted_at = 0;
5. LIKE 查询优化
-- 错误:前缀通配符,索引失效
SELECT * FROM users WHERE name LIKE '%张%';
-- 正确:前缀匹配,使用索引
SELECT * FROM users WHERE name LIKE '张%';
-- 优化:使用全文索引
CREATE FULLTEXT INDEX idx_name_fulltext ON users(name);
SELECT * FROM users WHERE MATCH(name) AGAINST('张三');
-- 优化:使用搜索引擎(Elasticsearch)
JOIN 优化
1. 小表驱动大表原则
-- 原则:LEFT JOIN 时,左表应该是小表
-- 错误
SELECT * FROM large_table l
LEFT JOIN small_table s ON l.id = s.large_id;
-- 正确
SELECT * FROM small_table s
LEFT JOIN large_table l ON s.large_id = l.id;
2. ON 条件优化
-- 错误:过滤条件放在 WHERE
SELECT * FROM orders o
LEFT JOIN users u ON o.user_id = u.id
WHERE u.status = 1; -- LEFT JOIN 变成 INNER JOIN
-- 正确:过滤条件放在 ON
SELECT * FROM orders o
LEFT JOIN users u ON o.user_id = u.id AND u.status = 1;
3. 避免多表 JOIN
-- 错误:JOIN 表过多,性能差
SELECT o.*, u.name, p.title, c.name AS category_name, s.status_name
FROM orders o
JOIN users u ON o.user_id = u.id
JOIN products p ON o.product_id = p.id
JOIN categories c ON p.category_id = c.id
JOIN order_status s ON o.status = s.id
WHERE o.create_time > '2024-01-01';
-- 正确:拆分查询,应用层组装
-- 查询 1:获取订单
SELECT * FROM orders WHERE create_time > '2024-01-01';
-- 查询 2:批量获取用户信息
SELECT * FROM users WHERE id IN (...);
-- 查询 3:批量获取商品信息
SELECT * FROM products WHERE id IN (...);
-- 应用层组装数据
4. JOIN 字段类型一致
-- 错误:类型不一致,索引失效
-- orders.user_id 是 INT, users.id 是 BIGINT
SELECT * FROM orders o
JOIN users u ON o.user_id = u.id;
-- 正确:确保类型一致
ALTER TABLE orders MODIFY user_id BIGINT;
ORDER BY 优化
1. 利用索引排序
-- 创建索引
CREATE INDEX idx_create_time ON orders(create_time);
-- 使用索引排序
SELECT * FROM orders ORDER BY create_time DESC;
-- Extra: Using index
-- 错误:排序字段无索引
SELECT * FROM orders ORDER BY remark;
-- Extra: Using filesort
2. 避免多字段排序
-- 创建复合索引
CREATE INDEX idx_status_time ON orders(status, create_time);
-- 使用索引排序
SELECT * FROM orders WHERE status = 1 ORDER BY create_time DESC;
-- 错误:排序字段顺序与索引不一致
SELECT * FROM orders ORDER BY create_time, status;
-- 无法使用索引排序
3. LIMIT 优化
-- 错误:深度分页
SELECT * FROM orders ORDER BY create_time DESC LIMIT 1000000, 10;
-- 优化 1:延迟关联
SELECT o.* FROM orders o
INNER JOIN (
SELECT id FROM orders ORDER BY create_time DESC LIMIT 1000000, 10
) tmp ON o.id = tmp.id;
-- 优化 2:游标分页(推荐)
SELECT * FROM orders
WHERE create_time < :last_seen_time
ORDER BY create_time DESC
LIMIT 10;
GROUP BY 优化
1. 利用索引分组
-- 创建索引
CREATE INDEX idx_category ON products(category_id);
-- 使用索引分组
SELECT category_id, COUNT(*) FROM products GROUP BY category_id;
2. 避免复杂分组
-- 错误:表达式分组,索引失效
SELECT DATE(create_time), COUNT(*) FROM orders GROUP BY DATE(create_time);
-- 正确:添加计算列
ALTER TABLE orders ADD COLUMN create_date DATE;
UPDATE orders SET create_date = DATE(create_time);
CREATE INDEX idx_create_date ON orders(create_date);
SELECT create_date, COUNT(*) FROM orders GROUP BY create_date;
3. WITH ROLLUP 优化
-- 使用 WITH ROLLUP 获取小计
SELECT category_id, status, COUNT(*)
FROM products
GROUP BY category_id, status WITH ROLLUP;
INSERT 优化
1. 批量插入
-- 错误:单条插入
INSERT INTO users (name, email) VALUES ('Alice', 'alice@example.com');
INSERT INTO users (name, email) VALUES ('Bob', 'bob@example.com');
INSERT INTO users (name, email) VALUES ('Charlie', 'charlie@example.com');
-- 正确:批量插入
INSERT INTO users (name, email) VALUES
('Alice', 'alice@example.com'),
('Bob', 'bob@example.com'),
('Charlie', 'charlie@example.com');
2. 关闭自动提交
-- 错误:每条 INSERT 自动提交
SET autocommit = 1;
INSERT INTO users ...; -- 1000 次
INSERT INTO users ...;
...
-- 正确:手动控制事务
SET autocommit = 0;
START TRANSACTION;
INSERT INTO users ...; -- 1000 次
INSERT INTO users ...;
...
COMMIT;
3. 调整参数
-- 调整批量插入大小
SET GLOBAL bulk_insert_buffer_size = 64M;
-- 调整日志刷新策略
SET GLOBAL innodb_flush_log_at_trx_commit = 2;
SET GLOBAL sync_binlog = 100;
4. LOAD DATA 导入
-- 最快的数据导入方式
LOAD DATA LOCAL INFILE '/path/to/data.csv'
INTO TABLE users
FIELDS TERMINATED BY ','
LINES TERMINATED BY '\n'
(name, email, phone);
UPDATE 优化
1. 避免全表更新
-- 错误:无条件更新
UPDATE users SET status = 1;
-- 正确:添加 WHERE 条件
UPDATE users SET status = 1 WHERE id > 1000;
2. 批量更新使用 CASE
-- 错误:多条 UPDATE
UPDATE users SET status = 1 WHERE id = 1;
UPDATE users SET status = 2 WHERE id = 2;
UPDATE users SET status = 3 WHERE id = 3;
-- 正确:单条 CASE 语句
UPDATE users SET status = CASE id
WHEN 1 THEN 1
WHEN 2 THEN 2
WHEN 3 THEN 3
END
WHERE id IN (1, 2, 3);
3. 使用 LIMIT 限制影响行数
-- 限制更新行数
UPDATE orders SET status = 1 WHERE status = 0 LIMIT 1000;
DELETE 优化
1. 避免大事务删除
-- 错误:一次性删除大量数据
DELETE FROM logs WHERE create_time < '2024-01-01'; -- 1000 万行
-- 正确:分批删除
DELETE FROM logs WHERE create_time < '2024-01-01' LIMIT 1000;
-- 循环执行直到影响行数为 0
2. 使用 TRUNCATE 代替 DELETE
-- 删除全表数据
DELETE FROM temp_table; -- 慢,记录 undo log
-- 更快:TRUNCATE
TRUNCATE TABLE temp_table; -- 快,不记录 undo log
3. 软删除代替物理删除
-- 物理删除
DELETE FROM users WHERE id = 1;
-- 软删除(推荐)
UPDATE users SET deleted_at = NOW() WHERE id = 1;
-- 查询时过滤
SELECT * FROM users WHERE deleted_at IS NULL;
子查询优化
1. 子查询转 JOIN
-- 子查询(可能效率低)
SELECT * FROM users
WHERE id IN (SELECT user_id FROM orders WHERE amount > 100);
-- JOIN(通常更快)
SELECT DISTINCT u.* FROM users u
INNER JOIN orders o ON u.id = o.user_id
WHERE o.amount > 100;
2. 避免相关子查询
-- 错误:相关子查询,每行执行一次
SELECT u.*,
(SELECT COUNT(*) FROM orders WHERE user_id = u.id) AS order_count
FROM users u;
-- 正确:JOIN 聚合
SELECT u.*, COUNT(o.id) AS order_count
FROM users u
LEFT JOIN orders o ON u.id = o.user_id
GROUP BY u.id;
3. EXISTS 代替 IN
-- IN(先执行子查询)
SELECT * FROM users
WHERE id IN (SELECT user_id FROM vip_users);
-- EXISTS(先执行外层)
SELECT * FROM users u
WHERE EXISTS (SELECT 1 FROM vip_users v WHERE v.id = u.id);
常见陷阱
陷阱 1:OR 导致索引失效
-- 错误:OR 连接不同字段
SELECT * FROM users WHERE name = '张三' OR email = 'zhang@example.com';
-- 正确:UNION ALL
SELECT * FROM users WHERE name = '张三'
UNION ALL
SELECT * FROM users WHERE email = 'zhang@example.com';
陷阱 2:UNION 与 UNION ALL
-- UNION:去重,消耗资源
SELECT id FROM table1
UNION
SELECT id FROM table2;
-- UNION ALL:不去重,更快
SELECT id FROM table1
UNION ALL
SELECT id FROM table2;
陷阱 3:COUNT 性能
-- COUNT(*):最优(MySQL 优化)
SELECT COUNT(*) FROM users;
-- COUNT(1):与 COUNT(*) 相同
SELECT COUNT(1) FROM users;
-- COUNT(column):排除 NULL,稍慢
SELECT COUNT(email) FROM users;
-- COUNT(DISTINCT column):最慢
SELECT COUNT(DISTINCT email) FROM users;
陷阱 4:GROUP BY 默认行为
-- 错误:依赖 MySQL 默认行为(可能返回不确定数据)
SELECT id, name, email FROM users GROUP BY name;
-- 正确:使用聚合函数
SELECT ANY_VALUE(id), name, ANY_VALUE(email) FROM users GROUP BY name;
-- 或开启 ONLY_FULL_GROUP_BY 模式
SET sql_mode = 'ONLY_FULL_GROUP_BY';
陷阱 5:字符串拼接
-- 错误:使用 CONCAT 拼接大量字符串
SELECT CONCAT(name, ':', email, ':', phone) AS info FROM users;
-- 正确:应用层拼接
SELECT name, email, phone FROM users;
-- 应用层:`${name}:${email}:${phone}`
最佳实践总结
编写规范
-
SELECT
- 明确指定列名,避免 SELECT *
- 使用覆盖索引
- 小表驱动大表
-
WHERE
- 索引列不使用函数
- 避免隐式类型转换
- 使用 IN 代替多个 OR
-
JOIN
- 限制 JOIN 表数量(≤ 3 个)
- 确保 JOIN 字段类型一致
- 过滤条件放在正确的 ON/WHERE 中
-
ORDER BY / GROUP BY
- 利用索引排序和分组
- 避免深度分页
- 使用游标分页
-
INSERT / UPDATE / DELETE
- 批量操作
- 分批处理大数据量
- 使用事务控制
性能检查清单
-- 1. 使用 EXPLAIN 分析
EXPLAIN SELECT ...;
-- 2. 检查索引使用
-- type: 至少是 range,最好是 ref 或 const
-- Extra: 避免 Using temporary, Using filesort
-- 3. 检查慢查询
SELECT * FROM mysql.slow_log;
-- 4. 监控执行时间
SHOW PROFILING;
参考资料
- MySQL 官方文档 - SQL 语法
- 《高性能 MySQL》第 4 章:Schema 与数据类型优化
- 《SQL 调优艺术》