Skip to content
清晨的一缕阳光
返回

MySQL SQL 编写最佳实践

核心概念

SQL 编写质量直接影响数据库性能。优秀的 SQL 应该:

SELECT 语句优化

1. 避免 SELECT *

-- 错误示例
SELECT * FROM users WHERE id = 1;

-- 正确示例
SELECT id, name, email FROM users WHERE id = 1;

原因:

2. 使用覆盖索引

-- 创建覆盖索引
CREATE INDEX idx_email_name ON users(email, name);

-- 查询只使用索引列
SELECT email, name FROM users WHERE email = 'test@example.com';
-- Extra: Using index(覆盖索引,不回表)

3. 小表驱动大表

-- 错误:大表驱动小表
SELECT * FROM orders o 
INNER JOIN users u ON o.user_id = u.id 
WHERE o.status = 1;  -- orders 表 1000 万行,users 表 10 万行

-- 正确:小表驱动大表
SELECT * FROM users u 
INNER JOIN orders o ON u.id = o.user_id 
WHERE o.status = 1;

4. EXISTS 优于 IN

-- IN:先执行子查询,可能产生大结果集
SELECT * FROM users 
WHERE id IN (SELECT user_id FROM orders WHERE amount > 100);

-- EXISTS:先执行外层查询,效率更高
SELECT * FROM users u 
WHERE EXISTS (SELECT 1 FROM orders o WHERE o.user_id = u.id AND o.amount > 100);

-- 更优:使用 JOIN
SELECT DISTINCT u.* FROM users u 
INNER JOIN orders o ON u.id = o.user_id 
WHERE o.amount > 100;

WHERE 条件优化

1. 避免在索引列上使用函数

-- 错误:索引失效,全表扫描
SELECT * FROM orders WHERE DATE(create_time) = '2024-01-01';
SELECT * FROM users WHERE YEAR(birth_date) = 1990;
SELECT * FROM products WHERE UPPER(name) = 'IPHONE';

-- 正确:使用范围查询
SELECT * FROM orders 
WHERE create_time >= '2024-01-01' AND create_time < '2024-01-02';

SELECT * FROM users 
WHERE birth_date >= '1990-01-01' AND birth_date < '1991-01-01';

SELECT * FROM products 
WHERE BINARY name = 'IPHONE';  -- 或应用层转换

2. 避免隐式类型转换

-- phone 是 VARCHAR 类型
-- 错误:数字不加引号,索引失效
SELECT * FROM users WHERE phone = 13800138000;

-- 正确:加引号
SELECT * FROM users WHERE phone = '13800138000';

-- status 是 INT 类型
-- 错误:字符串加引号,可能类型转换
SELECT * FROM orders WHERE status = '1';

-- 正确:数字不加引号
SELECT * FROM orders WHERE status = 1;

3. 避免 != 或 <> 操作符

-- 错误:索引失效
SELECT * FROM users WHERE status != 1;
SELECT * FROM users WHERE status <> 1;

-- 正确:使用 IN 或 OR
SELECT * FROM users WHERE status IN (0, 2, 3);
SELECT * FROM users WHERE status = 0 OR status = 2 OR status = 3;

4. IS NULL / IS NOT NULL 优化

-- IS NULL 可能使用索引(取决于数据分布)
SELECT * FROM users WHERE deleted_at IS NULL;

-- IS NOT NULL 通常不使用索引
SELECT * FROM users WHERE deleted_at IS NOT NULL;

-- 优化:使用默认值代替 NULL
ALTER TABLE users MODIFY deleted_at TINYINT DEFAULT 0;
SELECT * FROM users WHERE deleted_at = 0;

5. LIKE 查询优化

-- 错误:前缀通配符,索引失效
SELECT * FROM users WHERE name LIKE '%张%';

-- 正确:前缀匹配,使用索引
SELECT * FROM users WHERE name LIKE '张%';

-- 优化:使用全文索引
CREATE FULLTEXT INDEX idx_name_fulltext ON users(name);
SELECT * FROM users WHERE MATCH(name) AGAINST('张三');

-- 优化:使用搜索引擎(Elasticsearch)

JOIN 优化

1. 小表驱动大表原则

-- 原则:LEFT JOIN 时,左表应该是小表
-- 错误
SELECT * FROM large_table l 
LEFT JOIN small_table s ON l.id = s.large_id;

-- 正确
SELECT * FROM small_table s 
LEFT JOIN large_table l ON s.large_id = l.id;

2. ON 条件优化

-- 错误:过滤条件放在 WHERE
SELECT * FROM orders o 
LEFT JOIN users u ON o.user_id = u.id 
WHERE u.status = 1;  -- LEFT JOIN 变成 INNER JOIN

-- 正确:过滤条件放在 ON
SELECT * FROM orders o 
LEFT JOIN users u ON o.user_id = u.id AND u.status = 1;

3. 避免多表 JOIN

-- 错误:JOIN 表过多,性能差
SELECT o.*, u.name, p.title, c.name AS category_name, s.status_name
FROM orders o
JOIN users u ON o.user_id = u.id
JOIN products p ON o.product_id = p.id
JOIN categories c ON p.category_id = c.id
JOIN order_status s ON o.status = s.id
WHERE o.create_time > '2024-01-01';

-- 正确:拆分查询,应用层组装
-- 查询 1:获取订单
SELECT * FROM orders WHERE create_time > '2024-01-01';

-- 查询 2:批量获取用户信息
SELECT * FROM users WHERE id IN (...);

-- 查询 3:批量获取商品信息
SELECT * FROM products WHERE id IN (...);

-- 应用层组装数据

4. JOIN 字段类型一致

-- 错误:类型不一致,索引失效
-- orders.user_id 是 INT, users.id 是 BIGINT
SELECT * FROM orders o 
JOIN users u ON o.user_id = u.id;

-- 正确:确保类型一致
ALTER TABLE orders MODIFY user_id BIGINT;

ORDER BY 优化

1. 利用索引排序

-- 创建索引
CREATE INDEX idx_create_time ON orders(create_time);

-- 使用索引排序
SELECT * FROM orders ORDER BY create_time DESC;
-- Extra: Using index

-- 错误:排序字段无索引
SELECT * FROM orders ORDER BY remark;
-- Extra: Using filesort

2. 避免多字段排序

-- 创建复合索引
CREATE INDEX idx_status_time ON orders(status, create_time);

-- 使用索引排序
SELECT * FROM orders WHERE status = 1 ORDER BY create_time DESC;

-- 错误:排序字段顺序与索引不一致
SELECT * FROM orders ORDER BY create_time, status;
-- 无法使用索引排序

3. LIMIT 优化

-- 错误:深度分页
SELECT * FROM orders ORDER BY create_time DESC LIMIT 1000000, 10;

-- 优化 1:延迟关联
SELECT o.* FROM orders o
INNER JOIN (
    SELECT id FROM orders ORDER BY create_time DESC LIMIT 1000000, 10
) tmp ON o.id = tmp.id;

-- 优化 2:游标分页(推荐)
SELECT * FROM orders 
WHERE create_time < :last_seen_time 
ORDER BY create_time DESC 
LIMIT 10;

GROUP BY 优化

1. 利用索引分组

-- 创建索引
CREATE INDEX idx_category ON products(category_id);

-- 使用索引分组
SELECT category_id, COUNT(*) FROM products GROUP BY category_id;

2. 避免复杂分组

-- 错误:表达式分组,索引失效
SELECT DATE(create_time), COUNT(*) FROM orders GROUP BY DATE(create_time);

-- 正确:添加计算列
ALTER TABLE orders ADD COLUMN create_date DATE;
UPDATE orders SET create_date = DATE(create_time);
CREATE INDEX idx_create_date ON orders(create_date);

SELECT create_date, COUNT(*) FROM orders GROUP BY create_date;

3. WITH ROLLUP 优化

-- 使用 WITH ROLLUP 获取小计
SELECT category_id, status, COUNT(*) 
FROM products 
GROUP BY category_id, status WITH ROLLUP;

INSERT 优化

1. 批量插入

-- 错误:单条插入
INSERT INTO users (name, email) VALUES ('Alice', 'alice@example.com');
INSERT INTO users (name, email) VALUES ('Bob', 'bob@example.com');
INSERT INTO users (name, email) VALUES ('Charlie', 'charlie@example.com');

-- 正确:批量插入
INSERT INTO users (name, email) VALUES 
('Alice', 'alice@example.com'),
('Bob', 'bob@example.com'),
('Charlie', 'charlie@example.com');

2. 关闭自动提交

-- 错误:每条 INSERT 自动提交
SET autocommit = 1;
INSERT INTO users ...;  -- 1000 次
INSERT INTO users ...;
...

-- 正确:手动控制事务
SET autocommit = 0;
START TRANSACTION;
INSERT INTO users ...;  -- 1000 次
INSERT INTO users ...;
...
COMMIT;

3. 调整参数

-- 调整批量插入大小
SET GLOBAL bulk_insert_buffer_size = 64M;

-- 调整日志刷新策略
SET GLOBAL innodb_flush_log_at_trx_commit = 2;
SET GLOBAL sync_binlog = 100;

4. LOAD DATA 导入

-- 最快的数据导入方式
LOAD DATA LOCAL INFILE '/path/to/data.csv'
INTO TABLE users
FIELDS TERMINATED BY ','
LINES TERMINATED BY '\n'
(name, email, phone);

UPDATE 优化

1. 避免全表更新

-- 错误:无条件更新
UPDATE users SET status = 1;

-- 正确:添加 WHERE 条件
UPDATE users SET status = 1 WHERE id > 1000;

2. 批量更新使用 CASE

-- 错误:多条 UPDATE
UPDATE users SET status = 1 WHERE id = 1;
UPDATE users SET status = 2 WHERE id = 2;
UPDATE users SET status = 3 WHERE id = 3;

-- 正确:单条 CASE 语句
UPDATE users SET status = CASE id
    WHEN 1 THEN 1
    WHEN 2 THEN 2
    WHEN 3 THEN 3
END
WHERE id IN (1, 2, 3);

3. 使用 LIMIT 限制影响行数

-- 限制更新行数
UPDATE orders SET status = 1 WHERE status = 0 LIMIT 1000;

DELETE 优化

1. 避免大事务删除

-- 错误:一次性删除大量数据
DELETE FROM logs WHERE create_time < '2024-01-01';  -- 1000 万行

-- 正确:分批删除
DELETE FROM logs WHERE create_time < '2024-01-01' LIMIT 1000;
-- 循环执行直到影响行数为 0

2. 使用 TRUNCATE 代替 DELETE

-- 删除全表数据
DELETE FROM temp_table;  -- 慢,记录 undo log

-- 更快:TRUNCATE
TRUNCATE TABLE temp_table;  -- 快,不记录 undo log

3. 软删除代替物理删除

-- 物理删除
DELETE FROM users WHERE id = 1;

-- 软删除(推荐)
UPDATE users SET deleted_at = NOW() WHERE id = 1;

-- 查询时过滤
SELECT * FROM users WHERE deleted_at IS NULL;

子查询优化

1. 子查询转 JOIN

-- 子查询(可能效率低)
SELECT * FROM users 
WHERE id IN (SELECT user_id FROM orders WHERE amount > 100);

-- JOIN(通常更快)
SELECT DISTINCT u.* FROM users u
INNER JOIN orders o ON u.id = o.user_id
WHERE o.amount > 100;

2. 避免相关子查询

-- 错误:相关子查询,每行执行一次
SELECT u.*, 
    (SELECT COUNT(*) FROM orders WHERE user_id = u.id) AS order_count
FROM users u;

-- 正确:JOIN 聚合
SELECT u.*, COUNT(o.id) AS order_count
FROM users u
LEFT JOIN orders o ON u.id = o.user_id
GROUP BY u.id;

3. EXISTS 代替 IN

-- IN(先执行子查询)
SELECT * FROM users 
WHERE id IN (SELECT user_id FROM vip_users);

-- EXISTS(先执行外层)
SELECT * FROM users u 
WHERE EXISTS (SELECT 1 FROM vip_users v WHERE v.id = u.id);

常见陷阱

陷阱 1:OR 导致索引失效

-- 错误:OR 连接不同字段
SELECT * FROM users WHERE name = '张三' OR email = 'zhang@example.com';

-- 正确:UNION ALL
SELECT * FROM users WHERE name = '张三'
UNION ALL
SELECT * FROM users WHERE email = 'zhang@example.com';

陷阱 2:UNION 与 UNION ALL

-- UNION:去重,消耗资源
SELECT id FROM table1
UNION
SELECT id FROM table2;

-- UNION ALL:不去重,更快
SELECT id FROM table1
UNION ALL
SELECT id FROM table2;

陷阱 3:COUNT 性能

-- COUNT(*):最优(MySQL 优化)
SELECT COUNT(*) FROM users;

-- COUNT(1):与 COUNT(*) 相同
SELECT COUNT(1) FROM users;

-- COUNT(column):排除 NULL,稍慢
SELECT COUNT(email) FROM users;

-- COUNT(DISTINCT column):最慢
SELECT COUNT(DISTINCT email) FROM users;

陷阱 4:GROUP BY 默认行为

-- 错误:依赖 MySQL 默认行为(可能返回不确定数据)
SELECT id, name, email FROM users GROUP BY name;

-- 正确:使用聚合函数
SELECT ANY_VALUE(id), name, ANY_VALUE(email) FROM users GROUP BY name;

-- 或开启 ONLY_FULL_GROUP_BY 模式
SET sql_mode = 'ONLY_FULL_GROUP_BY';

陷阱 5:字符串拼接

-- 错误:使用 CONCAT 拼接大量字符串
SELECT CONCAT(name, ':', email, ':', phone) AS info FROM users;

-- 正确:应用层拼接
SELECT name, email, phone FROM users;
-- 应用层:`${name}:${email}:${phone}`

最佳实践总结

编写规范

  1. SELECT

    • 明确指定列名,避免 SELECT *
    • 使用覆盖索引
    • 小表驱动大表
  2. WHERE

    • 索引列不使用函数
    • 避免隐式类型转换
    • 使用 IN 代替多个 OR
  3. JOIN

    • 限制 JOIN 表数量(≤ 3 个)
    • 确保 JOIN 字段类型一致
    • 过滤条件放在正确的 ON/WHERE 中
  4. ORDER BY / GROUP BY

    • 利用索引排序和分组
    • 避免深度分页
    • 使用游标分页
  5. INSERT / UPDATE / DELETE

    • 批量操作
    • 分批处理大数据量
    • 使用事务控制

性能检查清单

-- 1. 使用 EXPLAIN 分析
EXPLAIN SELECT ...;

-- 2. 检查索引使用
-- type: 至少是 range,最好是 ref 或 const
-- Extra: 避免 Using temporary, Using filesort

-- 3. 检查慢查询
SELECT * FROM mysql.slow_log;

-- 4. 监控执行时间
SHOW PROFILING;

参考资料


分享这篇文章到:

上一篇文章
JVM 调优实战指南
下一篇文章
Go Map 底层实现原理