1. 引言:慢查询的代价与影响
在当今数据驱动的应用中,慢查询是系统性能的"隐形杀手"。对于Java开发者而言,理解慢查询的监控与优化至关重要:
- 用户体验:页面加载时间从毫秒级到秒级的恶化
- 系统吞吐:单个慢查询可能阻塞整个数据库连接池
- 资源消耗:CPU、内存、IO的异常占用
- 业务风险:超时导致的交易失败、数据不一致
// 真实场景:慢查询导致的级联故障 @Service @Transactional public class OrderService { public void processBatchOrders(List<Order> orders) { for (Order order : orders) { // 看似简单的查询,在数据量增长后变慢 User user = userRepository.findByComplexConditions(order.getCriteria()); if (user != null) { order.setUserId(user.getId()); orderRepository.save(order); } } // 事务长时间不提交,导致连接池耗尽 } }
2. 慢查询基础概念
2.1 什么是慢查询?
定义:执行时间超过指定阈值的SQL语句。MySQL默认阈值为10秒,但生产环境通常设置为1-2秒。
-- 查看当前慢查询配置 SHOW VARIABLES LIKE 'long_query_time'; -- 通常设置为1-2秒 SET GLOBAL long_query_time = 1.0;
2.2 慢查询的根源分析
3. 慢查询监控体系搭建
3.1 慢查询日志配置
# /etc/my.cnf 慢查询配置 [mysqld] # 开启慢查询日志 slow_query_log = 1 slow_query_log_file = /var/log/mysql/slow.log long_query_time = 1.0 log_queries_not_using_indexes = 1 log_throttle_queries_not_using_indexes = 60 min_examined_row_limit = 100 log_slow_admin_statements = 1 log_slow_slave_statements = 1
3.2 实时状态监控
-- 查看当前慢查询状态 SHOW STATUS LIKE 'Slow_queries'; -- 监控数据库性能指标 SHOW STATUS WHERE Variable_name IN ( 'Threads_connected', 'Threads_running', 'Innodb_rows_read', 'Innodb_rows_updated', 'Select_scan', 'Sort_merge_passes' ); -- 查看正在执行的慢查询 SHOW PROCESSLIST;
3.3 Java应用层监控集成
@Component public class QueryPerformanceMonitor { private static final Logger logger = LoggerFactory.getLogger(QueryPerformanceMonitor.class); @Autowired private DataSource dataSource; private ThreadLocal<Long> queryStartTime = new ThreadLocal<>(); public void startMonitoring() { queryStartTime.set(System.currentTimeMillis()); } public void endMonitoring(String sql, Object[] params) { Long startTime = queryStartTime.get(); if (startTime != null) { long duration = System.currentTimeMillis() - startTime; if (duration > 1000) { // 1秒阈值 logger.warn("慢查询检测 - 执行时间: {}ms, SQL: {}, 参数: {}", duration, sql, Arrays.toString(params)); // 记录到专门的慢查询表 logSlowQueryToDatabase(sql, params, duration); } queryStartTime.remove(); } } @Aspect @Component public static class QueryMonitoringAspect { @Autowired private QueryPerformanceMonitor monitor; @Around("execution(* org.springframework.jdbc.core.JdbcTemplate.query(..))") public Object monitorQuery(ProceedingJoinPoint joinPoint) throws Throwable { monitor.startMonitoring(); try { return joinPoint.proceed(); } finally { String sql = (String) joinPoint.getArgs()[0]; Object[] params = (Object[]) joinPoint.getArgs()[1]; monitor.endMonitoring(sql, params); } } } }
4. 慢查询日志分析实战
4.1 慢查询日志格式解析
# Time: 2023-10-01T10:00:00.123456Z # User@Host: app_user[app_user] @ [192.168.1.100] Id: 12345 # Query_time: 2.345678 Lock_time: 0.000123 Rows_sent: 10 Rows_examined: 100000 SET timestamp=1696156800; SELECT * FROM orders o JOIN users u ON o.user_id = u.id WHERE u.email LIKE '%@example.com' AND o.create_time > '2023-01-01' ORDER BY o.amount DESC LIMIT 10;
关键字段解读:
- Query_time: 查询执行总时间
- Lock_time: 锁等待时间
- Rows_sent: 返回给客户端的行数
- Rows_examined: 扫描的行数(关键性能指标)
4.2 使用mysqldumpslow分析
# 安装MySQL客户端工具 sudo apt-get install mysql-client # 分析慢查询日志 mysqldumpslow -s t /var/log/mysql/slow.log # 按执行时间排序,显示前10条 mysqldumpslow -s t -t 10 /var/log/mysql/slow.log # 分析特定模式的查询 mysqldumpslow -g "ORDER BY" /var/log/mysql/slow.log # 输出示例: # Count: 25 Time=2.34s (58s) Lock=0.00s (0s) Rows=10.0 (250), app_user[app_user]@[192.168.1.100] # SELECT * FROM orders ...
4.3 使用pt-query-digest高级分析
bash
# 安装Percona Toolkit wget https://repo.percona.com/apt/percona-release_latest.$(lsb_release -sc)_all.deb sudo dpkg -i percona-release_latest.$(lsb_release -sc)_all.deb sudo apt-get update sudo apt-get install percona-toolkit # 分析慢查询日志 pt-query-digest /var/log/mysql/slow.log # 输出详细报告到文件 pt-query-digest /var/log/mysql/slow.log --output slowlog_analysis.html # 监控实时慢查询 pt-query-digest --processlist h=localhost --interval 0.1 --print --no-continue
5. 慢查询优化核心技术
5.1 索引优化策略
5.1.1 索引诊断与创建
-- 查看表索引情况 SHOW INDEX FROM orders; -- 分析索引使用情况 EXPLAIN SELECT * FROM orders WHERE user_id = 1001 AND status = 'COMPLETED'; -- 创建复合索引 ALTER TABLE orders ADD INDEX idx_user_status (user_id, status); -- 创建覆盖索引 ALTER TABLE orders ADD INDEX idx_covering (user_id, status, amount, create_time);
5.1.2 索引失效场景分析
public class IndexFailureScenarios { /** * 常见的索引失效场景 */ public void commonIndexFailures() { // 1. 隐式类型转换 // user_id是VARCHAR,但传入数字 → 索引失效 String sql1 = "SELECT * FROM users WHERE user_id = 12345"; // 2. 前导通配符LIKE String sql2 = "SELECT * FROM users WHERE email LIKE '%@example.com'"; // 3. 对索引列进行函数操作 String sql3 = "SELECT * FROM orders WHERE DATE(create_time) = '2023-10-01'"; // 4. OR条件使用不当 String sql4 = "SELECT * FROM products WHERE category_id = 1 OR price > 100"; // 5. 不满足最左前缀原则 // 索引 (category_id, status) String sql5 = "SELECT * FROM products WHERE status = 'ACTIVE'"; } /** * 优化方案 */ public void optimizationSolutions() { // 1. 类型匹配 String sql1 = "SELECT * FROM users WHERE user_id = '12345'"; // 2. 避免前导通配符 String sql2 = "SELECT * FROM users WHERE email LIKE 'user%@example.com'"; // 3. 避免索引列函数操作 String sql3 = "SELECT * FROM orders WHERE create_time >= '2023-10-01' AND create_time < '2023-10-02'"; // 4. OR优化为UNION String sql4 = "SELECT * FROM products WHERE category_id = 1 " + "UNION SELECT * FROM products WHERE price > 100"; // 5. 调整索引或查询条件 String sql5 = "SELECT * FROM products WHERE category_id IN (1,2,3) AND status = 'ACTIVE'"; } }
5.2 SQL重写优化
5.2.1 分页查询优化
-- 原始慢查询(偏移量大时变慢) SELECT * FROM orders ORDER BY create_time DESC LIMIT 10000, 20; -- 优化方案1:游标分页(基于ID) SELECT * FROM orders WHERE id > 10000 ORDER BY id ASC LIMIT 20; -- 优化方案2:延迟关联 SELECT * FROM orders o JOIN ( SELECT id FROM orders ORDER BY create_time DESC LIMIT 10000, 20 ) AS tmp ON o.id = tmp.id;
5.2.2 JOIN查询优化
-- 原始慢JOIN SELECT u.*, o.*, p.* FROM users u LEFT JOIN orders o ON u.id = o.user_id LEFT JOIN payments p ON o.id = p.order_id WHERE u.create_time > '2023-01-01'; -- 优化:分阶段查询 + 应用层组合 -- 第一阶段:获取用户ID列表 SELECT id FROM users WHERE create_time > '2023-01-01' LIMIT 1000; -- 第二阶段:批量查询关联数据 SELECT * FROM orders WHERE user_id IN (?, ?, ...); SELECT * FROM payments WHERE order_id IN (?, ?, ...);
5.3 Java代码层面的优化
@Repository public class OptimizedUserRepository { @Autowired private JdbcTemplate jdbcTemplate; /** * 优化前:N+1查询问题 */ public List<User> findUsersWithOrdersBad() { List<User> users = jdbcTemplate.query( "SELECT * FROM users WHERE status = 'ACTIVE'", new UserRowMapper()); // N+1查询问题 for (User user : users) { List<Order> orders = jdbcTemplate.query( "SELECT * FROM orders WHERE user_id = ?", new Object[]{user.getId()}, new OrderRowMapper()); user.setOrders(orders); } return users; } /** * 优化后:单次JOIN查询 */ public List<User> findUsersWithOrdersOptimized() { String sql = """ SELECT u.*, o.id as order_id, o.amount, o.create_time as order_time FROM users u LEFT JOIN orders o ON u.id = o.user_id WHERE u.status = 'ACTIVE' ORDER BY u.id, o.create_time DESC """; return jdbcTemplate.query(sql, new UserWithOrdersRowMapper()); } /** * 优化:批量查询减少数据库往返 */ public Map<Long, List<Order>> findOrdersByUserIds(List<Long> userIds) { if (userIds.isEmpty()) { return Collections.emptyMap(); } String inClause = String.join(",", Collections.nCopies(userIds.size(), "?")); String sql = String.format( "SELECT * FROM orders WHERE user_id IN (%s) ORDER BY create_time DESC", inClause); List<Order> orders = jdbcTemplate.query(sql, userIds.toArray(), new OrderRowMapper()); // 在应用层分组,减少数据库压力 return orders.stream() .collect(Collectors.groupingBy(Order::getUserId)); } }
6. 系统级优化策略
6.1 数据库参数调优
# /etc/my.cnf 性能优化参数 [mysqld] # 缓冲池设置(通常设置为物理内存的70-80%) innodb_buffer_pool_size = 16G innodb_buffer_pool_instances = 8 # 日志设置 innodb_log_file_size = 2G innodb_log_buffer_size = 64M # 连接设置 max_connections = 500 thread_cache_size = 50 table_open_cache = 2000 # 查询缓存(MySQL 8.0已移除,5.7版本谨慎使用) # query_cache_type = 0
6.2 架构层面的优化
// 读写分离架构 @Configuration @EnableConfigurationProperties(DataSourceProperties.class) public class ReadWriteDataSourceConfig { @Bean @Primary public DataSource routingDataSource() { Map<Object, Object> targetDataSources = new HashMap<>(); // 主库(写) targetDataSources.put("master", createMasterDataSource()); // 从库(读) targetDataSources.put("slave1", createSlaveDataSource()); targetDataSources.put("slave2", createSlaveDataSource()); ReadWriteRoutingDataSource routingDataSource = new ReadWriteRoutingDataSource(); routingDataSource.setTargetDataSources(targetDataSources); routingDataSource.setDefaultTargetDataSource( targetDataSources.get("master")); return routingDataSource; } // 基于注解的读写分离 @Target({ElementType.METHOD, ElementType.TYPE}) @Retention(RetentionPolicy.RUNTIME) public @interface ReadOnly { } }
7. 自动化监控与告警系统
7.1 基于Prometheus的监控
# prometheus.yml 配置 scrape_configs: - job_name: 'mysql_slow_queries' static_configs: - targets: ['mysql-exporter:9104'] metrics_path: /metrics params: collect[]: - slowlog - engine_innodb - global_status # 告警规则 groups: - name: mysql_slow_queries rules: - alert: MySQLSlowQueryRateHigh expr: rate(mysql_global_status_slow_queries[5m]) > 10 for: 2m labels: severity: warning annotations: summary: "MySQL慢查询率过高" description: "当前慢查询率为 {{ $value }} 个/秒"
7.2 Java应用集成监控
@Component public class SlowQueryAlertService { @Autowired private MeterRegistry meterRegistry; private final Counter slowQueryCounter; private final DistributionSummary queryDurationSummary; public SlowQueryAlertService() { this.slowQueryCounter = Counter.builder("db.query.slow") .description("慢查询计数") .register(meterRegistry); this.queryDurationSummary = DistributionSummary.builder("db.query.duration") .description("查询耗时分布") .baseUnit("milliseconds") .register(meterRegistry); } public void recordQuery(String sql, long duration, boolean isSlow) { queryDurationSummary.record(duration); if (isSlow) { slowQueryCounter.increment(); // 触发告警逻辑 if (shouldSendAlert(sql, duration)) { sendAlertToTeams(sql, duration); } } } private boolean shouldSendAlert(String sql, long duration) { // 基于SQL模式、执行时间、频率等判断 return duration > 5000; // 5秒以上立即告警 } private void sendAlertToTeams(String sql, long duration) { // 发送告警到Teams/Slack/钉钉 String message = String.format( " 慢查询告警\n执行时间: %dms\nSQL: %s", duration, abbreviateSql(sql)); // 调用Webhook发送消息 sendWebhookAlert(message); } }
8. 实战案例:电商系统慢查询优化
8.1 案例背景
-- 原始慢查询:商品搜索+分页 SELECT p.*, c.name as category_name, COUNT(r.id) as review_count FROM products p LEFT JOIN categories c ON p.category_id = c.id LEFT JOIN reviews r ON p.id = r.product_id WHERE p.status = 'ACTIVE' AND p.price BETWEEN 100 AND 1000 AND p.stock_count > 0 AND (p.name LIKE '%手机%' OR p.description LIKE '%手机%') GROUP BY p.id ORDER BY p.sales_count DESC LIMIT 0, 20; -- 执行时间:8.2秒,扫描行数:50万+
8.2 优化步骤
步骤1:索引分析与创建
-- 分析现有索引 SHOW INDEX FROM products; -- 创建复合索引 ALTER TABLE products ADD INDEX idx_search_composite ( status, price, stock_count, sales_count ); -- 创建全文索引(用于名称和描述搜索) ALTER TABLE products ADD FULLTEXT(name, description);
步骤2:SQL重写
-- 优化后的查询 SELECT p.*, c.name as category_name, r.review_count FROM products p FORCE INDEX (idx_search_composite) LEFT JOIN categories c ON p.category_id = c.id LEFT JOIN ( SELECT product_id, COUNT(*) as review_count FROM reviews GROUP BY product_id ) r ON p.id = r.product_id WHERE p.status = 'ACTIVE' AND p.price BETWEEN 100 AND 1000 AND p.stock_count > 0 AND MATCH(p.name, p.description) AGAINST('手机' IN BOOLEAN MODE) ORDER BY p.sales_count DESC LIMIT 0, 20; -- 执行时间:0.15秒,扫描行数:2000
步骤3:Java代码优化
@Service public class ProductSearchService { /** * 分阶段搜索优化 */ public Page<ProductVO> searchProducts(ProductSearchRequest request) { // 第一阶段:快速获取ID列表 List<Long> productIds = findProductIdsByConditions(request); if (productIds.isEmpty()) { return Page.empty(); } // 第二阶段:批量获取详细信息 Map<Long, Product> products = productRepository.findByIds(productIds); Map<Long, Category> categories = categoryRepository.findByProductIds(productIds); Map<Long, Long> reviewCounts = reviewRepository.countByProductIds(productIds); // 第三阶段:应用层组装 return assembleProductVOs(products, categories, reviewCounts, request.getPageable()); } private List<Long> findProductIdsByConditions(ProductSearchRequest request) { String sql = """ SELECT id FROM products WHERE status = 'ACTIVE' AND price BETWEEN ? AND ? AND stock_count > 0 AND MATCH(name, description) AGAINST(? IN BOOLEAN MODE) ORDER BY sales_count DESC LIMIT ?, ? """; return jdbcTemplate.queryForList(sql, Long.class, request.getMinPrice(), request.getMaxPrice(), request.getKeyword(), request.getOffset(), request.getPageSize()); } }
9. 预防性优化与最佳实践
9.1 开发规范
/** * 数据库操作最佳实践 */ public class DatabaseBestPractices { // 1. 使用预编译语句防止SQL注入 public User findByEmail(String email) { return jdbcTemplate.queryForObject( "SELECT * FROM users WHERE email = ?", new Object[]{email}, new UserRowMapper()); } // 2. 合理使用批量操作 public void batchInsertUsers(List<User> users) { jdbcTemplate.batchUpdate( "INSERT INTO users (name, email) VALUES (?, ?)", users.stream() .map(user -> new Object[]{user.getName(), user.getEmail()}) .collect(Collectors.toList())); } // 3. 限制查询结果集大小 public List<User> findActiveUsers(int limit) { return jdbcTemplate.query( "SELECT * FROM users WHERE status = 'ACTIVE' LIMIT ?", new Object[]{limit}, new UserRowMapper()); } // 4. 避免SELECT * public List<String> findUserNames() { return jdbcTemplate.queryForList( "SELECT name FROM users WHERE status = 'ACTIVE'", String.class); } }
9.2 定期维护任务
-- 定期分析表 ANALYZE TABLE orders, users, products; -- 优化表(碎片整理) OPTIMIZE TABLE orders; -- 更新索引统计信息 UPDATE mysql.innodb_index_stats SET last_update = NOW(); -- 清理历史数据 DELETE FROM audit_logs WHERE create_time < DATE_SUB(NOW(), INTERVAL 90 DAY);
10. 总结
慢查询监控与优化是一个系统工程,需要从监控、分析、优化到预防的全链路管理:
- 监控体系:建立全方位的慢查询检测机制
- 分析工具:掌握专业的日志分析工具使用
- 优化技术:索引优化、SQL重写、架构调整多管齐下
- 预防措施:开发规范、定期维护、容量规划
通过系统化的慢查询治理,可以显著提升应用性能,保障系统稳定运行,为业务发展提供坚实的技术支撑。