五、网络问题排查
5.1 网络连接问题
# 1. 查看网络连接状态
netstat -antp | awk '{print $6}' | sort | uniq -c
# 状态分布:
# ESTABLISHED: 正常连接
# TIME_WAIT: 主动关闭后等待(过多说明频繁创建连接)
# CLOSE_WAIT: 被动关闭后未调用close(应用层bug)
# SYN_RECV: 半连接队列满
# 2. 查看TCP连接详情
ss -tni
# 3. 抓包分析
tcpdump -i eth0 -s 0 -w capture.pcap -c 1000
# 4. 分析抓包文件
tcpdump -r capture.pcap -nn
# 或使用Wireshark图形化分析
# 5. 检查端口监听
netstat -tlnp
5.2 TIME_WAIT过多问题
# 查看TIME_WAIT数量
netstat -ant | grep TIME_WAIT | wc -l
# 解决方案:
# 1. 开启端口复用
echo 1 > /proc/sys/net/ipv4/tcp_tw_reuse
# 2. 减小FIN_WAIT2超时时间
echo 30 > /proc/sys/net/ipv4/tcp_fin_timeout
# 3. 使用连接池复用连接
# 4. 调整客户端为短连接时使用长连接
5.3 网络延迟排查
# 1. ping测试基础延迟
ping -c 10 target-host
# 2. traceroute查看路由跳数
traceroute target-host
# 3. mtr综合测试(实时显示丢包和延迟)
mtr target-host
# 4. 端到端延迟测试
iperf3 -c server -p 5201
# 5. 应用层延迟分析(客户端curl)
curl -w "@curl-format.txt" -o /dev/null -s https://api.example.com
# curl-format.txt内容:
# time_namelookup: %{time_namelookup}\n
# time_connect: %{time_connect}\n
# time_appconnect: %{time_appconnect}\n
# time_pretransfer: %{time_pretransfer}\n
# time_redirect: %{time_redirect}\n
# time_starttransfer: %{time_starttransfer}\n
# ----------\n
# time_total: %{time_total}\n
5.4 连接池问题
// HTTP连接池配置示例(Apache HttpClient)
public class HttpClientPoolConfig {
public CloseableHttpClient createOptimizedClient() {
// 连接池配置
PoolingHttpClientConnectionManager cm = new PoolingHttpClientConnectionManager();
cm.setMaxTotal(200); // 最大连接数
cm.setDefaultMaxPerRoute(50); // 每个路由最大连接数
// 请求配置
RequestConfig config = RequestConfig.custom()
.setConnectTimeout(3000) // 连接超时
.setSocketTimeout(5000) // Socket超时
.setConnectionRequestTimeout(1000) // 从连接池获取连接超时
.build();
// 空闲连接清理
cm.setValidateAfterInactivity(1000);
return HttpClientBuilder.create()
.setConnectionManager(cm)
.setDefaultRequestConfig(config)
.setKeepAliveStrategy((response, context) -> 30000) // 30秒保活
.evictIdleConnections(30, TimeUnit.SECONDS)
.build();
}
}
// 连接池问题排查:
// 1. 查看连接池监控指标(活跃连接数、空闲连接数)
// 2. 检查是否有连接泄漏(不归还连接)
// 3. 检查连接池大小是否合理
六、数据库性能优化
6.1 慢查询排查
-- 开启慢查询日志
SET GLOBAL slow_query_log = ON;
SET GLOBAL long_query_time = 1;
SET GLOBAL log_queries_not_using_indexes = ON;
-- 查看慢查询日志位置
SHOW VARIABLES LIKE 'slow_query_log_file';
-- 查看当前执行中的SQL
SHOW PROCESSLIST;
-- 或更详细的信息
SELECT * FROM information_schema.PROCESSLIST WHERE COMMAND != 'Sleep';
-- 查看锁等待
SELECT * FROM information_schema.INNODB_TRX;
SELECT * FROM information_schema.INNODB_LOCKS;
SELECT * FROM information_schema.INNODB_LOCK_WAITS;
6.2 索引失效分析
-- 使用EXPLAIN分析执行计划
EXPLAIN FORMAT=JSON
SELECT o.*, u.name
FROM orders o
JOIN users u ON o.user_id = u.id
WHERE o.create_time > '2024-01-01'
AND o.status = 1
ORDER BY o.id DESC
LIMIT 100;
-- 关键指标解读:
-- type: ALL(全表扫描) > index(索引全扫) > range(范围) > ref(引用) > eq_ref > const > system
-- Extra: Using filesort(需优化) / Using temporary(需优化) / Using index(覆盖索引)
-- 索引使用情况统计
SELECT
index_name,
cardinality,
(cardinality / (SELECT COUNT(*) FROM orders)) * 100 AS selectivity
FROM information_schema.STATISTICS
WHERE table_name = 'orders';
6.3 事务与锁问题
-- 查看当前事务
SELECT
trx_id,
trx_state,
trx_started,
TIMESTAMPDIFF(SECOND, trx_started, NOW()) as duration_sec,
trx_mysql_thread_id,
trx_query
FROM information_schema.INNODB_TRX
ORDER BY duration_sec DESC;
-- 查看未提交的事务(可能导致死锁)
SELECT * FROM information_schema.INNODB_TRX WHERE trx_state = 'RUNNING';
-- 查看死锁信息
SHOW ENGINE INNODB STATUS\G
-- 重点关注 "LATEST DETECTED DEADLOCK" 部分
6.4 数据库连接池调优
HikariCP配置优化:
minimumIdle: 10
maximumPoolSize: 20
connectionTimeout: 30000
idleTimeout: 600000
maxLifetime: 1800000
leakDetectionThreshold: 60000 # 连接泄漏检测(生产环境开启)
# MySQL驱动优化
dataSourceProperties:
cachePrepStmts: true
prepStmtCacheSize: 250
prepStmtCacheSqlLimit: 2048
useServerPrepStmts: true
rewriteBatchedStatements: true
# 连接泄漏检测日志:
# "Connection leak detection: connection returned to pool after 65 seconds"