CPU异常定位手段
获取线程cpu占用
使用 top -H 获取 pid 为 18234 的进程中各线程的 CPU 占用,其中占用最高的线程 ID(LWP)为 374037
[root@ceph ~]# top -H -p 18234 -n 1
top - 10:20:42 up 8 days, 1:08, 1 user, load average: 13.09, 13.17, 12.68
Threads: 10742 total, 1 running, 10741 sleeping, 0 stopped, 0 zombie
%Cpu(s): 5.3 us, 8.0 sy, 0.0 ni, 84.0 id, 1.8 wa, 0.0 hi, 0.8 si, 0.0 st
KiB Mem : 19752244+total, 16810388 free, 12193627+used, 58775792 buff/cache
KiB Swap: 0 total, 0 free, 0 used. 74628448 avail Mem
PID USER PR NI VIRT RES SHR S %CPU %MEM TIME+ COMMAND
374037 ceph 20 0 13.9g 1.5g 8484 R 96.8 0.8 2431:07 ms_pipe_read
18236 ceph 20 0 13.9g 1.5g 8484 S 3.2 0.8 183:29.12 log
22685 ceph 20 0 13.9g 1.5g 8484 S 3.2 0.8 48:15.72 journal_wrt_fin
28838 ceph 20 0 13.9g 1.5g 8484 S 3.2 0.8 62:48.29 tp_osd_tp
789508 ceph 20 0 13.9g 1.5g 8484 S 3.2 0.8 0:02.36 ms_pipe_write
18234 ceph 20 0 13.9g 1.5g 8484 S 0.0 0.8 0:10.38 ceph-osd
18238 ceph 20 0 13.9g 1.5g 8484 S 0.0 0.8 0:02.04 service
18239 ceph 20 0 13.9g 1.5g 8484 S 0.0 0.8 0:00.03 admin_socket
18240 ceph 20 0 13.9g 1.5g 8484 S 0.0 0.8 0:02.68 ceph-osd
18241 ceph 20 0 13.9g 1.5g 8484 S 0.0 0.8 4:18.44 ms_reaper
18242 ceph 20 0 13.9g 1.5g 8484 S 0.0 0.8 0:00.01 ms_reaper
18243 ceph 20 0 13.9g 1.5g 8484 S 0.0 0.8 0:00.01 ms_reaper
18244 ceph 20 0 13.9g 1.5g 8484 S 0.0 0.8 0:00.01 ms_reaper
18245 ceph 20 0 13.9g 1.5g 8484 S 0.0 0.8 0:00.04 ms_reaper
18246 ceph 20 0 13.9g 1.5g 8484 S 0.0 0.8 0:00.00 ms_reaper
18247 ceph 20 0 13.9g 1.5g 8484 S 0.0 0.8 0:26.90 safe_timer
18248 ceph 20 0 13.9g 1.5g 8484 S 0.0 0.8 12:45.89 safe_timer
18249 ceph 20 0 13.9g 1.5g 8484 S 0.0 0.8 0:00.00 safe_timer
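使用 ps 获取进程 18234 中各线程的 CPU 使用率并排序(注:-L 输出中的 pid 列是进程号,每个线程都相同;如需线程号可在 -o 中增加 tid 列。sort -r -k 1 是按整行字符串逆序排序,若要严格按 %CPU 数值排序,可改用 sort -rn -k 2)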
[root@ceph ~]# ps -p 18234 -Lo pid,%cpu,command | sort -r -k 1| head
PID %CPU COMMAND
18234 99.7 /usr/bin/ceph-osd -f --cluster ceph --id 48 --setuser ceph --setgroup ceph
18234 1.5 /usr/bin/ceph-osd -f --cluster ceph --id 48 --setuser ceph --setgroup ceph
18234 1.1 /usr/bin/ceph-osd -f --cluster ceph --id 48 --setuser ceph --setgroup ceph
18234 1.1 /usr/bin/ceph-osd -f --cluster ceph --id 48 --setuser ceph --setgroup ceph
18234 0.6 /usr/bin/ceph-osd -f --cluster ceph --id 48 --setuser ceph --setgroup ceph
18234 0.6 /usr/bin/ceph-osd -f --cluster ceph --id 48 --setuser ceph --setgroup ceph
18234 0.6 /usr/bin/ceph-osd -f --cluster ceph --id 48 --setuser ceph --setgroup ceph
18234 0.6 /usr/bin/ceph-osd -f --cluster ceph --id 48 --setuser ceph --setgroup ceph
18234 0.5 /usr/bin/ceph-osd -f --cluster ceph --id 48 --setuser ceph --setgroup ceph
获取按 CPU 使用率排序的进程列表:-o +%CPU 指定按 %CPU 字段排序,-b 为批处理模式(输出为纯文本,便于重定向或接管道),-n 1 表示只输出一次后退出,-c 显示完整命令行,-U 按用户过滤
[root@ceph ~]# top -o +%CPU -bn 1 -U ceph -c
top - 14:07:29 up 8 days, 4:55, 2 users, load average: 9.89, 10.91, 10.78
Tasks: 925 total, 7 running, 918 sleeping, 0 stopped, 0 zombie
%Cpu(s): 5.6 us, 10.2 sy, 0.0 ni, 80.7 id, 2.5 wa, 0.0 hi, 0.9 si, 0.0 st
KiB Mem : 19752244+total, 14998000 free, 12188595+used, 60638496 buff/cache
KiB Swap: 0 total, 0 free, 0 used. 74681224 avail Mem
PID USER PR NI VIRT RES SHR S %CPU %MEM TIME+ COMMAND
18234 ceph 20 0 14.2g 1.5g 7180 S 123.7 0.8 5066:26 /usr/bin/ceph-osd -f --cluster ceph --id 48 --setuser ceph --setgroup ceph
17749 ceph 20 0 14.8g 1.4g 12868 S 31.3 0.7 1599:14 /usr/bin/ceph-osd -f --cluster ceph --id 53 --setuser ceph --setgroup ceph
40350 ceph 20 0 16.2g 1.6g 6880 S 31.3 0.9 2030:31 /usr/bin/ceph-osd -f --cluster ceph --id 42 --setuser ceph --setgroup ceph
44611 ceph 20 0 16.1g 1.7g 7248 S 28.2 0.9 2264:00 /usr/bin/ceph-osd -f --cluster ceph --id 39 --setuser ceph --setgroup ceph
12407 ceph 20 0 16.2g 1.6g 7792 S 25.2 0.8 1941:31 /usr/bin/ceph-osd -f --cluster ceph --id 40 --setuser ceph --setgroup ceph
12652 ceph 20 0 16.6g 1.7g 7764 S 25.2 0.9 2218:45 /usr/bin/ceph-osd -f --cluster ceph --id 47 --setuser ceph --setgroup ceph
18010 ceph 20 0 15.8g 1.6g 7120 S 25.2 0.8 1972:40 /usr/bin/ceph-osd -f --cluster ceph --id 45 --setuser ceph --setgroup ceph
42458 ceph 20 0 16.7g 1.7g 7744 S 23.7 0.9 2448:25 /usr/bin/ceph-osd -f --cluster ceph --id 37 --setuser ceph --setgroup ceph
37105 ceph 20 0 16.4g 1.7g 7660 S 22.9 0.9 2367:21 /usr/bin/ceph-osd -f --cluster ceph --id 43 --setuser ceph --setgroup ceph
20591 ceph 20 0 16.8g 1.8g 7272 S 22.1 1.0 2387:48 /usr/bin/ceph-osd -f --cluster ceph --id 46 --setuser ceph --setgroup ceph
13612 ceph 20 0 16.5g 1.6g 7632 S 19.8 0.9 2104:41 /usr/bin/ceph-osd -f --cluster ceph --id 49 --setuser ceph --setgroup ceph
34286 ceph 20 0 8933948 1.3g 6464 S 18.3 0.7 1716:12 /usr/bin/ceph-osd -f --cluster ceph --id 41 --setuser ceph --setgroup ceph
36056 ceph 20 0 16.4g 1.6g 7792 S 17.6 0.8 1899:13 /usr/bin/ceph-osd -f --cluster ceph --id 52 --setuser ceph --setgroup ceph
20992 ceph 20 0 9320296 1.1g 6040 S 16.8 0.6 1573:35 /usr/bin/ceph-osd -f --cluster ceph --id 36 --setuser ceph --setgroup ceph
20124 ceph 20 0 16.2g 1.5g 5852 S 16.0 0.8 1612:22 /usr/bin/ceph-osd -f --cluster ceph --id 38 --setuser ceph --setgroup ceph
33121 ceph 20 0 15.6g 1.5g 6200 S 16.0 0.8 1687:48 /usr/bin/ceph-osd -f --cluster ceph --id 51 --setuser ceph --setgroup ceph
17547 ceph 20 0 16.2g 1.5g 7172 S 15.3 0.8 1906:07 /usr/bin/ceph-osd -f --cluster ceph --id 44 --setuser ceph --setgroup ceph
5898 ceph 20 0 10.0g 772900 80752 S 13.7 0.4 2102:06 /usr/bin/ceph-mon -f --cluster ceph --id ceph --setuser ceph --setgroup ceph
46355 ceph 20 0 9500172 1.1g 7512 S 11.5 0.6 1573:49 /usr/bin/ceph-osd -f --cluster ceph --id 50 --setuser ceph --setgroup ceph
5895 ceph 20 0 560660 239940 2688 S 0.0 0.1 0:58.61 /usr/bin/ceph-mds -f --cluster ceph --id ceph --setuser ceph --setgroup ceph
获取 CPU 使用率大于 100% 的进程(sed -n '8,$p' 从第 8 行开始输出,跳过 top 开头的汇总信息和表头;awk 中的 $9 对应 %CPU 列)
top -o +%CPU -bn 1 -U ceph -c | sed -n '8,$p' | awk '$9>100{print $0}'
18234 ceph 20 0 14.3g 1.6g 7056 S 150.0 0.8 5139:56 /usr/bin/ceph-osd -f --cluster ceph --id 48 --setuser ceph --setgroup ceph
使用 pstack 获取进程 18234 的所有线程调用栈,并在输出中查找 LWP 为 374037 的线程
pstack 18234 > osd_48.pstack
Thread 9079 (Thread 0x7f5294d19700 (LWP 374037)):
#0 0x00007f55c0458aab in recv () from /lib64/libpthread.so.0
#1 0x00005648458a057d in Pipe::do_recv(char*, unsigned long, int) ()
#2 0x00005648458a0937 in Pipe::buffered_recv(char*, unsigned long, int) ()
#3 0x00005648458a0a33 in Pipe::tcp_read_nonblocking(char*, unsigned int) ()
#4 0x00005648458a0d0d in Pipe::tcp_read(char*, unsigned int) ()
#5 0x00005648458ab058 in Pipe::accept() ()
#6 0x00005648458b22ff in Pipe::reader() ()
#7 0x00005648458baf1d in Pipe::Reader::entry() ()
#8 0x00007f55c0451e65 in start_thread () from /lib64/libpthread.so.0
#9 0x00007f55bead688d in clone () from /lib64/libc.so.6