1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
|
我的博客已迁移到 xdoujiang.com,请去那边和我交流。
晚上某个时段收到某台服务器load很高的告警,但登录服务器查看的时候load可能已经降下来了,
所以用shell脚本来解决这个问题:每30秒检测1次,当系统1分钟平均负载超过CPU核数时,就把占用CPU的进程列表记录到文本文件里。
cat load.sh
#!/bin/bash
#--------------------------------------------------
#Author:jimmygong
#Email:jimmygong@taomee.com
#FileName:load.sh
#Function:log CPU-consuming processes whenever the 1-minute load average exceeds the CPU count
#Version:1.0
#Created:2015-06-02
#--------------------------------------------------
# Seconds to wait between two load samples.
sleeptime=30
# Directory that receives the files written by this script.
logpath="/opt/scripts"
# Create the directory (including missing parents) if it does not exist yet.
[[ -d "$logpath" ]] || mkdir -p -- "$logpath"
# Print a green "[ Ok ]" status line to stdout.
# Arguments: $* - message words; they are joined with spaces and printed
#                 after the status tag.
echosucc ()
{
    local succstatus="[ Ok ]"
    # The message is passed as a %s argument, never as part of the format
    # string, so '%' or backslashes inside it are printed literally.
    printf '\033[32m %s %s \033[0m\n' "$succstatus" "$*"
}
# Print the command-line synopsis to stdout.
function usage ()
{
    printf 'Usage: %s {start|stop}\n' "$0"
}
# Exactly one argument (the sub-command) is required; otherwise print the
# synopsis and exit with a failure status.
if [[ $# -ne 1 ]]; then
    usage
    exit 1
fi
# Monitor loop: every $sleeptime seconds compare the 1-minute load average
# with the number of CPUs and, when the load is higher, save the list of
# CPU-consuming processes to $logpath/load.<YYYYmmddHHMM>.
# Globals: sleeptime (read), logpath (read)
# Returns: does not return; the loop runs until the process is killed.
function start ()
{
    local load cpunum result

    echosucc "Starting load monitor"

    # The CPU count is loop-invariant, so it is read once up front.
    cpunum=$(grep -c '^processor' /proc/cpuinfo)

    while true; do
        # The first field of /proc/loadavg is the 1-minute load average.
        read -r load _ < /proc/loadavg

        # awk performs the floating-point comparison, so bc is not needed.
        if awk -v cur="$load" -v max="$cpunum" \
            'BEGIN { exit (cur > max) ? 0 : 1 }'; then
            # Processes using more than 0% CPU, busiest first. The ps header
            # line does not pass the "$1 > 0" filter.
            result=$(ps -eo pcpu,pmem,user,args | awk '$1 > 0' | sort -nr)
            if [[ -n "$result" ]]; then
                # Append rather than truncate: with a 30 s interval two
                # samples can share one minute-resolution file name, and
                # the first one must not be overwritten.
                printf '%s\n' "$result" \
                    >> "$logpath/load.$(date +"%Y%m%d%H%M")"
            fi
        fi

        sleep "$sleeptime"
    done
}
# Send SIGTERM to every running "<this script> start" process except the
# current one, then print a confirmation line.
# The match is a literal substring test on "$0 start", so the monitor must
# have been launched through the same path that is used to stop it.
function stop ()
{
    local pid

    # "pid=,args=" suppresses the ps header line. The search string is
    # handed to awk through the environment rather than the command line,
    # so awk's own ps entry can never match it.
    while read -r pid; do
        # The target may already have exited; a failing kill is ignored on
        # purpose and its diagnostics are discarded.
        kill -s TERM "$pid" > /dev/null 2>&1
    done < <(
        ps -eo pid=,args= |
            LOAD_MONITOR_CMD="$0 start" awk -v self="$$" \
                '$1 != self && index($0, ENVIRON["LOAD_MONITOR_CMD"]) { print $1 }'
    )

    echosucc 'Stopping load monitor'
}
# Dispatch the sub-command given as the only argument.
case "$1" in
    start)
        # Run the monitor loop in the background so the caller gets the
        # shell back immediately.
        start &
        ;;
    stop)
        stop
        ;;
    *)
        # Unknown sub-command: print the synopsis and fail, matching the
        # exit status used when the argument count is wrong.
        usage
        exit 1
        ;;
esac
|
本文转自 xdoujiang 51CTO博客,原文链接:http://blog.51cto.com/7938217/1657964,如需转载请自行联系原作者