Nagios 安装设置

简介:

公司的服务器越来越多,原本只用一个脚本做检测,现在改用 Nagios 来监控。


Ubuntu 客户端安装脚本

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
#!/bin/bash
# Installs the Nagios plugins and the NRPE daemon on an Ubuntu client,
# writes /usr/local/nagios/etc/nrpe.cfg, registers NRPE in /etc/rc.local
# and starts it. Must be run as root.

# Abort on the first failing step so a failed download or build does not
# leave a half-configured daemon behind.
set -e

tmp_dir=/tmp/nagios
# Address of the Nagios server that is allowed to query this client.
nagios_ser="192.168.1.3"
nrpe_cfg=/usr/local/nagios/etc/nrpe.cfg
nrpe_cmd="/usr/local/nagios/bin/nrpe -c $nrpe_cfg -d"

# Create the service account only when it is missing, so the script can be re-run.
getent group nagios >/dev/null || groupadd nagios
id nagios >/dev/null 2>&1 || useradd -g nagios -s /sbin/nologin nagios

mkdir -p "$tmp_dir"
cd "$tmp_dir"
wget http://downloads.sourceforge.net/project/nagios/nrpe-2.x/nrpe-2.15/nrpe-2.15.tar.gz
wget http://nagios-plugins.org/download/nagios-plugins-2.0.1.tar.gz

#---- install
apt-get -y --force-yes install openssl ruby1.9.1 build-essential
apt-get -y --force-yes install libssl-dev lm-sensors

tar xvf nagios-plugins-2.0.1.tar.gz
cd nagios-plugins-2.0.1
./configure --with-nagios-user=nagios --with-nagios-group=nagios
make
make install
cd ../

tar xvf nrpe-2.15.tar.gz
cd ./nrpe-2.15
./configure --with-ssl-lib=/usr/lib/x86_64-linux-gnu
make all
make install-plugin
make install-daemon
make install-daemon-config
#mv ./check_* /usr/local/nagios/libexec
#chmod 755 -R /usr/local/nagios/libexec
chown -R nagios:nagios /usr/local/nagios/

# The heredoc delimiter is unquoted on purpose: $nagios_ser is expanded below.
cat >"$nrpe_cfg" <<EOF
log_facility=daemon
pid_file=/var/run/nrpe.pid
server_port=5666
nrpe_user=nagios
nrpe_group=nagios
allowed_hosts=127.0.0.1,$nagios_ser

dont_blame_nrpe=0
allow_bash_command_substitution=0
debug=0
command_timeout=60
connection_timeout=300
command[check_users]=/usr/local/nagios/libexec/check_users -w 5 -c 10
command[check_load]=/usr/local/nagios/libexec/check_load -w 15,10,5 -c 30,25,20
command[check_zombie_procs]=/usr/local/nagios/libexec/check_procs -w 5 -c 10 -s Z
command[check_total_procs]=/usr/local/nagios/libexec/check_procs -w 150 -c 200
command[check_procs]=/usr/local/nagios/libexec/check_procs -w 150 -c 200
command[check_alldisk]=/usr/local/nagios/libexec/check_alldisk -w 90 -c 95
command[check_http]=/usr/local/nagios/libexec/check_http -H 127.0.0.1 -w 5 -c 10
command[check_ping]=/usr/local/nagios/libexec/check_ping -H 127.0.0.1 -w 3000.0,80% -c 5000.0,100% -p 5
command[check_ssh]=/usr/local/nagios/libexec/check_ssh -4 127.0.0.1
command[check_swap]=/usr/local/nagios/libexec/check_swap -w 30% -c 10%
command[check_sensors]=/usr/local/nagios/libexec/check_sensors
command[check_mdadm]=/usr/local/nagios/libexec/check_mdadm
command[check_smart]=/usr/local/nagios/libexec/check_smart
command[check_drbd]=/usr/local/nagios/libexec/check_drbd
EOF

# Start NRPE at boot. Ubuntu's stock rc.local ends with "exit 0", so a line
# appended after it would never run: insert before it when present, and
# skip the step entirely if the command is already registered.
if ! grep -qF "$nrpe_cmd" /etc/rc.local 2>/dev/null; then
    if grep -q '^exit 0' /etc/rc.local 2>/dev/null; then
        sed -i "/^exit 0/i $nrpe_cmd" /etc/rc.local
    else
        echo "$nrpe_cmd" >> /etc/rc.local
    fi
fi

$nrpe_cmd
rm -rf "$tmp_dir"



下面是自己折腾的几个 Ruby 检测脚本:

1:check_smart 磁盘状态检测

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
#!/usr/bin/env ruby
# Nagios/NRPE plugin: reports the SMART overall-health verdict of every
# whole disk matching /dev/sd*.
# Exit codes: 0 ok; 1 warning; 2 critical; 3 unknown
#
# Setup, so the nagios user may run smartctl without a password:
#   echo "nagios ALL=NOPASSWD:/usr/sbin/smartctl" >>/etc/sudoers
#   CentOS: sed -i "s:Defaults  requiretty:Defaults:nagios !requiretty:" /etc/sudoers
# Invoked from the Nagios server as: check_nrpe!check_smart

# Keep whole disks only: names ending in a letter (sda), not partitions (sda1).
disks = Dir.glob('/dev/sd[a-z]*').select { |dev| dev =~ /[a-z]\z/ }.sort

# A check that inspected nothing must not report OK.
if disks.empty?
  puts 'UNKNOWN: no /dev/sd* disks found'
  exit 3
end

failed = false
report = disks.map do |disk|
  # Device names come from the glob above, so interpolating them is safe.
  output = `sudo /usr/sbin/smartctl -H #{disk} 2>/dev/null`
  # ATA disks print "...test result: PASSED"; SCSI/SAS disks print
  # "SMART Health Status: OK".
  healthy = output =~ /result:\s*PASSED/ || output =~ /SMART Health Status:\s*OK/
  failed = true unless healthy
  "#{disk}  #{healthy ? 'OK' : 'Fail'}"
end

puts report
exit(failed ? 2 : 0)

2:check_mdadm 软阵列检测

1
2
3
4
5
6
7
8
9
10
#!/usr/bin/env ruby
# Nagios/NRPE plugin: checks Linux software RAID (md) health from /proc/mdstat.
# Exit codes: 0 ok; 1 warning; 2 critical; 3 unknown
status = `cat /proc/mdstat`

# Redundant arrays print a member map such as [UU] or [U_]: 'U' marks a
# working member, '_' a missing or failed one. Inspecting each map, rather
# than comparing the total 'U' count with twice the number of arrays, keeps
# the check correct for arrays with any number of members and for
# raid0/linear arrays, which print no map at all.
member_maps = status.scan(/\[([U_]+)\]/).flatten

if member_maps.none? { |map| map.include?('_') }
  puts "Soft Raid OK"
  exit 0
else
  puts "Soft Raid Fail"
  exit 2
end

3:check_drbd DRBD检测

1
2
3
4
5
6
7
8
9
#!/usr/bin/ruby
# Nagios/NRPE plugin for DRBD.
# Exit codes: 0 ok; 1 warning; 2 critical; 3 unknown
#
# Compares the number of "UpToDate" occurrences in /proc/drbd with twice the
# number of block-device entries in /dev whose listing line contains "drbd".
up_to_date = `cat /proc/drbd`.scan("UpToDate").count
expected = `ls -la /dev/ | grep ^b | grep drbd | wc -l`.to_i * 2

unless up_to_date == expected
  puts "DRBD Critical"
  exit 2
end

puts "DRBD OK"
exit 0

4:check_alldisk 检测磁盘空间

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
#!/usr/bin/env ruby
# Nagios/NRPE plugin: checks the usage of every local filesystem.
# Usage: check_alldisk -w 90 -c 95
#   ARGV[1]  warning threshold, percent used (defaults to 90 when omitted)
#   ARGV[3]  critical threshold, percent used (defaults to 95 when omitted)
# Exit codes: 0 ok; 1 warning; 2 critical; 3 unknown
warn_pct = (ARGV[1] || 90).to_i
crit_pct = (ARGV[3] || 95).to_i

# -P forces one line per filesystem even when the device name is long, so
# each line can be split on its own; the first line is the header.
rows = `df -hlP -x tmpfs -x devtmpfs`.each_line.drop(1).map { |line| line.split }
rows = rows.select { |fields| fields.size >= 6 }

if rows.empty?
  puts "UNKNOWN"
  exit 3
end

worst = 0
report = rows.map do |fields|
  device = fields[0]
  use = fields[4]
  # Re-join the tail so mount points containing spaces stay intact.
  mount = fields[5..-1].join(' ')
  used = use.to_i # "42%".to_i => 42

  level, code =
    if used > crit_pct
      ['Critical', 2]
    elsif used > warn_pct
      ['Warning', 1]
    else
      ['OK', 0]
    end

  # The plugin's status is that of the worst filesystem.
  worst = code if code > worst
  "#{device}  #{use}  #{mount}  #{level}"
end

puts report
exit worst


本文转自 nonono11 51CTO博客,原文链接:http://blog.51cto.com/abian/1412478,如需转载请自行联系原作者
相关文章
|
Web App开发 开发工具 机器学习/深度学习
|
监控 网络协议 测试技术
|
Web App开发 开发工具 机器学习/深度学习
|
Web App开发 监控 网络架构