之前有一篇文章讲到使用update percent监控agent的数据提交状况,可以有效地发现agent的故障问题,而使用unreachable的时候,会因为unreachable process busy的情况造成误报(可以通过增大StartPollersUnreachable和UnreachablePeriod解决),附一个python小程序,用来计算host的update percent(遇到host update 为0时不能报警的情况,使用RIGHT JOIN+IFNULL解决).
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
|
#!/usr/bin/python
# -*- coding: utf8 -*-
#get zabbix agent update percent
import os
import smtplib
import sys
from email.MIMEText import MIMEText

import MySQLdb

# Python 2 only: site.py removes sys.setdefaultencoding at startup, so the
# module has to be reloaded before the default encoding can be switched.
# This makes implicit str <-> unicode conversions use UTF-8 instead of ASCII.
reload(sys)
sys.setdefaultencoding('utf-8')
def SendMail(sub, content, to_list=None):
    """Send an HTML e-mail through the SMTP server configured at module level.

    Relies on the module globals ``mail_host``, ``mail_user`` and
    ``mail_pass``, which this file assigns in its ``__main__`` section.

    Args:
        sub: Subject line of the message.
        content: Message body; sent as ``text/html`` with charset utf-8.
        to_list: List of recipient addresses. When omitted or ``None`` the
            message goes to ``["ericni.ni@vipshop.com"]``.

    Returns:
        True if the message was handed to the server, False if connecting,
        logging in or sending raised (the error text is printed).
    """
    # A None sentinel avoids sharing one mutable list across calls.
    if to_list is None:
        to_list = ["ericni.ni@vipshop.com"]

    me = mail_user + "<" + mail_user + ">"
    msg = MIMEText(content, 'html', 'utf-8')
    msg['Subject'] = sub
    msg['From'] = me
    # Addresses in a header are comma-separated; ";" is not a valid separator.
    msg['To'] = ", ".join(to_list)

    s = None
    try:
        s = smtplib.SMTP()
        s.connect(mail_host)
        s.login(mail_user, mail_pass)
        s.sendmail(me, to_list, msg.as_string())
        return True
    except Exception as e:
        print(str(e))
        return False
    finally:
        # Release the socket on failure as well as on success.
        if s is not None:
            s.close()
def getAll(sql):
    """Run *sql* against the MySQL database and return every row.

    Args:
        sql: Complete SQL statement; executed as-is, without parameters.

    Returns:
        Whatever ``cursor.fetchall()`` yields, or an empty tuple when
        executing or fetching raised (the error text is printed).
    """
    # Defined up front so a failed query still returns something iterable.
    result = ()
    # 'xxx' values are placeholders for the real connection settings.
    db = MySQLdb.connect('xxx', 'xxx', 'xxx', 'xxx')
    try:
        cursor = db.cursor()
        try:
            cursor.execute(sql)
            result = cursor.fetchall()
        except Exception as e:
            print("failed info %s" % (str(e)))
        finally:
            cursor.close()
    finally:
        # Always release the connection, including when the query failed.
        db.close()

    # Diagnostic output kept from the original script.
    print(result)
    print(type(result))
    return result
# Static opening of the report: document head, styles and the table header
# row. It is used verbatim (never %-formatted), so "%" is a literal here.
_REPORT_HEAD = """
<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
<title>Zabbix agent update percent</title>
<style type="text/css">
body { font-size: 14px; color: #333;background-color: #fff;}
td { border: 1px solid #C1DAD7;padding: 4px 10px; line-height: 24px;}
table {border-collapse: collapse; width: 96%;}
.divtd {color:#E28E40;}
.divcss5{ color:#F00;}
</style>
</head>
<body>
<table style="border-collapse: collapse; width: 60%;">
<tbody>
<tr>
<td><div class="divtd">domain</div></td>
<td><div class="divtd">ip</div></td>
<td><div class="divtd">percent</div></td>
</tr>
"""


def _build_report_html(rows):
    """Render rows of (domain, ip, percent) as the complete HTML report."""
    parts = [_REPORT_HEAD]
    for line in rows:
        percent = float(line[2])
        parts.append("<tr>")
        parts.append("<td><div>%s</div></td>" % (line[0],))
        parts.append("<td><div>%s</div></td>" % (line[1],))
        if percent <= 50:
            # Values of 50 or less are highlighted in red.
            parts.append(
                """<td><div style="color: #F80000;">%.2f</div></td>""" % percent
            )
        else:
            parts.append("<td><div>%.2f</div></td>" % percent)
        parts.append("</tr>")
    parts.append(" </tbody> </table>")
    parts.append(" </body> </html> ")
    return "".join(parts)


def getReport(allInfo):
    """Mail an HTML table of the given rows; do nothing when there are none.

    Args:
        allInfo: Sequence of rows. Each row is indexed as ``[0]`` domain,
            ``[1]`` ip and ``[2]`` percent (anything ``float()`` accepts).
            An empty sequence or ``None`` means there is nothing to report.

    Returns:
        None. The report is printed and passed to ``SendMail`` with the
        subject "Zabbix host update percent".
    """
    # Diagnostic output kept from the original script.
    print(allInfo)
    print(type(allInfo))

    if not allInfo:
        return

    mailcontent = _build_report_html(allInfo)
    print(mailcontent)
    # The body is passed to SendMail as UTF-8 encoded text.
    SendMail("Zabbix host update percent", mailcontent.encode('utf-8'))
if __name__ == "__main__":
    # SMTP settings (placeholders). SendMail reads mail_host, mail_user and
    # mail_pass as module globals.
    mail_host = "xxx"
    mail_user = "xxxx"
    mail_pass = "xxxx"
    mail_port = "xxxx"  # not referenced by any other code in this script

    # Per enabled host: subquery "a" counts enabled items with delay < 900
    # whose lastclock falls within the last 1800 seconds, subquery "b" counts
    # all enabled items with delay < 900. RIGHT JOIN + IFNULL keeps hosts that
    # have no recently updated item (0 percent). Only hosts below 80 percent
    # are returned, lowest first.
    sql = """
select b.hostname ,c.ip,a.update_percent as uppercent from (
select b.hostid,ROUND(IFNULL(a.aa,0)*100/b.bb,2) as update_percent from
(select hostid,count(*) as aa from items where lastclock > UNIX_TIMESTAMP()-1800 and delay < 900 and hostid in (select hostid from hosts where status=0) and status = 0 group by hostid ) a RIGHT JOIN
(select hostid,count(*) as bb from items where delay < 900 and status = 0 and hostid in (select hostid from hosts where status=0) group by hostid) b
ON a.hostid=b.hostid)a,(select hostid,lower(host) as hostname from hosts where status=0)b, (select hostid,ip from interface where type='1')c where a.hostid=b.hostid and b.hostid=c.hostid
having(a.update_percent) < 80 order by uppercent;
"""
    allInfo = getAll(sql)
    getReport(allInfo)
|
产生的报警邮件如下:
本文转自菜菜光 51CTO博客,原文链接:http://blog.51cto.com/caiguangguang/1345789,如需转载请自行联系原作者