最近机房总流量总是异常,我不断地接到短信与电话报警,收到后通过cacti查看十分麻烦且浪费时间。为了解决这个问题,我自己写了一个脚本,从数据库里获取所有主机的监控数据,然后输出流量超过10Mbps的主机信息,这样能快速帮我判断异常流量主机。
脚本使用python编写,使用MySQLdb从zabbix数据库里获取流量数据,经过流量判断后再把数据写入到excel里。
使用前需要安装MySQLdb、xlwt模块,可以使用easy_install安装。
下面是脚本内容
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
|
#!/usr/bin/env python
#-*- coding: utf-8 -*-
#author:Deng Lei
#email: dl528888@gmail.com
import sys
import time

import MySQLdb
import xlwt
# Interface names queried for every host.  Hosts name their NICs differently
# depending on platform/OS release, so every candidate is probed.
# NOTE(review): the accompanying article says lower-priority NICs are skipped
# once a higher-priority one is found for a host, but the original
# de-duplication key included the NIC name, so that never happened.  The
# original behaviour is preserved here -- confirm the intent before changing.
network_device = ['em2', 'eth1', 'eth0']

# Rows whose peak value reaches this figure are reported.  The value is the
# trend maximum divided by 1000; the report divides by 1000 again and labels
# the result "Mbps", so this corresponds to 10 in the report's unit.
TRAFFIC_THRESHOLD = 10000

# Shared head of the trend query.  Percent signs are doubled because the
# driver applies %-formatting when parameters are supplied to execute().
_TRAFFIC_SELECT = (
    "select from_unixtime(hi.clock,'%%Y-%%m-%%d %%T') as Date,"
    "g.name as Group_Name,h.host as Host,"
    "round(max(hi.value_max)/1000,0) as Network"
    " from hosts_groups hg"
    " join groups g on g.groupid = hg.groupid"
    " join items i on hg.hostid = i.hostid"
    " join hosts h on h.hostid=i.hostid"
    " join trends_uint hi on i.itemid = hi.itemid"
    " where i.key_=%s"
)


def parse_hour(raw):
    """Return *raw* as an integer hour of day, raising ValueError if invalid.

    Accepts ints or numeric strings such as ``'09'``.  Only 0..23 is allowed
    because the value is embedded in a ``YYYY-MM-DD HH:00:00`` timestamp.
    """
    try:
        hour = int(raw)
    except (TypeError, ValueError):
        raise ValueError("hour must be an integer between 0 and 23, got %r"
                         % (raw,))
    if not 0 <= hour <= 23:
        raise ValueError("hour must be an integer between 0 and 23, got %r"
                         % (raw,))
    return hour


def resolve_window(start_arg=None, end_arg=None, now=None):
    """Work out the query window and the label used in the report file name.

    start_arg / end_arg are the optional command-line hours (today's date is
    assumed).  When start_arg is missing the window starts one hour before
    the current time; when end_arg is missing it ends at the current hour.
    *now* is an epoch timestamp and exists so the logic can be tested.

    Returns ``(start_stamp, end_stamp, label)`` where the stamps are
    ``'YYYY-MM-DD HH:00:00'`` strings and label is ``'<today>-[<start>-<end>]'``.
    Raises ValueError for hours outside 0..23.
    """
    if now is None:
        now = time.time()
    current = time.localtime(now)
    today = time.strftime('%Y-%m-%d', current)

    end_label = current.tm_hour if end_arg is None else end_arg
    end_hour = parse_hour(end_label)

    if start_arg is None:
        # "current hour - 1" used to become -1 at midnight, which produced an
        # invalid datetime literal; step back a real hour so the window
        # starts at 23:00 on the previous day instead.
        previous = time.localtime(now - 3600)
        start_day = time.strftime('%Y-%m-%d', previous)
        start_label = previous.tm_hour
    else:
        start_day = today
        start_label = start_arg
    start_hour = parse_hour(start_label)

    start_stamp = '%s %02d:00:00' % (start_day, start_hour)
    end_stamp = '%s %02d:00:00' % (today, end_hour)
    label = "%s-[%s-%s]" % (today, start_label, end_label)
    return start_stamp, end_stamp, label


def build_traffic_query(direction, device, start_stamp, end_stamp, room='all'):
    """Return ``(sql, params)`` for one direction ('in'/'out') and one NIC.

    Every user-controlled value travels in *params* so the driver escapes it;
    nothing from the command line is spliced into the SQL text.  When room is
    not ``'all'`` the host group name must start with it.
    """
    sql = _TRAFFIC_SELECT
    params = ['net.if.%s[%s]' % (direction, device)]
    if room != 'all':
        sql += " and g.name like %s"
        params.append(room + "%")
    sql += (" and hi.clock >= UNIX_TIMESTAMP(%s)"
            " and hi.clock < UNIX_TIMESTAMP(%s)"
            " group by h.host;")
    params.extend([start_stamp, end_stamp])
    return sql, tuple(params)


def collect_traffic(cursor, room, start_stamp, end_stamp):
    """Query inbound then outbound peaks for every NIC in network_device.

    Returns a list of ``(group_name, host, value, device, source)`` tuples in
    query order; source is 'In' or 'out' exactly as written to the report.
    """
    records = []
    seen = set()
    for direction, source in (('in', 'In'), ('out', 'out')):
        for device in network_device:
            sql, params = build_traffic_query(
                direction, device, start_stamp, end_stamp, room)
            cursor.execute(sql, params)
            for row in cursor.fetchall():
                group_name, host, value = row[1], row[2], row[3]
                if value is None:
                    # No usable trend value for this host; nothing to report.
                    continue
                key = (group_name, host, device, source)
                if key in seen:
                    continue
                seen.add(key)
                records.append((group_name, host, float(value), device, source))
    return records


def select_heavy_hosts(records, threshold=TRAFFIC_THRESHOLD):
    """Keep records at or above *threshold* and sort them by rate, descending.

    The rate is divided by 1000 for the report.  Sorting is stable, so equal
    rates keep their query order.
    """
    heavy = [
        (group_name, host, value / 1000, device, source)
        for group_name, host, value, device, source in records
        if value >= threshold
    ]
    return sorted(heavy, key=lambda row: row[2], reverse=True)


def write_report(rows, path):
    """Write the header line plus *rows* to an .xls workbook at *path*."""
    wb = xlwt.Workbook()
    ws = wb.add_sheet('zabbix', cell_overwrite_ok=True)
    headers = (u'报警组', u'主机', u'流量(Mbps)', u'网卡名', u'方向')
    for col, title in enumerate(headers):
        ws.write(0, col, title)
    for row_index, row in enumerate(rows, 1):
        for col, value in enumerate(row):
            ws.write(row_index, col, value)
    ws.col(0).width = 3333 * 3
    ws.col(1).width = 3333
    wb.save(path)


def main(argv=None):
    """Entry point: ``script [room] [start_hour] [end_hour]``.

    room defaults to 'all'; the hours default to the last full hour up to the
    current hour.  The report is saved under /tmp/.
    """
    if sys.version_info[0] == 2:
        # Python 2 only: make implicit str<->unicode conversions use UTF-8,
        # as the original script did.
        reload(sys)
        sys.setdefaultencoding('utf8')
    if argv is None:
        argv = sys.argv

    room = argv[1] if len(argv) > 1 else 'all'
    start_arg = argv[2] if len(argv) > 2 else None
    end_arg = argv[3] if len(argv) > 3 else None
    try:
        start_stamp, end_stamp, label = resolve_window(start_arg, end_arg)
    except ValueError as exc:
        sys.exit(str(exc))

    mysql_conn = MySQLdb.connect(host='10.10.14.11', user='zabbix',
                                 passwd='zabbix', port=3306, charset="utf8")
    try:
        mysql_cur = mysql_conn.cursor()
        try:
            mysql_conn.select_db('zabbix')
            records = collect_traffic(mysql_cur, room, start_stamp, end_stamp)
        finally:
            mysql_cur.close()
    finally:
        # Release the connection even when a query fails.
        mysql_conn.close()

    write_report(select_heavy_hosts(records),
                 '/tmp/zabbix_network_traffic-%s.xls' % label)


if __name__ == "__main__":
    main()
|
运行的话,参数信息如下:
第一个参数是机房信息,比如我有2个机房,分别是A与B,我就想查看A机房的,那么第一个参数就写'A';
第二个参数是开始时间,如09;
第三个参数是结束时间,如13;
比如我想查看A机房上午9点到下午1点(13点)流量超过10Mbps的主机,那么可以使用
1
|
python check_zabbix_network_traffic.py 'A' 09 13
|
默认输出目录是/tmp/,文件名格式是zabbix_network_traffic-当天日期-[开始时间-结束时间],如zabbix_network_traffic-2015-08-19-[0-13].xls
效果为
报警组是zabbix里的报警组描述,主机就是ip信息,网卡名就是检测的网卡名,方向就是流入或者流出流量。
对于网卡名我再多描述一下,默认脚本里,网卡为em2、eth1、eth0
1
|
# Candidate NIC names the script queries.
network_device = ['em2', 'eth1', 'eth0']
|
为什么写这个呢,就是因为我这里主机网卡信息很混乱,比如openstack的云平台主机就一个网卡eth0,而centos5的网卡就是eth0与eth1,centos6与centos7就都是em1与em2,这就导致不同平台与不同系统上,公网网卡的设备名不一样,所以我这个脚本里做了判断,如果有em2网卡,就不检测eth1与eth0了,依此类推。
我写的目的就是为了快速查看一次流量,大家如果有其他的需求,可以自己根据需求改。
本文转自 reinxu 51CTO博客,原文链接:http://blog.51cto.com/dl528888/1685932,如需转载请自行联系原作者