1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
|
1. cdn nginx httpcode分析脚本
http://caiguangguang.blog.51cto.com/1652935/1371902
# Excerpt of the CDN nginx http-code analysis script. Several names used below
# (count_200, count_300, sum_time_source, web_code, backup_server, id_list_200,
# server_list) are defined in parts of the script that are not shown here.

# Status codes treated as successful responses.
# NOTE(review): the original remark reads "updated at the origin, CDN codes" - confirm intent.
id_list_success = [200, 206, 300, 301, 302, 303, 304, 305, 306, 307]

flow = 0              # traffic of the status codes listed above
flow1 = 0             # total traffic
flow_ppsucai = 0      # traffic of the matching domain
count = 0             # total number of log lines
# Three buckets ("three states" in the original note).
count_sucai = 0
count_sucai_100 = 0
count_sucai_30_100 = 0
count_sucai_30 = 0
sum_time = 0.0        # accumulated time used for the statistics

count_success = count_200 + count_300
# Guard against an empty sample: dividing by zero would abort the whole report.
response_time = round(sum_time / count_success, 2) if count_success else 0.0  # all requests
response_time_source = (
    round(sum_time_source / count_success, 2) if count_success else 0.0
)  # "200 response time" in the original note

count_200_backup = 0
count_not_200_backup = 0
if web_code not in id_list_200 and backup_server not in server_list:
    # print web_code, backup_server
    count_not_200_backup += 1
|
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
|
2.
根据现有的域名监控
参考 zabbix应用之nginx统一监控
accepts,handled,requests,active,reading,writing,waiting
cat nginx_site_discovery.conf
UserParameter=nginxSiteDiscovery,bash /usr/local/zabbix/bin/nginx_monitor.sh nginxSiteDiscovery
UserParameter=getNginxStatus[*],bash /usr/local/zabbix/bin/nginx_monitor.sh getNginxStatus "$1" "$2"
cat /usr/local/zabbix/bin/nginx_monitor.sh
#!/bin/bash
#
# Filename:    nginx_monitor.sh
# Revision:    1.0
# Date:        2014/09/24
# Author:      Qicheng
# Email:
# Website:     http://qicheng0211.blog.51cto.com
# Description: unified nginx monitoring script for Zabbix
# Notes:
#
# Usage: nginx_monitor.sh <function> [<zabbix host name> <nginx site URL>]

# Path of the local zabbix agent configuration file; adjust to your install.
AGENT_CONF="/usr/local/zabbix/etc/zabbix_agentd.conf"
# Path of the file that lists the nginx sites to monitor.
NGINX_SITE_CONF="/usr/local/zabbix/scripts/nginx_site.conf"
# Path of the zabbix_sender binary.
ZBX_SENDER="/usr/local/bin/zabbix_sender"

# Positional arguments: function to run, zabbix host name, nginx site URL.
FUNCTION=$1
HOST_NAME=$2
NGINX_SITE=$3

CURL="/usr/bin/curl"
# Connection timeout handed to curl.
TIMEOUT=30
#######################################
# nginx site low-level discovery.
# Reads every whitespace-separated entry from the lines of NGINX_SITE_CONF that
# do not start with '#', and prints them as a JSON document of the form
#   { "data":[ {"{#NGINX_SITE}":"<site>"}, ... ] }
# Globals:   NGINX_SITE_CONF (read)
# Arguments: none
# Outputs:   the discovery JSON on stdout
#######################################
nginxSiteDiscovery() {
  local -a sites=() words=()
  local site sep=''

  # read -a splits on whitespace like the original unquoted $(grep ...), but
  # without glob-expanding the entries.
  while read -r -a words; do
    sites+=("${words[@]}")
  done < <(grep '^[^#]' -- "${NGINX_SITE_CONF}")

  printf '{\n\t"data":['
  for site in "${sites[@]}"; do
    # The site is passed as an argument, never as part of the format string,
    # so a '%' inside it is printed literally.
    printf '%s\n\t\t{"{#NGINX_SITE}":"%s"}' "$sep" "$site"
    sep=','
  done
  printf '\n\t]\n}\n'
}
#######################################
# Fetch the nginx status page of one site and send its values to the zabbix
# server through zabbix_sender.
# Globals:   NGINX_SITE, HOST_NAME, CURL, TIMEOUT, ZBX_SENDER, AGENT_CONF (read)
# Arguments: none
# Outputs:   the fetched status page, a progress message and whatever
#            zabbix_sender prints, all on stdout
# Returns:   exits the script with status 1 when the fetched page does not
#            have exactly four lines
#######################################
getNginxStatus() {
  local status_url="${NGINX_SITE}/nginx_status"
  local status_file line_num
  local active reading writing waiting accepts handled requests

  # The fetched page is kept in a per-site file: scheme stripped, '/' -> '_'.
  status_file="/tmp/nginx_status_$(printf '%s\n' "${NGINX_SITE}" \
    | sed 's#^http.*://##; s#/#_#g').log"
  : > "$status_file"

  # Fetch the status page with curl; tee keeps a copy and echoes it to stdout.
  "${CURL}" -s --connect-timeout "${TIMEOUT}" "$status_url" 2>&1 \
    | tee "$status_file"

  # The page must have exactly four lines, otherwise the fetch went wrong.
  line_num=$(wc -l < "$status_file")
  if ((line_num != 4)); then
    echo "ERROR: $status_file is not correct."
    exit 1
  fi

  active=$(awk '/Active/ {print $NF}' "$status_file")
  reading=$(awk '/Reading/ {print $2}' "$status_file")
  writing=$(awk '/Writing/ {print $4}' "$status_file")
  waiting=$(awk '/Waiting/ {print $6}' "$status_file")
  # The third line carries the accepts / handled / requests counters.
  accepts=$(awk 'NR == 3 {print $1}' "$status_file")
  handled=$(awk 'NR == 3 {print $2}' "$status_file")
  requests=$(awk 'NR == 3 {print $3}' "$status_file")

  echo "Sending the data to zabbix server..."
  # zabbix_sender reads one "<hostname> <key> <value>" record per line from stdin.
  "${ZBX_SENDER}" -c "${AGENT_CONF}" -i - << EOF
"${HOST_NAME}" "nginx_status[$NGINX_SITE,active]" "${active}"
"${HOST_NAME}" "nginx_status[$NGINX_SITE,reading]" "${reading}"
"${HOST_NAME}" "nginx_status[$NGINX_SITE,writing]" "${writing}"
"${HOST_NAME}" "nginx_status[$NGINX_SITE,waiting]" "${waiting}"
"${HOST_NAME}" "nginx_status[$NGINX_SITE,accepts]" "${accepts}"
"${HOST_NAME}" "nginx_status[$NGINX_SITE,handled]" "${handled}"
"${HOST_NAME}" "nginx_status[$NGINX_SITE,requests]" "${requests}"
EOF
}
# Entry point: require at least one argument, then run the function named by
# FUNCTION if it is one of the two supported ones.
if [ $# -eq 0 ]; then
  echo "ERROR: The script needs at least one parameter."
  exit 1
fi

case "$FUNCTION" in
  nginxSiteDiscovery | getNginxStatus)
    "$FUNCTION"
    ;;
  *)
    echo "ERROR: Bad parameters."
    exit 1
    ;;
esac
|
效果图
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
|
3.nagios
监控nginx状态(check_nginx_status.pl)
3.1
被监控端设置
测试脚本
./check_nginx_status.pl -H 192.168.1.247 -s www.xxx.com -P 80
其中红色为nginx服务器的IP地址;绿色为需要监控的网站网址
vi /usr/local/nagios/etc/nrpe.cfg 添加
command[check_nginx]=/usr/local/nagios/libexec/check_nginx_status.pl -H 192.168.1.247 -s www.xxx.com -P 80
删除 /tmp 下生成的 192.168.1.247_check_nginx_status8d727909e5ace94dc547c3af50af6cb9
不然后面会报错!提示无法生成文件。
rm /tmp/192.168.1.247_check_nginx_status8d727909e5ace94dc547c3af50af6cb9
3.2
nagios主机的设置
/usr/local/nagios/libexec/check_nrpe -H 192.168.1.247 -c check_nginx
vi /usr/local/nagios/etc/nagios.cfg 添加
cfg_file=/usr/local/nagios/etc/objects/nginx.cfg
; Object definitions for the monitored nginx host and its NRPE-based services.
define host{
    use                     linux-server
    host_name               nginx
    alias                   nginx
    address                 被监控端IP          ; replace with the IP of the monitored host
    }

define service{
    use                     generic-service
    host_name               nginx
    service_description     check-swap
    check_command           check_nrpe!check_swap
    }

define service{
    use                     generic-service
    host_name               nginx
    service_description     check-load
    check_command           check_nrpe!check_load
    }

define service{
    use                     generic-service
    host_name               nginx
    service_description     check-disk
    check_command           check_nrpe!check_sda1
    }

define service{
    use                     generic-service
    host_name               nginx
    service_description     check-users
    check_command           check_nrpe!check_users
    }

define service{
    use                     generic-service
    host_name               nginx
    service_description     total_procs
    check_command           check_nrpe!check_total_procs
    }

define service{
    use                     generic-service     ; Name of service template to use
    host_name               nginx
    service_description     PING
    check_command           check_ping!100.0,20%!500.0,60%
    }

define service{
    use                     generic-service
    host_name               nginx
    service_description     nginx_status
    check_command           check_nrpe!check_nginx!
    notifications_enabled   0
    }
3.3
nagios服务器报错
NGINX UNKNOWN - unable to write temporary data in :/tmp/192.168.1.247_check_nginx_status8d727909e5ace94dc547c3af50af6cb9
解决方法:删除被监控主机 /tmp 下的文件 192.168.1.247_check_nginx_status8d727909e5ace94dc547c3af50af6cb9
rm /tmp/192.168.1.247_check_nginx_status8d727909e5ace94dc547c3af50af6cb9
|
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
|
4.zabbix
开源监控系列三(自定义key监控nginx_status)
1. 每台 nginx server 的主配置文件都配置 status 模块
2. 在一台 nginx server 上获取 nginx status 的值,写到本地一个临时文件
3. 按需求处理临时文件,使用 UserParameter=nginx_status[*] 自定义 key 值的方式
4. crontab 每分钟获取一次 nginx status 值
以下只在监控机上去做
1.
脚本
#!/bin/bash
# Collects the nginx status of web1..web3 into a working directory
# (/tmp/ngst unless NGST_DIR is set).

# Make sure the working directory exists, then drop the previous run's files.
ngst_dir="${NGST_DIR:-/tmp/ngst}"
mkdir -p -- "$ngst_dir"
# ':?' aborts instead of expanding to "/*" should the variable ever be empty.
rm -rf -- "${ngst_dir:?}"/*
#######################################
# Download the status page of web1..web3 (port 8080, path /ngst) into one
# file per host named web<N>.
# Globals:   NGST_DIR (read, optional; defaults to /tmp/ngst)
# Arguments: none
# Outputs:   writes <dir>/web1 .. <dir>/web3; curl's stderr is discarded
#######################################
GETSTATUS() {
  local dir="${NGST_DIR:-/tmp/ngst}"
  local ip
  for ip in {1..3}; do
    curl "http://web${ip}:8080/ngst" > "${dir}/web${ip}" 2> /dev/null
  done
}
#######################################
# Extract the Active / Reading / Writing / Waiting values from each downloaded
# status page and append them, one "<label> <value>" pair per line, to
# web<N>_status.
# Globals:   NGST_DIR (read, optional; defaults to /tmp/ngst)
# Arguments: none
# Outputs:   appends to <dir>/web1_status .. <dir>/web3_status
#######################################
HANDLE() {
  local dir="${NGST_DIR:-/tmp/ngst}"
  local i
  for i in {1..3}; do
    # One awk pass per file; the field positions are the ones the original
    # four grep|awk pipelines used.
    awk '
      /Active/  { print $1, $NF }
      /Reading/ { print $1, $2 }
      /Writing/ { print $3, $4 }
      /Waiting/ { print $5, $6 }
    ' "${dir}/web${i}" >> "${dir}/web${i}_status"
  done
}
#######################################
# Sum the Active connection counts of all *status files and store the result
# in the file "total".
# Globals:   NGST_DIR (read, optional; defaults to /tmp/ngst)
# Arguments: none
# Outputs:   writes <dir>/total (total number of Active connections)
#######################################
TOTAL() {
  local dir="${NGST_DIR:-/tmp/ngst}"
  # "total + 0" prints 0 instead of an empty line when no Active line exists.
  awk '/Active/ { total += $NF } END { print total + 0 }' "${dir}"/*status \
    > "${dir}/total"
}
# Run the three functions defined in this script, in this order.
GETSTATUS
HANDLE
TOTAL
2.
设置 zabbix agent 端:只需修改配置文件,添加如下几行(默认读者已知晓 zabbix 自定义 key 的用法)
UserParameter=nginx_status.total, cat /tmp/ngst/total
UserParameter=nginx_status[*], cat /tmp/ngst/web1 |grep "$1" |awk '{print $NF}'
UserParameter=nginx2_status[*], cat /tmp/ngst/web2 |grep "$1" |awk '{print $NF}'
UserParameter=nginx3_status[*], cat /tmp/ngst/web3 |grep "$1" |awk '{print $NF}'
# 只需要写这么几行就能实现对 Active、Reading 等或者其他值的监控。
|
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
|
5.zabbix_sender
主动上传k
/
v监控nginx日志状态码
http://benpaozhe.blog.51cto.com/10239098/1904654
在 items 多了以后,一些速度慢的 items 如果不采用主动模式,会把 server 端拖死;而 zabbix_sender 其实是一种变相的主动模式,配合计划任务,主动将 k/v 上传到 zabbix。现以 zabbix_sender 监控 nginx 日志状态码为例,抛砖引玉做下介绍
1. agent 端编写脚本和计划任务
需求是监控 nginx 日志的 200、400、401、403、404、499、502、503、504 状态码,按分钟进行数量统计上报。编写脚本如下
#!/bin/bash
# Counts the nginx status codes 200/400/401/403/404/499/502/503/504 logged
# during the previous minute and uploads the counts with zabbix_sender.
# Intended to be run from cron.

log_dir=/data1/ms/comos/logs/access.log   # nginx access log (a file, despite the name)
log_tmp_dir=/tmp/last_min_log             # holds the log lines of the last minute
senderfile=/tmp/sender_file               # k/v file handed to zabbix_sender
Hostname=$(hostname)                      # host name; validated against the name known to the server
# Time as it appears in the nginx log, one minute ago.
last_min=$(date -d "1 minute ago" '+%Y:%H:%M')

# At 500-800 qps the last 60000 lines cover the last minute, so only the tail
# of the log is scanned.
tail -n 60000 "${log_dir}" | grep -F -- "${last_min}" > "${log_tmp_dir}"

# Count the occurrences of each status code; the code is the first word
# after the quoted request.
awk -F '"' '{print $3}' "${log_tmp_dir}" \
  | awk '{print $1}' \
  | sort \
  | uniq -c > /tmp/stat

# Start from an empty zabbix_sender input file.
: > "$senderfile"

# One "<hostname> <key> <value>" line per status code; a code that did not
# occur is reported as 0.
for code in 200 400 401 403 404 499 502 503 504; do
  code_count=$(awk -v code="$code" '$2 == code {print $1}' /tmp/stat)
  echo "$Hostname nginx_stat${code} ${code_count:-0}" >> "$senderfile"
done

# Finally upload everything to the server.
/usr/local/zabbix/bin/zabbix_sender \
  -c /usr/local/zabbix/etc/zabbix_agentd.conf \
  -i "$senderfile"
添加到计划任务:
*/1 * * * * /usr/local/zabbix/script/nginxlog.sh 2>&1
2.
配置server端和grafana进行绘图
配置项如下,将
type
设置为Zabbix trapper
|
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
|
6.zabbix
监控nginx性能状态
1.
编写脚本来获取nginx的相关信息
vim /usr/local/zabbix/scripts/nginx-check_performance.sh
#!/bin/bash
##################################
# Zabbix monitoring script
#
# nginx:
#  - anything available via nginx stub-status module
#
# Usage: nginx-check_performance.sh <metric> [<status url>]
#   <metric> is one of: active_connections, accepted_connections,
#   handled_connections, handled_requests, reading, writing, waiting
##################################
# Contact:
#  vincent.viallet@gmail.com

# Zabbix requested parameter
ZBX_REQ_DATA="$1"
ZBX_REQ_DATA_URL="$2"

# Nginx defaults
NGINX_STATUS_DEFAULT_URL="www.baidu.com/nginx-status" # put the site's own domain here
WGET_BIN="/usr/bin/wget"

#
# Error handling:
#  - need to be displayable in Zabbix (avoid NOT_SUPPORTED)
#  - items need to be of type "float" (allow negative + float)
#
ERROR_NO_ACCESS_FILE="-0.9900"
ERROR_NO_ACCESS="-0.9901"
ERROR_WRONG_PARAM="-0.9902"
ERROR_DATA="-0.9903" # either can not connect / bad host / bad port

# Handle host and port if non-default
URL="${ZBX_REQ_DATA_URL:-$NGINX_STATUS_DEFAULT_URL}"

# save the nginx stats in a variable for future parsing
NGINX_STATS=$("$WGET_BIN" -q "$URL" -O - 2> /dev/null)
wget_rc=$?

# error during retrieve
if [[ $wget_rc -ne 0 || -z "$NGINX_STATS" ]]; then
  echo "$ERROR_DATA"
  exit 1
fi

#
# Extract data from nginx stats
#
case "$ZBX_REQ_DATA" in
  active_connections)
    echo "$NGINX_STATS" | head -n 1 | cut -f3 -d' '
    ;;
  accepted_connections)
    echo "$NGINX_STATS" | grep -Ev '[a-zA-Z]' | cut -f2 -d' '
    ;;
  handled_connections)
    echo "$NGINX_STATS" | grep -Ev '[a-zA-Z]' | cut -f3 -d' '
    ;;
  handled_requests)
    echo "$NGINX_STATS" | grep -Ev '[a-zA-Z]' | cut -f4 -d' '
    ;;
  reading)
    echo "$NGINX_STATS" | tail -n 1 | cut -f2 -d' '
    ;;
  writing)
    echo "$NGINX_STATS" | tail -n 1 | cut -f4 -d' '
    ;;
  waiting)
    echo "$NGINX_STATS" | tail -n 1 | cut -f6 -d' '
    ;;
  *)
    echo "$ERROR_WRONG_PARAM"
    exit 1
    ;;
esac

exit 0
[root@ittestserver1 opt]# chmod +x /usr/local/zabbix/scripts/nginx-check_performance.sh
-rw-r--r-x 1 root root 1645 2月 4 14:26 /usr/local/zabbix/scripts/nginx-check_performance.sh
2.
配置zabbix_agentd.conf。启用UserParameter,并配置相关的参数
[root@ittestserver1 opt]
# vim /usr/local/zabbix/etc/zabbix_agentd.conf
####### USER-DEFINED MONITORED PARAMETERS #######
### Option: UnsafeUserParameters
# Allow all characters to be passed in arguments to user-defined parameters.
# The following characters are not allowed:
# \ ' " ` * ? [ ] { } ~ $ ! & ; ( ) < > | # @
# Additionally, newline characters are not allowed.
# 0 - do not allow
# 1 - allow
#
# Mandatory: no
# Range: 0-1
# Default:
# UnsafeUserParameters=0
UnsafeUserParameters
=
1
### Option: UserParameter
# User-defined parameter to monitor. There can be several user-defined parameters.
# Format: UserParameter=<key>,<shell command>
# See 'zabbix_agentd' directory for examples.
#
# Mandatory: no
# Default:
# UserParameter=
UserParameter=nginx[*],/usr/local/zabbix/scripts/nginx-check_performance.sh "$1"
3. zabbix_get -s 10.253.17.20 -p 10050 -k "nginx[reading]"
|
名称 |
描述 |
|
Accepts(接受) |
NGINX 所接受的客户端连接数 |
资源: 功能 |
Handled(已处理) |
成功的客户端连接数 |
资源: 功能 |
Active(活跃) |
当前活跃的客户端连接数 |
资源: 功能 |
Dropped(已丢弃,计算得出) |
丢弃的连接数(接受 - 已处理) |
工作:错误* |
Requests(请求数) |
客户端请求数 |
工作:吞吐量 |
NGINX worker 进程接受 OS 的连接请求时 Accepts 计数器增加,而Handled 是当实际的请求得到连接时(通过建立一个新的连接或重新使用一个空闲的)。这两个计数器的值通常都是相同的,如果它们有差别则表明连接被Dropped, 往往这是由于资源限制,比如已经达到 NGINX 的worker_connections的限制.
备注:
Active :当前活跃的连接数。
Accepts: 接受的客户端连接数
Handled: 成功处理的连接数(服务器正常时,这两项应该是相等的)
Requests: 客户端请求数。(吞吐量)
Reading: 当接收到请求时,连接离开 Waiting 状态,并且该请求本身使 Reading 状态计数增加。在这种状态下 NGINX 会读取客户端请求首部。请求首部是比较小的,因此这通常是一个快速的操作。
Writing: 请求被读取之后,其使 Writing 状态计数增加,并保持在该状态,直到响应返回给客户端。这意味着,该请求在 Writing 状态时, 一方面 NGINX 等待来自上游系统的结果(系统放在 NGINX “后面”),另外一方面,NGINX 也在同时响应。请求往往会在 Writing 状态花费大量的时间。
Waiting: 活跃的连接也可以处 于 Waiting 子状态,如果有在此刻没有活跃请求的话。新连接可以绕过这个状态并直接变为到 Reading 状态,最常见的是在使用“accept filter(接受过滤器)” 和 “deferred accept(延迟接受)”时,在这种情况下,NGINX 不会接收 worker 进程的通知,直到它具有足够的数据才开始响应。如果连接设置为 keep-alive ,那么它在发送响应后将处于等待状态
writing,waiting这两个特别注意
本文转自 liqius 51CTO博客,原文链接:http://blog.51cto.com/szgb17/1909301,如需转载请自行联系原作者