Graylog服务状态异常监测与告警的shell脚本
需求场景
1、当graylog-server服务状态异常时发送异常告警
2、当graylog-server服务状态从异常状态恢复时发送恢复告警
实现的脚本如下
之前有写过类似脚本 【优化篇】使用Keepalived实现简单的GrayLog高可用
下面借助chatgpt重新编写了一个
vim /opt/check_graylog_status.sh
#!/bin/bash
#
# check_graylog_status.sh - notify DingTalk when graylog-server changes state.
#
# Compares the current "Active:" state of the graylog-server systemd unit with
# the state recorded by the previous run, and posts a DingTalk markdown message
# when the service leaves, or returns to, the expected healthy state. Nothing
# is sent while the state is unchanged. Intended to be run from cron.
#
# Every setting below can be overridden from the environment.

set -u

# Activity log. The variable name is historical: this is a log, not a lock.
LOCK_FILE="${LOCK_FILE:-/var/log/graylog-server_record.log}"

# DingTalk robot webhook URL.
# NOTE(review): the access token is a credential; prefer exporting WEBHOOK_URL
# from a protected environment file over keeping it in this script.
WEBHOOK_URL="${WEBHOOK_URL:-https://oapi.dingtalk.com/robot/send?access_token=b672dc14ee9a965d75a1fe9XXXXXXXXXX09586594572abf8f41cf}"

# State observed by the previous run.
STATUS_FILE="${STATUS_FILE:-/tmp/graylog-server_status.txt}"

# File holding the state that counts as healthy. Create it with:
#   echo "Active: active (running)" > /opt/graylog-server_normal_status.txt
NORMAL_STATUS_FILE="${NORMAL_STATUS_FILE:-/opt/graylog-server_normal_status.txt}"

# Healthy state assumed when NORMAL_STATUS_FILE is missing or empty.
readonly DEFAULT_NORMAL_STATUS="Active: active (running)"

#######################################
# Appends every argument as its own line to the activity log.
#######################################
log() {
  printf '%s\n' "$@" >>"$LOCK_FILE" 2>&1
}

#######################################
# Prints the first line of a file, or nothing if the file is unreadable.
# Arguments: $1 - path of the file to read
#######################################
read_first_line() {
  local line=""
  if [[ -r "$1" ]]; then
    # read returns non-zero on a file without a trailing newline but still
    # fills the variable, so the status is ignored on purpose.
    IFS= read -r line <"$1" || true
  fi
  printf '%s' "$line"
}

#######################################
# Prints the unit state, e.g. "Active: active (running)": the "Active:" line
# of `systemctl status`, cut before the "since ..." timestamp and trimmed.
# Only the first matching line is used, so journal lines that happen to
# contain the word "Active" cannot leak into the result.
#######################################
current_unit_status() {
  /usr/bin/systemctl status graylog-server \
    | awk -F 'since' '
        /^[ \t]*Active:/ {
          gsub(/^[ \t]+|[ \t]+$/, "", $1)
          print $1
          exit
        }'
}

#######################################
# Escapes backslashes and double quotes so the value can be embedded in a
# JSON string literal.
# Arguments: $1 - raw text
#######################################
json_escape() {
  local text=$1
  text=${text//\\/\\\\}
  text=${text//\"/\\\"}
  printf '%s' "$text"
}

#######################################
# Logs the event and posts the markdown message to the DingTalk webhook.
# Arguments: $1 - log line describing the event
#            $2 - markdown text; must already be valid inside a JSON string
#                 (it intentionally carries literal \n escapes)
# Outputs:   the webhook response on stdout
# Returns:   curl's exit status; non-zero means the message was not delivered
#######################################
notify() {
  local note=$1 text=$2 payload
  log "--------------------------------" \
    "$(date +"%Y-%m-%d %H:%M:%S")" \
    "$note" \
    "$text"
  # %s copies the text verbatim, so its \n sequences reach DingTalk as JSON
  # newline escapes.
  printf -v payload \
    '{"msgtype":"markdown","markdown":{"title":"告警通知","text":"%s"}}' \
    "$text"
  # --fail turns HTTP errors into a non-zero exit status; --max-time keeps a
  # hung connection from piling up overlapping cron runs.
  curl -s --fail --max-time 10 \
    -H "Content-Type: application/json" \
    -d "$payload" \
    "$WEBHOOK_URL"
}

#######################################
# Detects a state change and sends the matching notification.
#######################################
main() {
  local current_time current_status previous_status normal_status
  local status_json note text

  current_time=$(date +"%Y-%m-%d %H:%M:%S")

  current_status=$(current_unit_status)
  # No "Active:" line (systemctl failed or the unit is missing) is itself an
  # abnormal condition worth reporting, not an empty string.
  [[ -n "$current_status" ]] || current_status="Active: unknown"

  previous_status=$(read_first_line "$STATUS_FILE")
  normal_status=$(read_first_line "$NORMAL_STATUS_FILE")
  [[ -n "$normal_status" ]] || normal_status=$DEFAULT_NORMAL_STATUS

  status_json=$(json_escape "$current_status")

  if [[ "$current_status" == "$normal_status" \
    && "$previous_status" != "$normal_status" ]]; then
    # Healthy now, but the last recorded state was not (this includes the very
    # first run, when nothing has been recorded yet): recovery notice.
    note="服务状态已恢复运行,发送dingding告警"
    text="【告警通知】:graylog-server 状态已恢复正常。\n\n【时间】:$current_time \n\n【graylog-server状态】:<font color=#67C23A>$status_json</font>"
  elif [[ "$current_status" != "$previous_status" ]]; then
    # Changed to a state that is not the healthy one: failure alert.
    note="服务状态异常,发送dingding告警"
    text="【告警通知】:graylog-server 状态异常告警!\n\n【时间】:$current_time \n\n 【graylog-server状态】:<font color=#FF0000>$status_json</font>"
  else
    # State unchanged since the previous run: stay silent.
    return 0
  fi

  if notify "$note" "$text"; then
    # Record the new state only after the message went out, so a failed
    # delivery is retried on the next run instead of being lost.
    printf '%s\n' "$current_status" >"$STATUS_FILE"
  else
    log "webhook delivery failed; state not recorded, next run will retry"
  fi
  log "--------------------------------"
}

main
部署步骤
1、创建相关文件
touch /tmp/graylog-server_status.txt
touch /opt/graylog-server_normal_status.txt
echo "Active: active (running)" > /opt/graylog-server_normal_status.txt
2、定时任务
先给脚本加上可执行权限,再执行 crontab -e 并添加下面这一行(每分钟检查一次):
chmod +x /opt/check_graylog_status.sh
crontab -e
* * * * * /opt/check_graylog_status.sh
3、测试
正常运行时停止graylog-server
systemctl stop graylog-server
异常时再启动graylog-server
systemctl start graylog-server
最终的效果如下:只有状态发生变化时才告警