我们在集成微服务框架的时候,涉及服务太多,如果是单节点的话,遇到凌晨服务挂起的问题会很麻烦。并且原生的监控也不是很理想。这里结合nacos,再通过钉钉来发送服务下线告警,这样可在第一时间确定服务异常并及时处理。
一、引入相关依赖
pom.xml
<!-- SpringCloud Alibaba Nacos -->
<dependency>
<groupId>com.alibaba.cloud</groupId>
<artifactId>spring-cloud-starter-alibaba-nacos-discovery</artifactId>
</dependency>
<!-- SpringCloud Alibaba Nacos Config -->
<dependency>
<groupId>com.alibaba.cloud</groupId>
<artifactId>spring-cloud-starter-alibaba-nacos-config</artifactId>
</dependency>
<!-- SpringBoot Web -->
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-web</artifactId>
</dependency>
<!-- Spring Security -->
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-security</artifactId>
</dependency>
<!-- Spring Context Support -->
<dependency>
<groupId>org.springframework</groupId>
<artifactId>spring-context-support</artifactId>
</dependency>
<!-- DingTalk service SDK (the only dependency here with an explicitly pinned version) -->
<dependency>
<groupId>com.aliyun</groupId>
<artifactId>alibaba-dingtalk-service-sdk</artifactId>
<version>2.0.0</version>
</dependency>
二、服务状态监控逻辑
/**
 * Subscribes to Nacos naming events for every configured service and tracks
 * which of them currently have no registered instance.
 *
 * <p>When a service's instance list becomes empty, the service name and the
 * current time are recorded in {@link #cache} and, if alerting is enabled, a
 * delayed alert task is started through {@link DingtalkService}. When instances
 * are reported again the entry is removed.
 */
@Slf4j
@Component
@RefreshScope
public class ServiceStatusListener implements InitializingBean {
    @Autowired
    private MonitorConfig config;

    @Value("${spring.cloud.nacos.discovery.server-addr}")
    private String nacosUrl;

    @Value("${spring.cloud.nacos.discovery.namespace}")
    private String namespace;

    /** Service name -> epoch millis at which the service was first seen without instances. */
    static Map<String, Long> cache = new ConcurrentHashMap<>();

    @Autowired
    private DingtalkService dingtalkService;

    @PreDestroy
    public void preDestroy() {
        log.info("preDestroy....");
    }

    /**
     * Creates a Nacos naming client and subscribes to every configured service.
     *
     * @throws Exception if the naming service cannot be created or a subscription fails
     */
    @Override
    public void afterPropertiesSet() throws Exception {
        log.info("afterPropertiesSet........");
        // Use a private Properties object: writing "serverAddr"/"namespace" into
        // System.getProperties() would change JVM-wide state for every other reader.
        Properties properties = new Properties();
        properties.setProperty("serverAddr", nacosUrl);
        properties.setProperty("namespace", namespace);
        NamingService naming = NamingFactory.createNamingService(properties);

        List<String> serviceNames = config.getServices();
        if (serviceNames == null || serviceNames.isEmpty()) {
            log.warn("未配置需要监控的服务,跳过订阅");
            return;
        }
        log.info("需要监控的服务数:{}", serviceNames.size());
        for (String service : serviceNames) {
            naming.subscribe(service, event -> {
                if (!(event instanceof NamingEvent)) {
                    return;
                }
                NamingEvent namingEvent = (NamingEvent) event;
                onInstancesChanged(namingEvent.getServiceName(), namingEvent.getInstances());
            });
        }
    }

    /**
     * Updates the down-service cache for one naming event and starts the alert
     * task the first time a service is seen without instances.
     */
    private void onInstancesChanged(String serviceName, List<Instance> instances) {
        if (instances != null && !instances.isEmpty()) {
            log.info("服务【{}】已启动", serviceName);
            cache.remove(serviceName);
            return;
        }
        log.info("服务【{}】未启动,加入监听列表", serviceName);
        // Keep the first-seen timestamp; a repeated "empty" event must not start
        // a second blocking alert task for the same outage.
        boolean newlyDown = cache.putIfAbsent(serviceName, System.currentTimeMillis()) == null;
        // Boolean.TRUE.equals tolerates a null Boolean from the config object.
        if (newlyDown && Boolean.TRUE.equals(config.getEnabled())) {
            dingtalkService.sendMsgByInterval(serviceName);
        }
    }
}
三、钉钉发送告警逻辑
/**
 * Sends DingTalk alerts for services that stay without instances longer than
 * the configured interval.
 */
@Slf4j
@Component
@RefreshScope
public class DingtalkService {
    /** Alert text; the placeholder is replaced by the service name. */
    private static final String TEMPLATE = "【%s】服务下线, 服务异常下线告警";

    /** Seconds to wait between two checks of the down-service cache. */
    private static final long POLL_SECONDS = 10;

    @Autowired
    private MonitorConfig config;

    @Value("${dingtalk.webhook}")
    private String webhook;

    @Value("${dingtalk.secret}")
    private String secret;

    /**
     * Polls the down-service cache for {@code serviceName} and sends one alert
     * once the service has been down for at least the configured interval.
     * Runs asynchronously on the {@code taskExecutor} pool.
     *
     * <p>The loop ends when the entry disappears from the cache, when the alert
     * has been sent, or when the thread is interrupted.
     *
     * @param serviceName name of the service being watched
     */
    @Async("taskExecutor")
    public void sendMsgByInterval(String serviceName) {
        // Configured in minutes; multiply as long so a large value cannot overflow int.
        long intervalMillis = config.getInterval() * 60_000L;
        while (true) {
            // Re-read each round: the entry may have been removed, or replaced by a
            // newer outage, while this thread was sleeping.
            Long downSince = ServiceStatusListener.cache.get(serviceName);
            if (downSince == null) {
                return;
            }
            if (System.currentTimeMillis() - downSince >= intervalMillis) {
                sendMsg(serviceName);
                ServiceStatusListener.cache.remove(serviceName);
                return;
            }
            try {
                log.info("{}服务监控中...", serviceName);
                TimeUnit.SECONDS.sleep(POLL_SECONDS);
            } catch (InterruptedException e) {
                // Restore the flag and stop so an executor shutdown is not blocked.
                Thread.currentThread().interrupt();
                log.warn("{}服务监控被中断", serviceName);
                return;
            }
        }
    }

    /**
     * Builds the alert message for {@code service} and pushes it through
     * {@code DingTalkUtil}.
     *
     * @param service name of the service that is down
     */
    public void sendMsg(String service) {
        log.info("【{}】服务下线,发送钉钉提醒", service);
        SendMessage error = new SendMessage();
        error.setSecret(secret);
        error.setWebhook(webhook);
        error.setText(String.format(TEMPLATE, service));
        DingTalkUtil.sendMsg(error);
    }
}
四、钉钉推送工具
推送到钉钉群,可@单人,也可以@所有人。工具类 DingTalkUtil 的发送方法如下:
/**
 * Sends a signed text message to a DingTalk robot webhook and @-mentions everyone.
 *
 * <p>The signature is the URL-encoded Base64 of HMAC-SHA256 over
 * {@code timestamp + "\n" + secret}, keyed with the secret. It is appended to
 * the webhook URL together with the timestamp. Any failure is logged and not
 * rethrown.
 *
 * @param msg carries the webhook URL, the signing secret and the text to send
 */
public static void sendMsg(SendMessage msg) {
    try {
        Long timestamp = System.currentTimeMillis();
        String secret = msg.getSecret();
        String stringToSign = timestamp + "\n" + secret;
        Mac mac = Mac.getInstance("HmacSHA256");
        mac.init(new SecretKeySpec(secret.getBytes("UTF-8"), "HmacSHA256"));
        byte[] signData = mac.doFinal(stringToSign.getBytes("UTF-8"));
        // Decode the Base64 bytes with an explicit charset instead of the platform default.
        String sign = URLEncoder.encode(new String(Base64.encodeBase64(signData), "UTF-8"), "UTF-8");
        // The webhook already ends with "?access_token=...", so further
        // parameters are appended with '&'.
        DingTalkClient client = new DefaultDingTalkClient(
                msg.getWebhook() + "&timestamp=" + timestamp + "&sign=" + sign);
        OapiRobotSendRequest request = new OapiRobotSendRequest();
        OapiRobotSendRequest.At at = new OapiRobotSendRequest.At();
        // Mention everyone in the group.
        at.setIsAtAll(true);
        request.setAt(at);
        // Plain text message.
        request.setMsgtype("text");
        OapiRobotSendRequest.Text text = new OapiRobotSendRequest.Text();
        text.setContent(msg.getText());
        request.setText(text);
        OapiRobotSendResponse response = client.execute(request);
        log.info("钉钉推送返回结果:{}", response);
    } catch (Exception e) {
        log.error("钉钉通知异常", e);
    }
}
五、多线程配置
/**
 * Enables {@code @Async} processing and declares the {@code taskExecutor}
 * thread pool bean.
 */
@Slf4j
@EnableAsync
@Configuration
public class ThreadPoolTaskConfig {
    /** Prefix for the names of threads created by the pool. */
    private static final String THREAD_NAME_PREFIX = "monitor-";

    /** Core (default) number of threads. */
    private static final int CORE_POOL_SIZE = 10;

    /** Upper bound on the number of threads. */
    private static final int MAX_POOL_SIZE = 15;

    /** Capacity of the queue that buffers submitted tasks. */
    private static final int QUEUE_CAPACITY = 20;

    /** Keep-alive time, in seconds, passed to the pool. */
    private static final int KEEP_ALIVE_TIME = 10;

    /**
     * Builds and initializes the executor registered as {@code taskExecutor}.
     *
     * @return the configured, already initialized executor
     */
    @Bean("taskExecutor")
    public ThreadPoolTaskExecutor taskExecutor() {
        final ThreadPoolTaskExecutor pool = new ThreadPoolTaskExecutor();
        pool.setThreadNamePrefix(THREAD_NAME_PREFIX);

        // Sizing.
        pool.setCorePoolSize(CORE_POOL_SIZE);
        pool.setMaxPoolSize(MAX_POOL_SIZE);
        pool.setKeepAliveSeconds(KEEP_ALIVE_TIME);
        pool.setQueueCapacity(QUEUE_CAPACITY);

        // Rejection policy: a rejected task is run by the thread that submitted it.
        pool.setRejectedExecutionHandler(new ThreadPoolExecutor.CallerRunsPolicy());

        pool.initialize();
        return pool;
    }
}
还有一个服务监控的配置类省略掉了,可以根据配置文件自己生成
六、相关配置
# Spring
spring:
cloud:
nacos:
discovery:
# 服务注册地址
server-addr: xxx:8849
# 命名空间
namespace: xxx
config:
# 配置中心地址
server-addr: xxx:8849
# 命名空间
namespace: xxx
# 配置文件格式
file-extension: yml
# 共享配置
shared-configs:
- application-${spring.profiles.active}.${spring.cloud.nacos.config.file-extension}
# 以下配置可以放到nacos上面
# 服务监控
monitor:
alarm:
# 是否开启提醒
enabled: true
# 服务监控列表
services: xxx,xxxx
# 服务最多停止几分钟(启动时),大于这个时间则可判断为离线
interval: 2
# 钉钉机器人
dingtalk:
webhook: https://oapi.dingtalk.com/robot/send?access_token=xxx
secret: xxx
关键的逻辑代码已经贴出来了,希望对大家有所帮助