prometheus专题—（四）Ansible批量安装node_exporter-阿里云开发者社区

prometheus专题—（四）Ansible批量安装node_exporter

2021-12-10 831

版权

本文内容由阿里云实名注册用户自发贡献，版权归原作者所有，阿里云开发者社区不拥有其著作权，亦不承担相应法律责任。具体规则请查看《阿里云开发者社区用户服务协议》和《阿里云开发者社区知识产权保护指引》。如果您发现本社区中有涉嫌抄袭的内容，填写侵权投诉表单进行举报，一经查实，本社区将立刻删除涉嫌侵权内容。

本文涉及的产品

可观测监控 Prometheus 版，每月50GB免费额度

可观测可视化 Grafana 版，10个用户账号 1个月

简介： 基础设置

基础设置

配置hosts
172.16.58.78   prome-master01
172.16.58.79   prome-node01
master上生成ssh key 并拷贝到node上
ssh-keygen
ssh-copy-id prome-node01
# 测试ssh联通（主机名需与上面hosts中配置的一致）
ssh prome-node01

master安装ansible

yum install -y ansible
# 关闭hostcheck 
vim /etc/ansible/ansible.cfg
ssh_args = -o ControlMaster=auto -o ControlPersist=60s -o StrictHostKeyChecking=no
playbook执行时需要设置机器文件 
cat <<EOF > /opt/tgzs/host_file
prome-master01
prome-node01
EOF
设置syslog 和logrotate服务
ansible-playbook -i host_file init_syslog_logrotate.yaml
编写ansible 发布服务脚本
ansible-playbook -i host_file  service_deploy.yaml  -e "tgz=node_exporter-1.1.2.linux-amd64.tar.gz" -e "app=node_exporter"
检查node_exporter服务状态
ansible -i host_file all -m shell -a " ps -ef |grep node_exporter|grep -v grep "

📎init_syslog_logrotate.yaml

📎service_deploy.yaml

浏览器访问

IP:9100/metrics
IP:9100/metrics

本机访问数据

[root@prome_master_01 tgzs]# curl  -s  localhost:9100/metrics |grep node_  |head -20
# HELP node_arp_entries ARP entries by device
# TYPE node_arp_entries gauge
node_arp_entries{device="eth0"} 3
# HELP node_boot_time_seconds Node boot time, in unixtime.
# TYPE node_boot_time_seconds gauge
node_boot_time_seconds 1.616987084e+09
# HELP node_context_switches_total Total number of context switches.
# TYPE node_context_switches_total counter
node_context_switches_total 2.105979e+06
# HELP node_cooling_device_cur_state Current throttle state of the cooling device
# TYPE node_cooling_device_cur_state gauge
node_cooling_device_cur_state{name="0",type="Processor"} 0
node_cooling_device_cur_state{name="1",type="Processor"} 0
node_cooling_device_cur_state{name="2",type="Processor"} 0
node_cooling_device_cur_state{name="3",type="Processor"} 0
# HELP node_cooling_device_max_state Maximum throttle state of the cooling device
# TYPE node_cooling_device_max_state gauge
node_cooling_device_max_state{name="0",type="Processor"} 0
node_cooling_device_max_state{name="1",type="Processor"} 0
node_cooling_device_max_state{name="2",type="Processor"} 0

默认开启的采集项目介绍

关闭默认开启采集项

--no-collector.<name> flag
# 未关闭cpu采集前
[root@prome_master_01 node_exporter]# curl  -s  localhost:9100/metrics |grep node_cpu
# HELP node_cpu_guest_seconds_total Seconds the CPUs spent in guests (VMs) for each mode.
# TYPE node_cpu_guest_seconds_total counter
node_cpu_guest_seconds_total{cpu="0",mode="nice"} 0
node_cpu_guest_seconds_total{cpu="0",mode="user"} 0
node_cpu_guest_seconds_total{cpu="1",mode="nice"} 0
node_cpu_guest_seconds_total{cpu="1",mode="user"} 0
node_cpu_guest_seconds_total{cpu="2",mode="nice"} 0
node_cpu_guest_seconds_total{cpu="2",mode="user"} 0
node_cpu_guest_seconds_total{cpu="3",mode="nice"} 0
node_cpu_guest_seconds_total{cpu="3",mode="user"} 0
# HELP node_cpu_seconds_total Seconds the CPUs spent in each mode.
# TYPE node_cpu_seconds_total counter
node_cpu_seconds_total{cpu="0",mode="idle"} 17691.27
node_cpu_seconds_total{cpu="0",mode="iowait"} 8.9
node_cpu_seconds_total{cpu="0",mode="irq"} 0
node_cpu_seconds_total{cpu="0",mode="nice"} 0.32
node_cpu_seconds_total{cpu="0",mode="softirq"} 0.28
node_cpu_seconds_total{cpu="0",mode="steal"} 2.7
# 关闭cpu采集
 ./node_exporter --no-collector.cpu
curl  -s  localhost:9100/metrics |grep node_cpu

关闭默认采集项而只开启某些采集

使用 --collector.disable-defaults 关闭全部默认采集器，再用 --collector.<name> 单独开启需要的采集器
# 只开启mem采集
 ./node_exporter --collector.disable-defaults --collector.meminfo
# 只开启mem 和cpu 采集
./node_exporter --collector.disable-defaults --collector.meminfo --collector.cpu

部分采集器默认关闭的原因

太重
太慢
太多的开销

禁用golang sdk 指标

使用 --web.disable-exporter-metrics
promhttp_ 代表访问/metrics 的http情况

[root@prome_master_01 tgzs]# curl  -s  localhost:9100/metrics |grep promhttp_
# HELP promhttp_metric_handler_errors_total Total number of internal errors encountered by the promhttp metric handler.
# TYPE promhttp_metric_handler_errors_total counter
promhttp_metric_handler_errors_total{cause="encoding"} 0
promhttp_metric_handler_errors_total{cause="gathering"} 0
# HELP promhttp_metric_handler_requests_in_flight Current number of scrapes being served.
# TYPE promhttp_metric_handler_requests_in_flight gauge
promhttp_metric_handler_requests_in_flight 1
# HELP promhttp_metric_handler_requests_total Total number of scrapes by HTTP status code.
# TYPE promhttp_metric_handler_requests_total counter
promhttp_metric_handler_requests_total{code="200"} 8
promhttp_metric_handler_requests_total{code="500"} 0
promhttp_metric_handler_requests_total{code="503"} 0

go_ 代表 Go runtime 信息等

# HELP go_goroutines Number of goroutines that currently exist.
# TYPE go_goroutines gauge
go_goroutines 7
# HELP go_info Information about the Go environment.
# TYPE go_info gauge
go_info{version="go1.15.8"} 1
# HELP go_memstats_alloc_bytes Number of bytes allocated and still in use.
# TYPE go_memstats_alloc_bytes gauge
go_memstats_alloc_bytes 2.781752e+06

process 代表进程信息等

# HELP process_cpu_seconds_total Total user and system CPU time spent in seconds.
# TYPE process_cpu_seconds_total counter
process_cpu_seconds_total 0.54
# HELP process_max_fds Maximum number of open file descriptors.
# TYPE process_max_fds gauge
process_max_fds 1024
# HELP process_open_fds Number of open file descriptors.
# TYPE process_open_fds gauge
process_open_fds 9
# HELP process_resident_memory_bytes Resident memory size in bytes.
# TYPE process_resident_memory_bytes gauge
process_resident_memory_bytes 1.5720448e+07

节点上自打点数据上报

🔲--collector.textfile.directory="" 配置本地采集目录
🔲在采集目录里创建.prom文件

# 创建目录
mkdir ./text_file_dir
# 准备 prom文件
cat <<EOF > ./text_file_dir/test.prom
# HELP nyy_test_metric just test
# TYPE nyy_test_metric gauge
nyy_test_metric{method="post",code="200"} 1027
EOF
# 启动服务
./node_exporter --collector.textfile.directory=./text_file_dir
# curl查看数据
[root@prome_master_01 tgzs]# curl  -s  localhost:9100/metrics |grep nyy
# HELP nyy_test_metric just test
# TYPE nyy_test_metric gauge
nyy_test_metric{code="200",method="post"} 1027

http传入参数，按采集器过滤指标

原理：通过http请求参数过滤采集器

// ServeHTTP serves a metrics request, optionally restricted to the
// collectors named in the request's "collect[]" query parameters.
//
// With no "collect[]" values the request is passed to the prepared
// unfiltered handler. Otherwise a handler is built for just the requested
// filters; if that fails, the client receives 400 Bad Request with the
// error text in the body.
func (h *handler) ServeHTTP(w http.ResponseWriter, r *http.Request) {
  // Every repeated collect[]=<name> value in the query string becomes one filter entry.
  filters := r.URL.Query()["collect[]"]
  level.Debug(h.logger).Log("msg", "collect query:", "filters", filters)
  if len(filters) == 0 {
    // No filters, use the prepared unfiltered handler.
    h.unfilteredHandler.ServeHTTP(w, r)
    return
  }
  // To serve filtered metrics, we create a filtering handler on the fly.
  filteredHandler, err := h.innerHandler(filters...)
  if err != nil {
    // Building the filtered handler failed: log a warning and report the
    // error to the client as a bad request.
    level.Warn(h.logger).Log("msg", "Couldn't create filtered metrics handler:", "err", err)
    w.WriteHeader(http.StatusBadRequest)
    // NOTE(review): the return values of Write are not checked here.
    w.Write([]byte(fmt.Sprintf("Couldn't create filtered metrics handler: %s", err)))
    return
  }
  filteredHandler.ServeHTTP(w, r)
}

http访问

# 只看cpu采集器的指标
http://IP:9100/metrics?collect[]=cpu
# 只看cpu和mem采集器的指标
http://IP:9100/metrics?collect[]=cpu&collect[]=meminfo

prometheus配置

  params:
    collect[]:
      - cpu
      - meminfo
两种导入模式
- url导入
- json文件导入

和prometheus `relabel_config`的区别： `按采集器过滤 VS 按metric_name 或label过滤`
导入dashboard商城中的node_exporter模板
地址 https://grafana.com/grafana/dashboards

两种导入模式

- url导入

- json文件导入

https://grafana.com/grafana/dashboards/8919

prometheus专题—（四）Ansible批量安装node_exporther

基础设置

浏览器访问

本机访问数据

默认开启的采集项目介绍

关闭默认开启采集项

关闭默认采集器项而开机器某些采集

禁用golang sdk 指标

配置数据源

验证

热门文章

最新文章

相关课程

相关电子书

相关实验场景

探索云世界

热门

云计算

大数据

云原生

人工智能

数据库

开发与运维

活动广场

任务中心

开发者评测

高校计划

乘风者计划

训练营

阿里云MVP

话题

直播

下载

镜像站

技术资料

插件

prometheus专题—（四）Ansible批量安装node_exporther

基础设置

浏览器访问

本机访问数据

默认开启的采集项目介绍

关闭默认开启采集项

关闭默认采集器项而开机器某些采集

禁用golang sdk 指标

配置数据源

验证

热门文章

最新文章

相关课程

相关电子书

相关实验场景