使用Telegraf+Influxdb+Grafana配置VMware vSphere监控大屏

本文涉及的产品
可观测可视化 Grafana 版,10个用户账号 1个月
简介: 使用Telegraf+Influxdb+Grafana配置VMware vSphere监控大屏

在之前的文章的基础上

使用Telegraf+Grafana监控Microsoft SQLServer数据库

实现使用Telegraf+Influxdb+Grafana配置VMware vSphere监控大屏

一、修改telegraf配置文件

只需要修改telegraf配置文件中的[[inputs.vsphere]]部分即可

vim /etc/telegraf/telegraf.conf

配置文件示例如下

请根据实际情况修改IP、账号、密码等参数。注意最后的insecure_skip_verify参数要设置为true(跳过SSL证书链及主机名校验,适用于vCenter使用自签名证书的场景)

[[inputs.vsphere]]
#   ## List of vCenter URLs to be monitored. These three lines must be uncommented
#   ## and edited for the plugin to work.
vcenters = ["https://vCenterIP地址/sdk" ]
username = "Administrator@vsphere.local"
password = "Password"
#
#   ## VMs
#   ## Typical VM metrics (if omitted or empty, all metrics are collected)
#   # vm_include = [ "/*/vm/**"] # Inventory path to VMs to collect (by default all are collected)
#   # vm_exclude = [] # Inventory paths to exclude
vm_metric_include = [
  "cpu.demand.average",
  "cpu.idle.summation",
  "cpu.latency.average",
  "cpu.readiness.average",
  "cpu.ready.summation",
  "cpu.run.summation",
  "cpu.usagemhz.average",
  "cpu.used.summation",
  "cpu.wait.summation",
  "mem.active.average",
  "mem.granted.average",
  "mem.latency.average",
  "mem.swapin.average",
  "mem.swapinRate.average",
  "mem.swapout.average",
  "mem.swapoutRate.average",
  "mem.usage.average",
  "mem.vmmemctl.average",
  "net.bytesRx.average",
  "net.bytesTx.average",
  "net.droppedRx.summation",
  "net.droppedTx.summation",
  "net.usage.average",
  "power.power.average",
  "virtualDisk.numberReadAveraged.average",
  "virtualDisk.numberWriteAveraged.average",
  "virtualDisk.read.average",
  "virtualDisk.readOIO.latest",
  "virtualDisk.throughput.usage.average",
  "virtualDisk.totalReadLatency.average",
  "virtualDisk.totalWriteLatency.average",
  "virtualDisk.write.average",
  "virtualDisk.writeOIO.latest",
  "sys.uptime.latest",
]
#   # vm_metric_exclude = [] ## Nothing is excluded by default
#   # vm_instances = true ## true by default
#
#   ## Hosts
#   ## Typical host metrics (if omitted or empty, all metrics are collected)
#   # host_include = [ "/*/host/**"] # Inventory path to hosts to collect (by default all are collected)
#   # host_exclude = [] # Inventory paths to exclude
host_metric_include = [
  "cpu.coreUtilization.average",
  "cpu.costop.summation",
  "cpu.demand.average",
  "cpu.idle.summation",
  "cpu.latency.average",
  "cpu.readiness.average",
  "cpu.ready.summation",
  "cpu.swapwait.summation",
  "cpu.usage.average",
  "cpu.usagemhz.average",
  "cpu.used.summation",
  "cpu.utilization.average",
  "cpu.wait.summation",
  "disk.deviceReadLatency.average",
  "disk.deviceWriteLatency.average",
  "disk.kernelReadLatency.average",
  "disk.kernelWriteLatency.average",
  "disk.numberReadAveraged.average",
  "disk.numberWriteAveraged.average",
  "disk.read.average",
  "disk.totalReadLatency.average",
  "disk.totalWriteLatency.average",
  "disk.write.average",
  "mem.active.average",
  "mem.latency.average",
  "mem.state.latest",
  "mem.swapin.average",
  "mem.swapinRate.average",
  "mem.swapout.average",
  "mem.swapoutRate.average",
  "mem.totalCapacity.average",
  "mem.usage.average",
  "mem.vmmemctl.average",
  "net.bytesRx.average",
  "net.bytesTx.average",
  "net.droppedRx.summation",
  "net.droppedTx.summation",
  "net.errorsRx.summation",
  "net.errorsTx.summation",
  "net.usage.average",
  "power.power.average",
  "storageAdapter.numberReadAveraged.average",
  "storageAdapter.numberWriteAveraged.average",
  "storageAdapter.read.average",
  "storageAdapter.write.average",
  "sys.uptime.latest",
]
#     ## Collect IP addresses? Valid values are "ipv4" and "ipv6"
#   # ip_addresses = ["ipv6", "ipv4" ]
#
#   # host_metric_exclude = [] ## Nothing excluded by default
#   # host_instances = true ## true by default
#
#
#   ## Clusters
#   # cluster_include = [ "/*/host/**"] # Inventory path to clusters to collect (by default all are collected)
#   # cluster_exclude = [] # Inventory paths to exclude
cluster_metric_include = [] ## if omitted or empty, all metrics are collected
#   # cluster_metric_exclude = [] ## Nothing excluded by default
#   # cluster_instances = false ## false by default
#
#   ## Resource Pools
#   # resource_pool_include = [ "/*/host/**"] # Inventory path to resource pools to collect (by default all are collected)
#   # resource_pool_exclude = [] # Inventory paths to exclude
#resource_pool_metric_include = [] ## if omitted or empty, all metrics are collected
#   # resource_pool_metric_exclude = [] ## Nothing excluded by default
#   # resource_pool_instances = false ## false by default
#
#   ## Datastores
#   # datastore_include = [ "/*/datastore/**"] # Inventory path to datastores to collect (by default all are collected)
#   # datastore_exclude = [] # Inventory paths to exclude
datastore_metric_include = [] ## if omitted or empty, all metrics are collected
#   # datastore_metric_exclude = [] ## Nothing excluded by default
#   # datastore_instances = false ## false by default
#
#   ## Datacenters
#   # datacenter_include = [ "/*/host/**"] # Inventory path to clusters to collect (by default all are collected)
#   # datacenter_exclude = [] # Inventory paths to exclude
datacenter_metric_include = [] ## if omitted or empty, all metrics are collected
#   datacenter_metric_exclude = [ "*" ] ## Datacenters are not collected by default.
#   # datacenter_instances = false ## false by default
#
#   ## Plugin Settings
#   ## separator character to use for measurement and field names (default: "_")
#   # separator = "_"
#
#   ## number of objects to retrieve per query for realtime resources (vms and hosts)
#   ## set to 64 for vCenter 5.5 and 6.0 (default: 256)
#   # max_query_objects = 256
#
#   ## number of metrics to retrieve per query for non-realtime resources (clusters and datastores)
#   ## set to 64 for vCenter 5.5 and 6.0 (default: 256)
#   # max_query_metrics = 256
#
#   ## number of go routines to use for collection and discovery of objects and metrics
#   # collect_concurrency = 1
#   # discover_concurrency = 1
#
#   ## the interval before (re)discovering objects subject to metrics collection (default: 300s)
#   # object_discovery_interval = "300s"
#
#   ## timeout applies to any of the api request made to vcenter
#   # timeout = "60s"
#
#   ## When set to true, all samples are sent as integers. This makes the output
#   ## data types backwards compatible with Telegraf 1.9 or lower. Normally all
#   ## samples from vCenter, with the exception of percentages, are integer
#   ## values, but under some conditions, some averaging takes place internally in
#   ## the plugin. Setting this flag to "false" will send values as floats to
#   ## preserve the full precision when averaging takes place.
#   # use_int_samples = true
#
#   ## Custom attributes from vCenter can be very useful for queries in order to slice the
#   ## metrics along different dimension and for forming ad-hoc relationships. They are disabled
#   ## by default, since they can add a considerable amount of tags to the resulting metrics. To
#   ## enable, simply set custom_attribute_exclude to [] (empty set) and use custom_attribute_include
#   ## to select the attributes you want to include.
#   # custom_attribute_include = []
#   # custom_attribute_exclude = ["*"]
#
#   ## The number of vSphere 5 minute metric collection cycles to look back for non-realtime metrics. In
#   ## some versions (6.7, 7.0 and possible more), certain metrics, such as cluster metrics, may be reported
#   ## with a significant delay (>30min). If this happens, try increasing this number. Please note that increasing
#   ## it too much may cause performance issues.
#   # metric_lookback = 3
#
#   ## Optional SSL Config
#   # ssl_ca = "/path/to/cafile"
#   # ssl_cert = "/path/to/certfile"
#   # ssl_key = "/path/to/keyfile"
#   ## Use SSL but skip chain & host verification
insecure_skip_verify = true
#
#   ## The Historical Interval value must match EXACTLY the interval in the daily
#   # "Interval Duration" found on the VCenter server under Configure > General > Statistics > Statistic intervals
#   # historical_interval = "5m"

当然你也可以单独设置vCenter的其它账号只用于监控对接,然后重启telegraf服务

systemctl restart telegraf
systemctl status telegraf

二、Grafana导入DashBoard

https://grafana.com/grafana/dashboards/8159-vmware-vsphere-overview/
https://grafana.com/grafana/dashboards/8162-vmware-vsphere-datastore/
https://grafana.com/grafana/dashboards/8165-vmware-vsphere-hosts/
https://grafana.com/grafana/dashboards/8168-vmware-vsphere-vms/

以上的4个ID,建议下载2021-04月份的历史版本json文件后导入

推测当时influxdb还是1.X版本,还未到2.X版本

而我现在所用influxdb也是1.X版本 所以推荐用这个历史版本

(图片点击放大查看)

(图片点击放大查看)

三、最终的监控大屏效果如下

(图片点击放大查看)

(图片点击放大查看)

(图片点击放大查看)

相关实践学习
通过可观测可视化Grafana版进行数据可视化展示与分析
使用可观测可视化Grafana版进行数据可视化展示与分析。
相关文章
|
14天前
|
Prometheus 运维 监控
智能运维实战:Prometheus与Grafana的监控与告警体系
【10月更文挑战第26天】Prometheus与Grafana是智能运维中的强大组合,前者是开源的系统监控和警报工具,后者是数据可视化平台。Prometheus具备时间序列数据库、多维数据模型、PromQL查询语言等特性,而Grafana支持多数据源、丰富的可视化选项和告警功能。两者结合可实现实时监控、灵活告警和高度定制化的仪表板,广泛应用于服务器、应用和数据库的监控。
84 3
|
3月前
|
存储 监控 固态存储
【vSAN分布式存储服务器数据恢复】VMware vSphere vSAN 分布式存储虚拟化平台VMDK文件1KB问题数据恢复案例
在一例vSAN分布式存储故障中,因替换故障闪存盘后磁盘组失效,一台采用RAID0策略且未使用置备的虚拟机VMDK文件受损,仅余1KB大小。经分析发现,该VMDK文件与内部虚拟对象关联失效导致。恢复方案包括定位虚拟对象及组件的具体物理位置,解析分配空间,并手动重组RAID0结构以恢复数据。此案例强调了深入理解vSAN分布式存储机制的重要性,以及定制化数据恢复方案的有效性。
89 5
|
13天前
|
Prometheus 运维 监控
智能运维实战:Prometheus与Grafana的监控与告警体系
【10月更文挑战第27天】在智能运维中,Prometheus和Grafana的组合已成为监控和告警体系的事实标准。Prometheus负责数据收集和存储,支持灵活的查询语言PromQL;Grafana提供数据的可视化展示和告警功能。本文介绍如何配置Prometheus监控目标、Grafana数据源及告警规则,帮助运维团队实时监控系统状态,确保稳定性和可靠性。
73 0
|
2月前
|
虚拟化
VMware Workstation子网划分及bond配置案例
本文主要介绍了如何在VMware Workstation中进行子网划分以及如何添加和配置四块网卡以实现bond模式,并通过配置文件和命令行操作来验证bond模式的有效性。
68 2
VMware Workstation子网划分及bond配置案例
|
2月前
|
Ubuntu 开发工具 虚拟化
MacOS系统基于VMware Fusion配置Ubuntu 22.04LTS环境
这篇文章介绍了如何在MacOS系统上使用VMware Fusion虚拟化软件配置Ubuntu 22.04 LTS环境,包括自定义VMware Fusion网段、Ubuntu系统安装、配置root用户登录、设置静态IP地址、修改默认网卡名称、配置PS1变量、设置登录界面为字符界面、修改软件源和进行vim基础优化等步骤。
321 2
|
3月前
|
Prometheus 监控 数据可视化
Grafana 插件生态系统:扩展你的监控能力
【8月更文第29天】Grafana 是一个流行的开源平台,用于创建和共享统计数据的仪表板和可视化。除了内置的支持,Grafana 还有一个强大的插件生态系统,允许用户通过安装插件来扩展其功能。本文将介绍一些 Grafana 社区提供的插件,并探讨它们如何增强仪表盘的功能性。
242 1
|
3月前
|
存储 Prometheus 监控
Grafana 与 Prometheus 集成:打造高效监控系统
【8月更文第29天】在现代软件开发和运维领域,监控系统已成为不可或缺的一部分。Prometheus 和 Grafana 作为两个非常流行且互补的开源工具,可以协同工作来构建强大的实时监控解决方案。Prometheus 负责收集和存储时间序列数据,而 Grafana 则提供直观的数据可视化功能。本文将详细介绍如何集成这两个工具,构建一个高效、灵活的监控系统。
385 1
|
3月前
|
Prometheus 监控 Cloud Native
Grafana 入门指南:快速上手监控仪表盘
【8月更文第29天】Grafana 是一款开源的数据可视化和监控工具,它允许用户轻松地创建美观的仪表盘和图表,以便更好地理解和监控数据。无论您是需要监控系统性能指标、应用程序日志还是业务关键指标,Grafana 都能提供灵活而强大的解决方案。本指南将带领您快速上手 Grafana,包括安装、配置以及创建第一个监控面板。
536 1
|
3月前
|
存储 固态存储 虚拟化
【vSAN分布式存储服务器数据恢复】VMware vSphere vSAN ESXi超融合HCI分布式存储数据恢复案例
近期,我司处理了一个由10台华为OceanStor存储组成的vSAN超融合架构,其中一台存储闪存盘出现故障,用户取下后用新的闪存盘代替,然后对该闪存盘所在的磁盘组进行重建,导致集群中一台使用0置备策略的虚拟机数据丢失。
76 6
|
3月前
|
Prometheus Kubernetes 监控
Kubernetes(K8S) 监控 Prometheus + Grafana
Kubernetes(K8S) 监控 Prometheus + Grafana
240 2