ganglia metric introduction

简介:
本文主要介绍一下ganglia metric的概念, 为以后更好地了解metric的扩展打下基础.
metric即监控项, gmond自带的核心模块中, 包含了7类监控项目, 共计40几个监控项.
使用gmond -m可以查看所有的监控项.

我们先来了解一下metric的数据结构 : 
ganglia-3.6.0/lib/gm_protocol.h
/* Static description of one metric (monitored item); gmond modules such as
 * mod_load.c declare a table of these. */
struct Ganglia_25metric {
        int key;
        char *name;   // metric name; must be unique among the metrics of one host
        int tmax;    //  interval at which all metrics in this metric's group are sent to the send channel; corresponds to time_threshold in the collection_group configuration
        Ganglia_value_types type;   // value type, e.g. int8, float ...
        char *units;  // unit label; used for display only, not for computation
        char *slope;   //  trend of the value: zero (constant), positive (only increases), negative (only decreases), both (may increase or decrease)
        char *fmt;    // NOTE(review): presumably a printf-style format for the value (mod_load.c passes "%.2f" in this position) -- confirm
        int msg_size;
        char *desc;   // description
        int *metadata;
};


然后dump一个XML来看看 : 
# netstat -anp|grep gmond
tcp        0      0 0.0.0.0:8649                0.0.0.0:*                   LISTEN      4271/gmond          
udp        0      0 172.16.3.221:63894          239.2.11.71:8649            ESTABLISHED 4271/gmond          
udp        0      0 239.2.11.71:8649            0.0.0.0:*                               4271/gmond

将gmond的数据dump出来.
[root@db-172-16-3-221 cpu]# telnet 127.0.0.1 8649

截取一部分metric的信息如下
<GANGLIA_XML VERSION="3.6.0" SOURCE="gmond">
<CLUSTER NAME="test" LOCALTIME="1411027577" OWNER="digoal" LATLONG="111 122" URL="http://dba.sky-mobi.com">

<HOST NAME="db-172-16-3-221.localdomain" IP="172.16.3.221" TAGS="" REPORTED="1411027564" TN="12" TMAX="20" DMAX="86400" LOCATION="1,2,3" GMOND_STARTED="1411026704">
........
<METRIC NAME="load_one" VAL="0.00" TYPE="float" UNITS=" " TN="62" TMAX="70" DMAX="0" SLOPE="both">
<EXTRA_DATA>
<EXTRA_ELEMENT NAME="GROUP" VAL="load"/>
<EXTRA_ELEMENT NAME="DESC" VAL="One minute load average"/>
<EXTRA_ELEMENT NAME="TITLE" VAL="One Minute Load Average"/>
</EXTRA_DATA>
</METRIC>
......
</HOST>

<HOST NAME="abc" IP="172.16.3.150" TAGS="" REPORTED="1411026788" TN="788" TMAX="20" DMAX="86400" LOCATION="unspecified" GMOND_STARTED="0">
<METRIC NAME="load_five" VAL="1.0" TYPE="float" UNITS="" TN="810" TMAX="60" DMAX="0" SLOPE="both">
<EXTRA_DATA>
<EXTRA_ELEMENT NAME="TITLE" VAL="load_five"/>
<EXTRA_ELEMENT NAME="DESC" VAL="load_five"/>
<EXTRA_ELEMENT NAME="GROUP" VAL="cpu"/>
<EXTRA_ELEMENT NAME="CLUSTER" VAL="test"/>
<EXTRA_ELEMENT NAME="SPOOF_HOST" VAL="172.16.3.150:abc"/>
</EXTRA_DATA>
</METRIC>
......
</HOST>

host相关XML的代码
 /* Format the opening <HOST ...> tag into hostxml (size limit 1024).
  * REPORTED is last_heard_from divided by APR_USEC_PER_SEC; TAGS falls back
  * to an empty string and LOCATION to "unspecified" when unset. */
 len = apr_snprintf(hostxml, 1024, 
           "<HOST NAME=\"%s\" IP=\"%s\" TAGS=\"%s\" REPORTED=\"%d\" TN=\"%d\" TMAX=\"%d\" DMAX=\"%d\" LOCATION=\"%s\" GMOND_STARTED=\"%d\">\n",
                     hostinfo->hostname, 
                     hostinfo->ip,
                     tags ? tags : "",
                     (int)(hostinfo->last_heard_from / APR_USEC_PER_SEC),
                     tn,
                     host_tmax,
                     host_dmax,
                     hostinfo->location? hostinfo->location: "unspecified", 
                     hostinfo->gmond_started);

含义参考

metric相关XML的代码

 /* Format the opening <METRIC ...> tag into metricxml (size limit 1024).
  * VAL comes from the value message; TYPE, UNITS, TMAX, DMAX and SLOPE come
  * from the metadata message.  TN is (now - last_heard_from) divided by
  * APR_USEC_PER_SEC. */
 len = apr_snprintf(metricxml, 1024,
          "<METRIC NAME=\"%s\" VAL=\"%s\" TYPE=\"%s\" UNITS=\"%s\" TN=\"%d\" TMAX=\"%d\" DMAX=\"%d\" SLOPE=\"%s\">\n",
              metricName,
              gmetric_value_to_str(&(val->message_u.v_message)),
              data->message_u.f_message.Ganglia_metadata_msg_u.gfull.metric.type,
              data->message_u.f_message.Ganglia_metadata_msg_u.gfull.metric.units,
              (int)((now - val->last_heard_from) / APR_USEC_PER_SEC),
              data->message_u.f_message.Ganglia_metadata_msg_u.gfull.metric.tmax,
              data->message_u.f_message.Ganglia_metadata_msg_u.gfull.metric.dmax,
              slope_to_cstr(data->message_u.f_message.Ganglia_metadata_msg_u.gfull.metric.slope));

含义参考

接下来, 我们可以看看gmond的核心模块中的其中一个load模块是如何采样的.

[root@db-172-16-3-221 cpu]# cat mod_load.c
#include <gm_metric.h>
#include <libmetrics.h>

mmodule load_module;


/* Module init hook: calls libmetrics_init(), then walks the module's metric
 * table and attaches the "load" group to every entry.  Always returns 0. */
static int load_metric_init ( apr_pool_t *p )
{
    int idx = 0;

    libmetrics_init();

    /* The table ends at the first entry whose name is NULL. */
    while (load_module.metrics_info[idx].name != NULL) {
        /* Set up the metadata storage for this metric, then record its
         * group under the MGROUP key. */
        MMETRIC_INIT_METADATA(&(load_module.metrics_info[idx]),p);
        MMETRIC_ADD_METADATA(&(load_module.metrics_info[idx]),MGROUP,"load");
        idx++;
    }

    return 0;
}

/* Module cleanup hook; intentionally empty -- this module does nothing here. */
static void load_metric_cleanup ( void )
{
}

/* Return the current value of the metric selected by metric_index.
 *
 * metric_index is the position of the metric in load_metric_info:
 *   0 -> load_one, 1 -> load_five, 2 -> load_fifteen.
 * Any other index yields a value whose float member is 0. */
static g_val_t load_metric_handler ( int metric_index )
{
    g_val_t fallback;

    if (metric_index == 0) {
        return load_one_func();
    }
    if (metric_index == 1) {
        return load_five_func();
    }
    if (metric_index == 2) {
        return load_fifteen_func();
    }

    /* Unknown index: report zero. */
    fallback.f = 0;
    return fallback;
}

/* Metric table for the load module.  load_metric_handler selects a metric by
 * its index in this array, so the order here must match its switch cases.
 * Fields, in order: key, name, tmax, value type, units, slope, format,
 * message size, description.  The {0, NULL} entry terminates the table
 * (load_metric_init stops at the first NULL name). */
static Ganglia_25metric load_metric_info[] = 
{
    {0, "load_one",      70, GANGLIA_VALUE_FLOAT, " ", "both", "%.2f", UDP_HEADER_SIZE+8, "One minute load average"},
    {0, "load_five",    325, GANGLIA_VALUE_FLOAT, " ", "both", "%.2f", UDP_HEADER_SIZE+8, "Five minute load average"},
    {0, "load_fifteen", 950, GANGLIA_VALUE_FLOAT, " ", "both", "%.2f", UDP_HEADER_SIZE+8, "Fifteen minute load average"},
    {0, NULL}
};

/* Module descriptor for the load metrics: after the standard module header
 * fields it lists, in order, the init hook, the cleanup hook, the metric
 * table and the value handler defined in this file. */
mmodule load_module =
{
    STD_MMODULE_STUFF,
    load_metric_init,
    load_metric_cleanup,
    load_metric_info,
    load_metric_handler,
};

除了metric name, 其实还看到了metric group 的信息. group方便gweb归类.
例如核心模块中的group load包含了3个metric. (load_one, load_five, load_fifteen)

[root@db-172-16-3-221 cpu]# gmond -m|grep load
load_fifteen    Fifteen minute load average (module load_module)
load_one        One minute load average (module load_module)
load_five       Five minute load average (module load_module)


最后, 通过gmetric来了解一下metric.
gmetric是一个可以手工向udp send channel发送metric信息的工具. 它需要读gmond.conf配置文件, 从中得到udp send channel以及其他信息, 然后从命令行输入metric相关的值, 发送出去.

# man gmetric
GMETRIC(1)                       User Commands                      GMETRIC(1)

NAME
       gmetric - manual page for Ganglia Custom Metric Utility

SYNOPSIS
       gmetric [OPTIONS]...

DESCRIPTION
       The  Ganglia  Metric Client (gmetric) announces a metric on the list of defined send channels defined in a con-
       figuration file

       -h, --help
              Print help and exit

       -V, --version
              Print version and exit

       -c, --conf=STRING   //配置文件
              The configuration file to use for finding send channels (default=‘/etc/ganglia/gmond.conf’)

       -n, --name=STRING   // metric name
              Name of the metric

       -v, --value=STRING   // metric 值
              Value of the metric

       -t, --type=STRING   // metric 值类型
              Either string|int8|uint8|int16|uint16|int32|uint32|float|double

       -u, --units=STRING   // 单位
              Unit of measure for the value e.g. Kilobytes, Celcius (default=‘’)

       -s, --slope=STRING   // 值属性
              Either zero|positive|negative|both  (default=‘both’)

       -x, --tmax=INT    // 类似gmond里配置的 collection group 消息发送间隔
              The maximum time in seconds between gmetric calls (default=‘60’)

       -d, --dmax=INT    // metric 超时删除时间, 目前貌似没有用到这个.
              The lifetime in seconds of this metric  (default=‘0’)

       -g, --group=STRING    //  metric属于哪个组, 可以参考核心模块, 例如load_five属于load组.
              Group of the metric

       -D, --desc=STRING  // 描述
              Description of the metric
 
       -T, --title=STRING  // 在gweb中显示为 title
              Title of the metric

       -S, --spoof=STRING    // 指定metric的主机名, IP. (例如在A主机发送消息, 默认是会将metric归到A主机的, 但是如果你想从A主机发出B主机的metric, 那就需要spoof)  (因为metric是以主机名为归类分开存储的)
              IP address and name of host/device (colon separated) we are spoofing  (default=‘’)

       -H, --heartbeat   // 心跳消息
              spoof a heartbeat message (use with spoof option)

命令行帮助 : 
[root@db-172-16-3-221 cpu]# gmetric -h
gmetric 3.6.0

The Ganglia Metric Client (gmetric) announces a metric
on the list of defined send channels defined in a configuration file

Usage: gmetric [OPTIONS]...

  -h, --help            Print help and exit
  -V, --version         Print version and exit
  -c, --conf=STRING     The configuration file to use for finding send channels 
                           (default=`/opt/ganglia-core-3.6.0/etc/gmond.conf')
  -n, --name=STRING     Name of the metric
  -v, --value=STRING    Value of the metric
  -t, --type=STRING     Either 
                          string|int8|uint8|int16|uint16|int32|uint32|float|double
  -u, --units=STRING    Unit of measure for the value e.g. Kilobytes, Celcius  
                          (default=`')
  -s, --slope=STRING    Either zero|positive|negative|both  (default=`both')
  -x, --tmax=INT        The maximum time in seconds between gmetric calls  
                          (default=`60')
  -d, --dmax=INT        The lifetime in seconds of this metric  (default=`0')
  -g, --group=STRING    Group(s) of the metric (comma-separated)
  -C, --cluster=STRING  Cluster of the metric
  -D, --desc=STRING     Description of the metric
  -T, --title=STRING    Title of the metric
  -S, --spoof=STRING    IP address and name of host/device (colon separated) we 
                          are spoofing  (default=`')
  -H, --heartbeat       spoof a heartbeat message (use with spoof option)

分组在gweb中应用 : 
ganglia metric introduce - 德哥@Digoal - PostgreSQL research

[参考]
3. ganglia-3.6.0/gmond/modules/cpu/*
4. man gmetric
相关文章
|
索引
问题复盘:Kibana did not load properly. Check the server output for more information
问题复盘:Kibana did not load properly. Check the server output for more information
265 0
问题复盘:Kibana did not load properly. Check the server output for more information
|
存储 缓存 Java
ElasticSearch Tune for indexing speed Translation
关于如何提高es查询性能的文章,该文章完全是从官网上拿来翻译的,一字不差,希望通过翻译一边敲键盘一边进行更深层次地理解,另外也能为以后做个记忆储备,谈不上对社区的贡献啦,慢慢学好了
1108 0
|
消息中间件 监控 NoSQL

热门文章

最新文章