Hadoop集群基本部署完成后,接下来就需要一个监控系统,以便及时发现性能瓶颈,为故障排除提供有力依据。好用的Hadoop集群监控系统比较少,个人感觉ambari比较好用,但它不能监控已有的集群环境,比较遗憾。在网上了解到ganglia原生支持Hadoop、HBase性能监控,试用一段时间后感觉还不错:监控项比较全面,配置简单,软件包在epel网络源中,使用yum安装方便快捷。
Ganglia是一个开源集群监控系统,主要用来监控系统性能,如:cpu、mem、硬盘利用率、I/O负载、网络流量等。
Ganglia涉及到三个组件:
gmond:运行在被监控端的代理程序,负责采集本机监控信息,并通过UDP发送到指定的gmond汇聚节点,供gmetad拉取。
gmetad:定期从gmond获取数据,并将数据存储到RRD存储引擎中。
ganglia-web:WEB前端,RRD绘图后通过PHP展示。
1.CentOS6 YUM源自带epel网络源,直接安装
[root@sht-sgmhadoopnn-01 ~]# yum install epel-release
[root@sht-sgmhadoopnn-01 ~]# yum install ganglia-web ganglia-gmetad ganglia-gmond
2.配置监控程序
[root@sht-sgmhadoopnn-01 ~]# vi /etc/ganglia/gmetad.conf
data_source "hadoop_hbase_cluster" 172.16.101.55:8649 172.16.101.56:8649 172.16.101.58:8649 172.16.101.59:8649 172.16.101.60:8649
case_sensitive_hostnames 1
setuid_username "root"
gridname "MyGrid"
3.关联Apache:Ganglia自带的配置文件ganglia.conf有问题,所以先将其删除,再在Apache根目录下创建一个指向/usr/share/ganglia的软链接
[root@sht-sgmhadoopnn-01 ~]# rm /etc/httpd/conf.d/ganglia.conf
rm: remove regular file `/etc/httpd/conf.d/ganglia.conf'? y
[root@sht-sgmhadoopnn-01 ~]# ln -s /usr/share/ganglia /var/www/html/ganglia
[root@sht-sgmhadoopnn-01 ~]#
4.启动Apache和Ganglia,并设置开机启动
[root@sht-sgmhadoopnn-01 ~]# chown -R root:root /var/lib/ganglia
[root@sht-sgmhadoopnn-01 ~]# service httpd start
Starting httpd: [ OK ]
[root@sht-sgmhadoopnn-01 ~]# service gmetad start
Starting GANGLIA gmetad: [ OK ]
[root@sht-sgmhadoopnn-01 ~]#
5.安装与配置被监控端(每台同样配置)
# yum install ganglia-gmond
# vi /etc/ganglia/gmond.conf
globals {
daemonize = yes
setuid = yes
user = root ##root
debug_level = 0
max_udp_msg_len = 1472
mute = no
deaf = no
allow_extra_data = yes
host_dmax = 86400 /*secs. Expires (removes from web interface) hosts in 1 day */
host_tmax = 20 /*secs */
cleanup_threshold = 300 /*secs */
gexec = no
# By default gmond will use reverse DNS resolution when displaying your hostname
# Uncommenting the following value will override that value.
# override_hostname = "mywebserver.domain.com"
# If you are not using multicast this value should be set to something other than 0.
# Otherwise if you restart aggregator gmond you will get empty graphs. 60 seconds is reasonable
send_metadata_interval = 0 /*secs */
}
cluster{
name = "hadoop_hbase_cluster" #集群名,和上面那个一样
owner = "root" ##root
latlong = "unspecified"
url = "unspecified"
}
/* The host section describes attributes of the host, like the location */
host {
location = "unspecified"
}
/*Feel free to specify as many udp_send_channels as you like. Gmond
used to only support having a single channel*/
udp_send_channel{
#bind_hostname = yes # Highly recommended, soon to be default.
# This option tells gmond to use a source address
# that resolves to the machine's hostname. Without
# this, the metrics may appear to come from any
# interface and the DNS names associated with
# those IPs will be used to create the RRDs.
#mcast_join = 239.2.11.71 #关闭多播
host = 172.16.101.55 #添加发送IP/主机名
port = 8649 #默认端口
ttl = 1
}
/* You can specify as many udp_recv_channels as you like as well. */
udp_recv_channel{
#mcast_join = 239.2.11.71
port = 8649
bind = 172.16.101.55 #------------ 本机的ip/hostname 接收地址
retry_bind = true
# Size of the UDP buffer. If you are handling lots of metrics you really
# should bump it up to e.g. 10MB or even higher.
# buffer = 10485760
}
……
6.同步
[root@sht-sgmhadoopnn-01 ~]# scp /etc/ganglia/gmond.conf root@sht-sgmhadoopnn-02:/etc/ganglia/gmond.conf
gmond.conf 100% 8769 8.6KB/s 00:00
You have mail in /var/spool/mail/root
[root@sht-sgmhadoopnn-01 ~]# scp /etc/ganglia/gmond.conf root@sht-sgmhadoopdn-02:/etc/ganglia/gmond.conf
gmond.conf 100% 8769 8.6KB/s 00:00
[root@sht-sgmhadoopnn-01 ~]# scp /etc/ganglia/gmond.conf root@sht-sgmhadoopdn-01:/etc/ganglia/gmond.conf
gmond.conf 100% 8769 8.6KB/s 00:00
[root@sht-sgmhadoopnn-01 ~]# scp /etc/ganglia/gmond.conf root@sht-sgmhadoopdn-03:/etc/ganglia/gmond.conf
gmond.conf 100% 8769 8.6KB/s 00:00
#### 同步后,需将各个节点gmond.conf中udp_recv_channel的bind修改为本节点的ip/hostname
7.运行gmond (每台节点)
[root@sht-sgmhadoopnn-01 ~]# service gmond start
[root@sht-sgmhadoopnn-02 ~]# service gmond start
[root@sht-sgmhadoopdn-01 ~]# service gmond start
[root@sht-sgmhadoopdn-02 ~]# service gmond start
[root@sht-sgmhadoopdn-03 ~]# service gmond start
8.添加Hadoop被Ganglia监控,去掉文件中以#开头的注释并修改(每台同样配置)
[root@sht-sgmhadoopnn-01 ~]# vi $HADOOP_HOME/etc/hadoop/hadoop-metrics2.properties
*.sink.ganglia.class=org.apache.hadoop.metrics2.sink.ganglia.GangliaSink31
*.sink.ganglia.period=10
*.sink.ganglia.supportsparse=true
*.sink.ganglia.slope=jvm.metrics.gcCount=zero,jvm.metrics.memHeapUsedM=both
*.sink.ganglia.dmax=jvm.metrics.threadsBlocked=70,jvm.metrics.memHeapUsedM=40
namenode.sink.ganglia.servers=172.16.101.55:8649 #当有多个ganglia监控系统时,以逗号分隔
datanode.sink.ganglia.servers=172.16.101.55:8649 #都指定ganglia服务器
resourcemanager.sink.ganglia.servers=172.16.101.55:8649
nodemanager.sink.ganglia.servers=172.16.101.55:8649
[root@sht-sgmhadoopnn-01 ~]# scp /hadoop/hadoop-2.7.2/etc/hadoop/hadoop-metrics2.properties sht-sgmhadoopnn-02:/hadoop/hadoop-2.7.2/etc/hadoop/hadoop-metrics2.properties
hadoop-metrics2.properties 100% 3183 3.1KB/s 00:00
You have new mail in /var/spool/mail/root
[root@sht-sgmhadoopnn-01 ~]# scp /hadoop/hadoop-2.7.2/etc/hadoop/hadoop-metrics2.properties sht-sgmhadoopdn-03:/hadoop/hadoop-2.7.2/etc/hadoop/hadoop-metrics2.properties
hadoop-metrics2.properties 100% 3183 3.1KB/s 00:00
[root@sht-sgmhadoopnn-01 ~]# scp /hadoop/hadoop-2.7.2/etc/hadoop/hadoop-metrics2.properties sht-sgmhadoopdn-02:/hadoop/hadoop-2.7.2/etc/hadoop/hadoop-metrics2.properties
hadoop-metrics2.properties 100% 3183 3.1KB/s 00:00
[root@sht-sgmhadoopnn-01 ~]# scp /hadoop/hadoop-2.7.2/etc/hadoop/hadoop-metrics2.properties sht-sgmhadoopdn-01:/hadoop/hadoop-2.7.2/etc/hadoop/hadoop-metrics2.properties
hadoop-metrics2.properties 100% 3183 3.1KB/s 00:00
[root@sht-sgmhadoopnn-01 ~]#
9.添加HBase被Ganglia监控,添加如下(每台同样配置)
[root@sht-sgmhadoopnn-01 ~]# vi /hadoop/hbase/conf/hadoop-metrics2-hbase.properties
*.sink.ganglia.class=org.apache.hadoop.metrics2.sink.ganglia.GangliaSink31
*.sink.ganglia.period=10
hbase.sink.ganglia.period=10
hbase.sink.ganglia.servers=172.16.101.55:8649
[root@sht-sgmhadoopnn-01 ~]# scp /hadoop/hbase/conf/hadoop-metrics2-hbase.properties sht-sgmhadoopnn-02:/hadoop/hbase/conf/hadoop-metrics2-hbase.properties
[root@sht-sgmhadoopnn-01 ~]# scp /hadoop/hbase/conf/hadoop-metrics2-hbase.properties sht-sgmhadoopdn-01:/hadoop/hbase/conf/hadoop-metrics2-hbase.properties
[root@sht-sgmhadoopnn-01 ~]# scp /hadoop/hbase/conf/hadoop-metrics2-hbase.properties sht-sgmhadoopdn-02:/hadoop/hbase/conf/hadoop-metrics2-hbase.properties
[root@sht-sgmhadoopnn-01 ~]# scp /hadoop/hbase/conf/hadoop-metrics2-hbase.properties sht-sgmhadoopdn-03:/hadoop/hbase/conf/hadoop-metrics2-hbase.properties
10.查看
http://172.16.101.55/ganglia