I. Environment Preparation
1. Operating system
CentOS 6.4 (32-bit); each node should have two network interfaces (one for regular traffic, one for cluster/DRBD traffic).
2. Configure mutual hostname resolution on each node
node1:
[root@node1 ~]# uname -n
node1.test.com
[root@node1 ~]# cat /etc/hosts
127.0.0.1 localhost localhost.localdomain localhost4 localhost4.localdomain4
::1 localhost localhost.localdomain localhost6 localhost6.localdomain6
192.168.1.31 node1.test.com node1
192.168.1.32 node2.test.com node2
node2:
[root@node2 ~]# uname -n
node2.test.com
[root@node2 ~]# cat /etc/hosts
127.0.0.1 localhost localhost.localdomain localhost4 localhost4.localdomain4
::1 localhost localhost.localdomain localhost6 localhost6.localdomain6
192.168.1.31 node1.test.com node1
192.168.1.32 node2.test.com node2
3. Configure passwordless SSH trust between the nodes
node1:
[root@node1 ~]# ssh-keygen -t rsa
[root@node1 ~]# ssh-copy-id -i .ssh/id_rsa.pub root@node2.test.com
node2:
[root@node2 ~]# ssh-keygen -t rsa
[root@node2 ~]# ssh-copy-id -i .ssh/id_rsa.pub root@node1.test.com
4. Synchronize time on each node
node1:
[root@node1 ~]# hwclock -s
node2:
[root@node2 ~]# hwclock -s
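Note: hwclock -s only copies each node's hardware clock into its system clock; it does not synchronize the two nodes with each other. A more robust alternative (assuming the nodes can reach an NTP server) is a one-shot NTP sync on both nodes:
[root@node1 ~]# yum install -y ntpdate
[root@node1 ~]# ntpdate pool.ntp.org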
5. Disable the firewall and SELinux on each node
node1:
[root@node1 ~]# service iptables stop
[root@node1 ~]# chkconfig iptables off
[root@node1 ~]# cat /etc/selinux/config
# This file controls the state of SELinux on the system.
# SELINUX= can take one of these three values:
# enforcing - SELinux security policy is enforced.
# permissive - SELinux prints warnings instead of enforcing.
# disabled - No SELinux policy is loaded.
SELINUX=disabled
node2:
[root@node2 ~]# service iptables stop
[root@node2 ~]# chkconfig iptables off
[root@node2 ~]# cat /etc/selinux/config
# This file controls the state of SELinux on the system.
# SELINUX= can take one of these three values:
# enforcing - SELinux security policy is enforced.
# permissive - SELinux prints warnings instead of enforcing.
# disabled - No SELinux policy is loaded.
SELINUX=disabled
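Editing /etc/selinux/config only takes effect after a reboot; to switch SELinux off immediately in the running system as well, run on both nodes:
[root@node1 ~]# setenforce 0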
II. Corosync Installation and Configuration
1. Install Corosync
node1:
[root@node1 ~]# rpm -ivh corosync-1.4.1-17.el6_5.1.i686.rpm corosynclib-1.4.1-17.el6_5.1.i686.rpm libibverbs-1.1.7-1.el6.i686.rpm \
librdmacm-1.0.17-1.el6.i686.rpm lm_sensors-libs-3.1.1-17.el6.i686.rpm net-snmp-libs-5.5-49.el6_5.4.i686.rpm openssl-1.0.1e-16.el6_5.15.i686.rpm
node2:
[root@node2 ~]# rpm -ivh corosync-1.4.1-17.el6_5.1.i686.rpm corosynclib-1.4.1-17.el6_5.1.i686.rpm libibverbs-1.1.7-1.el6.i686.rpm \
librdmacm-1.0.17-1.el6.i686.rpm lm_sensors-libs-3.1.1-17.el6.i686.rpm net-snmp-libs-5.5-49.el6_5.4.i686.rpm openssl-1.0.1e-16.el6_5.15.i686.rpm
2. Configure Corosync
[root@node1 ~]# cd /etc/corosync/
[root@node1 corosync]# ll
total 16
-rw-r--r-- 1 root root  445 May 15 05:09 corosync.conf.example
-rw-r--r-- 1 root root 1084 May 15 05:09 corosync.conf.example.udpu
drwxr-xr-x 2 root root 4096 May 15 05:09 service.d
drwxr-xr-x 2 root root 4096 May 15 05:09 uidgid.d
[root@node1 corosync]# cp corosync.conf.example corosync.conf
[root@node1 corosync]# vim corosync.conf
[root@node1 corosync]# cat corosync.conf
# Please read the corosync.conf.5 manual page
compatibility: whitetank
totem {
    version: 2
    secauth: on
    threads: 0
    interface {
        ringnumber: 0
        # network of the interface used for cluster traffic
        # (the second NIC, 192.168.188.0/24, in this setup)
        bindnetaddr: 192.168.188.0
        mcastaddr: 226.94.10.10
        mcastport: 5405
        ttl: 1
    }
}
logging {
    fileline: off
    to_stderr: no
    to_logfile: yes
    to_syslog: no
    logfile: /var/log/cluster/corosync.log
    debug: off
    timestamp: on
    logger_subsys {
        subsys: AMF
        debug: off
    }
}
amf {
    mode: disabled
}
# start Pacemaker as a Corosync plugin (ver: 0)
service {
    ver: 0
    name: pacemaker
}
aisexec {
    user: root
    group: root
}
3. Generate the authentication key (corosync-keygen reads from /dev/random, which can block for a long time on an idle server while waiting for entropy; temporarily pointing /dev/random at /dev/urandom avoids the wait)
[root@node1 corosync]# mv /dev/{random,random.bak}
[root@node1 corosync]# ln -s /dev/urandom /dev/random
[root@node1 corosync]# corosync-keygen
Corosync Cluster Engine Authentication key generator.
Gathering 1024 bits for key from /dev/random.
Press keys on your keyboard to generate entropy.
Writing corosync key to /etc/corosync/authkey.
[root@node1 corosync]# ll
total 24
-r-------- 1 root root  128 Aug 17 17:17 authkey
-rw-r--r-- 1 root root  544 Aug 17 17:14 corosync.conf
-rw-r--r-- 1 root root  445 May 15 05:09 corosync.conf.example
-rw-r--r-- 1 root root 1084 May 15 05:09 corosync.conf.example.udpu
drwxr-xr-x 2 root root 4096 May 15 05:09 service.d
drwxr-xr-x 2 root root 4096 May 15 05:09 uidgid.d
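Optionally, restore the real /dev/random now that the key exists (a cleanup step the original omits):
[root@node1 corosync]# rm -f /dev/random
[root@node1 corosync]# mv /dev/random.bak /dev/random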
4. Copy the configuration files from node1 to node2
[root@node1 corosync]# scp -p authkey corosync.conf node2:/etc/corosync/
authkey 100% 128 0.1KB/s 00:00
corosync.conf 100% 542 0.5KB/s 00:00
With that, the Corosync configuration is complete; next we set up Pacemaker.
III. Pacemaker Installation and Configuration
1. Install Pacemaker
node1:
[root@node1 ~]# rpm -ivh clusterlib-3.0.12.1-59.el6_5.3.i686.rpm libqb-0.16.0-2.el6.i686.rpm pacemaker-1.1.10-14.el6_5.3.i686.rpm pacemaker-cli-1.1.10-14.el6_5.3.i686.rpm pacemaker-cluster-libs-1.1.10-14.el6_5.3.i686.rpm pacemaker-libs-1.1.10-14.el6_5.3.i686.rpm perl-TimeDate-1.16-11.1.el6.noarch.rpm resource-agents-3.9.2-40.el6_5.10.i686.rpm
node2:
[root@node2 ~]# rpm -ivh clusterlib-3.0.12.1-59.el6_5.3.i686.rpm libqb-0.16.0-2.el6.i686.rpm pacemaker-1.1.10-14.el6_5.3.i686.rpm pacemaker-cli-1.1.10-14.el6_5.3.i686.rpm pacemaker-cluster-libs-1.1.10-14.el6_5.3.i686.rpm pacemaker-libs-1.1.10-14.el6_5.3.i686.rpm perl-TimeDate-1.16-11.1.el6.noarch.rpm resource-agents-3.9.2-40.el6_5.10.i686.rpm
2. Install crmsh
node1:
[root@node1 ~]# rpm -ivh python-pssh-2.3.1-4.1.i686.rpm pssh-2.3.1-4.1.i686.rpm redhat-rpm-config-9.0.3-42.el6.noarch.rpm \
crmsh-2.1-1.6.i686.rpm
[root@node1 ~]# crm
Cannot change active directory to /var/lib/pacemaker/cores/root: No such file or directory
crm(live)# help
This is crm shell, a Pacemaker command line interface.
Available commands:
cib manage shadow CIBs
resource resources management
configure CRM cluster configuration
node nodes management
options user preferences
history CRM cluster history
site Geo-cluster support
ra resource agents information center
status show cluster status
help,? show help (help topics for list of topics)
end,cd,up go back one level
quit,bye,exit exit the program
The steps on node2 are identical; the crm shell starts there with the same output as on node1.
3. Start Corosync (note: because Pacemaker was integrated into Corosync via the service block in corosync.conf, starting Corosync also starts Pacemaker)
[root@node1 ~]# ssh node2 "service corosync start"
Starting Corosync Cluster Engine (corosync):               [  OK  ]
[root@node1 ~]# service corosync start
Starting Corosync Cluster Engine (corosync):               [  OK  ]
4. Check the startup messages
(1) Verify the Corosync engine started and read its configuration file:
[root@node1 ~]# grep -e "Corosync Cluster Engine" -e "configuration file" /var/log/cluster/corosync.log
(2) Verify that the initial membership notifications were sent:
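A suggested check, using the log file configured in corosync.conf (the original omits the command):
[root@node1 ~]# grep TOTEM /var/log/cluster/corosync.log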
(3) Check whether any errors occurred during startup:
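A suggested check (STONITH-related errors here are expected until stonith-enabled is turned off in the cluster configuration section):
[root@node1 ~]# grep ERROR: /var/log/cluster/corosync.log | grep -v unpack_resources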
(4) Verify that Pacemaker started correctly:
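A suggested check:
[root@node1 ~]# grep pcmk_startup /var/log/cluster/corosync.log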
5. Check the cluster status
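For example (both nodes should show as Online once the cluster is up):
[root@node1 ~]# crm status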
IV. DRBD Installation and Configuration
1. Install DRBD
node1:
RPM packages obtained by building DRBD from source:
drbd-8.4.3-2.el6.i686.rpm
drbd-bash-completion-8.4.3-2.el6.i686.rpm
drbd-heartbeat-8.4.3-2.el6.i686.rpm
drbd-km-2.6.32_358.el6.i686-8.4.3-2.el6.i686.rpm
drbd-pacemaker-8.4.3-2.el6.i686.rpm
drbd-udev-8.4.3-2.el6.i686.rpm
drbd-utils-8.4.3-2.el6.i686.rpm
drbd-xen-8.4.3-2.el6.i686.rpm
Install all of the DRBD packages listed above:
[root@node1 ~]# rpm -ivh drbd*.rpm
Verify that the kernel module loaded:
[root@node1 ~]# lsmod | grep drbd
drbd 292307 0
libcrc32c 841 1 drbd
[root@node2 ~]# rpm -ivh drbd*.rpm
Verify that the kernel module loaded:
[root@node2 ~]# lsmod | grep drbd
drbd 292307 0
libcrc32c 841 1 drbd
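If lsmod prints nothing, the module was probably not loaded automatically; it can be loaded by hand (starting the drbd service later also loads it):
[root@node1 ~]# modprobe drbd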
2. Configure DRBD
[root@node1 ~]# cat /etc/drbd.d/global_common.conf
global {
    usage-count no;    # whether to report anonymous usage statistics to LINBIT; we opt out
    # minor-count dialog-refresh disable-ip-verification
}
common {
    handlers {
        pri-on-incon-degr "/usr/lib/drbd/notify-pri-on-incon-degr.sh; /usr/lib/drbd/notify-emergency-reboot.sh; echo b > /proc/sysrq-trigger ; reboot -f";
        pri-lost-after-sb "/usr/lib/drbd/notify-pri-lost-after-sb.sh; /usr/lib/drbd/notify-emergency-reboot.sh; echo b > /proc/sysrq-trigger ; reboot -f";
        local-io-error "/usr/lib/drbd/notify-io-error.sh; /usr/lib/drbd/notify-emergency-shutdown.sh; echo o > /proc/sysrq-trigger ; halt -f";
    }
    startup {
    }
    options {
    }
    disk {
        on-io-error detach;    # on I/O error, detach the backing device
    }
    net {
        cram-hmac-alg "sha1";        # peer authentication algorithm
        shared-secret "mydrbdlab";   # shared secret for authentication
    }
    syncer {
        rate 100M;                   # resynchronization rate
    }
}
3. On node1 and node2, add a new disk and create a 5 GB Linux partition on it (/dev/sda4 in this example; the original post illustrated this step with screenshots).
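A minimal sketch of creating that partition with fdisk, assuming the unused space is on /dev/sda (adjust the device and partition number to your layout):
[root@node1 ~]# fdisk /dev/sda      # n (new), p (primary), 4, +5G, w (write)
[root@node1 ~]# partprobe /dev/sda  # make the kernel re-read the partition table
Repeat on node2.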
4. Define the resource
[root@node1 drbd.d]# cat web.res
resource web {
    on node1.test.com {
        device    /dev/drbd0;
        disk      /dev/sda4;
        address   192.168.188.31:7789;
        meta-disk internal;
    }
    on node2.test.com {
        device    /dev/drbd0;
        disk      /dev/sda4;
        address   192.168.188.32:7789;
        meta-disk internal;
    }
}
5. Copy the configuration files to node2
[root@node1 drbd.d]# scp global_common.conf web.res node2:/etc/drbd.d/
6. Initialize the resource on node1 and node2
node1:
[root@node1 ~]# drbdadm create-md web
Writing meta data...
initializing activity log
NOT initializing bitmap
New drbd meta data block successfully created.
node2:
[root@node2 ~]# drbdadm create-md web
Writing meta data...
initializing activity log
NOT initializing bitmap
New drbd meta data block successfully created.
If create-md reports errors instead (for example about existing data on the partition), wipe the start of the partition with dd if=/dev/zero bs=1M count=1 of=/dev/sda4 and run it again.
7. Start DRBD
node1:
[root@node1 ~]# service drbd start
node2:
[root@node2 ~]# service drbd start
8. Check the status
node1:
[root@node1 ~]# drbd-overview
0:web/0  Connected Secondary/Secondary Inconsistent/Inconsistent C r-----
node2:
[root@node2 ~]# drbd-overview
0:web/0  Connected Secondary/Secondary Inconsistent/Inconsistent C r-----
9. Promote node1 to primary
[root@node1 ~]# drbdadm -- --overwrite-data-of-peer primary web
[root@node1 ~]# drbd-overview
0:web/0 Connected Primary/Secondary UpToDate/UpToDate C r-----
10. Format and mount
[root@node1 ~]# mkfs.ext4 /dev/drbd0
[root@node1 ~]# mkdir /mydata
[root@node1 ~]# mount /dev/drbd0 /mydata/
[root@node1 ~]# mount
/dev/sda2 on / type ext4 (rw)
proc on /proc type proc (rw)
sysfs on /sys type sysfs (rw)
devpts on /dev/pts type devpts (rw,gid=5,mode=620)
tmpfs on /dev/shm type tmpfs (rw)
/dev/sda1 on /boot type ext4 (rw)
none on /proc/sys/fs/binfmt_misc type binfmt_misc (rw)
/dev/drbd0 on /mydata type ext4 (rw)
[root@node1 ~]# cd /mydata/
[root@node1 mydata]# cp /etc/inittab /mydata/
[root@node1 mydata]# ll
total 20
-rw-r--r-- 1 root root   884 Aug 17 13:50 inittab
drwx------ 2 root root 16384 Aug 17 13:49 lost+found
[root@node1 mydata]# mkdir data
[root@node1 ~]# ll /mydata/
total 20
drwxr-xr-x 4 mysql mysql  4096 Oct 10 21:31 data
-rw-r--r-- 1 root root    884 Aug 17 13:50 inittab
drwx------ 2 root root  16384 Aug 17 13:49 lost+found
11. Promote node2 to primary
[root@node1 ~]# umount /mydata/
[root@node1 ~]# drbdadm secondary web
[root@node1 ~]# drbd-overview
[root@node2 ~]# drbdadm primary web
[root@node2 ~]# drbd-overview
0:web/0  Connected Primary/Secondary UpToDate/UpToDate C r-----
[root@node2 ~]# mkdir /mydata
[root@node2 ~]# mount /dev/drbd0 /mydata/
[root@node2 ~]# ll /mydata/
total 20
drwxr-xr-x 4 mysql mysql  4096 Oct 10 21:31 data
-rw-r--r-- 1 root root    884 Aug 17 13:50 inittab
drwx------ 2 root root  16384 Aug 17 13:49 lost+found
V. MySQL Configuration
1. Make node1 the primary node again:
[root@node2 ~]# umount /mydata/
[root@node2 ~]# drbdadm secondary web
[root@node2 ~]# drbd-overview
[root@node1 ~]# drbdadm primary web
[root@node1 ~]# drbd-overview
0:web/0  Connected Primary/Secondary UpToDate/UpToDate C r-----
[root@node1 ~]# mount /dev/drbd0 /mydata/
Install MySQL (on both nodes): yum install -y mysql mysql-server
2. The data directory will hold MySQL's databases, so change its owner and group:
# chown -R mysql.mysql /mydata/data/
3. Point the MySQL configuration file at the DRBD-backed directory:
# vim /etc/my.cnf
datadir = /mydata/data    # data directory on the DRBD device
# scp /etc/my.cnf node2:/etc/
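For context, a minimal sketch of the relevant /etc/my.cnf section on CentOS 6 (only datadir is changed here; socket and user keep the stock values):
[mysqld]
datadir=/mydata/data
socket=/var/lib/mysql/mysql.sock
user=mysql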
4. Start the service and test
# service mysqld start
Check the files created in the data directory:
[root@node1 data]# pwd
/mydata/data
[root@node1 data]# ls
ibdata1 ib_logfile0 ib_logfile1 mysql test
5. Stop mysqld and make sure it will not start at boot (the cluster manages it from now on):
# service mysqld stop && chkconfig mysqld off
6. On node2 there is no need to initialize anything; switch the DRBD primary and mount:
[root@node1 ~]# umount /mydata/
[root@node1 ~]# drbdadm secondary web
[root@node1 ~]# drbd-overview
[root@node2 ~]# drbdadm primary web
[root@node2 ~]# drbd-overview
0:web/0  Connected Primary/Secondary UpToDate/UpToDate C r-----
[root@node2 ~]# mount /dev/drbd0 /mydata/
[root@node2 data]# pwd
/mydata/data
[root@node2 data]# ls
ibdata1 ib_logfile0 ib_logfile1 mysql test
7. On node2 as well, stop mysqld, disable it at boot, and unmount:
# service mysqld stop && chkconfig mysqld off
# umount /mydata
VI. Cluster Configuration
1. Corosync/Pacemaker enables STONITH by default, but this cluster has no STONITH device, so the default configuration cannot start resources yet. Disable STONITH first:
crm configure property stonith-enabled=false
2. For a two-node cluster we must also tell Pacemaker to ignore loss of quorum, so that votes no longer matter and a single surviving node can keep running resources:
crm configure property no-quorum-policy=ignore
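You can confirm both properties took effect with:
crm configure show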
3. Stop DRBD and disable it at boot (the cluster will control it):
[root@node1 ~]# service drbd stop && chkconfig drbd off
[root@node2 ~]# service drbd stop && chkconfig drbd off
4. DRBD must run on both nodes at the same time, but in the primary/secondary model only one node can be Master while the other is Slave. It is therefore a special multi-state (Master/Slave) clone resource: the nodes are differentiated into Master and Slave roles, and both start out as Slave. Note that drbd_resource must match the DRBD resource name defined earlier, which is web:
primitive mysqldrbd ocf:linbit:drbd params drbd_resource="web" op monitor role="Master" interval="30s" op monitor role="Slave" interval="31s" op start timeout="240s" op stop timeout="100s"
5. Create a master/slave resource wrapping mysqldrbd:
ms MS_mysqldrbd mysqldrbd meta master-max=1 master-node-max=1 clone-max=2 clone-node-max=1 notify="true"
6. Have the cluster mount the DRBD device at /mydata automatically. This filesystem resource must run on the DRBD Master node, and it can only start after DRBD has promoted that node to Primary:
primitive MysqlFS ocf:heartbeat:Filesystem params device="/dev/drbd0" directory="/mydata" fstype="ext4" op start timeout="60s" op stop timeout="60s"
7. Define the virtual IP and MySQL service resources:
primitive myip ocf:heartbeat:IPaddr params ip=192.168.1.100
primitive mysqlserver lsb:mysqld
8. Define the colocation and ordering constraints (filesystem, VIP, and MySQL must all run with the DRBD Master, starting in the order promote → mount → IP → mysqld):
colocation MysqlFS_with_mysqldrbd inf: MysqlFS MS_mysqldrbd:Master myip mysqlserver
order MysqlFS_after_mysqldrbd inf: MS_mysqldrbd:promote MysqlFS:start
order myip_after_MysqlFS mandatory: MysqlFS myip
order mysqlserver_after_myip mandatory: myip mysqlserver
Commit the configuration and check the status:
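From inside the crm shell this looks like the following (a sketch; which node the resources land on in the status output depends on where DRBD is promoted):
crm(live)configure# verify
crm(live)configure# commit
crm(live)configure# cd
crm(live)# status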
VII. Common Split-Brain Problems
1. Check the primary server
[root@node1 ~]# service drbd status
drbd driver loaded OK; device status:
version: 8.4.3 (api:1/proto:86-101)
GIT-hash: 599f286440bd633d15d5ff985204aff4bccffadd build by root@node1.test.com, 2013-11-03 00:03:40
m:res cs         ro              ds                p mounted fstype
1:web StandAlone Primary/Unknown UpToDate/DUnknown r-----
2. Check the secondary server
[root@node2 ~]# service drbd status
drbd driver loaded OK; device status:
version: 8.4.3 (api:1/proto:86-101)
GIT-hash: 599f286440bd633d15d5ff985204aff4bccffadd build by root@node1.test.com, 2013-11-03 00:03:40
m:res cs         ro                ds                p mounted fstype
1:web StandAlone Secondary/Unknown UpToDate/DUnknown r-----
Resolution:
On the secondary server (web is the resource name):
[root@node2 ~]# drbdadm secondary web
[root@node2 ~]# drbdadm --discard-my-data connect web
On the primary server:
[root@node1 ~]# drbdadm connect web
[root@node1 ~]# cat /proc/drbd
version: 8.4.3 (api:1/proto:86-101)
GIT-hash: 599f286440bd633d15d5ff985204aff4bccffadd build by root@node1.test.com, 2013-11-03 00:03:40
 1: cs:SyncSource ro:Primary/Secondary ds:UpToDate/Inconsistent C r-----
    ns:6852 nr:0 dw:264460 dr:8393508 al:39 bm:512 lo:0 pe:2 ua:0 ap:0 ep:1 wo:d oos:257728
    [>....................] sync'ed: 4.7% (257728/264412)K
    finish: 0:03:47 speed: 1,112 (1,112) K/sec
Check on the secondary: DRBD is back to normal.
[root@node2 ~]# service drbd status
drbd driver loaded OK; device status:
version: 8.4.3 (api:1/proto:86-101)
GIT-hash: 599f286440bd633d15d5ff985204aff4bccffadd build by root@node2.test.com, 2013-11-03 00:03:40
m:res cs        ro                ds                p mounted fstype
1:web Connected Secondary/Primary UpToDate/UpToDate C
Reposted from Linux_woniu's 51CTO blog. Original article: http://blog.51cto.com/linuxcgi/1965319