主节点 10.0.0.10
远程NFS归档目录(postgres用户读写权限) /data/recoverydir/pg_arch
本地归档目录(postgres用户读写权限
) /data1/archivedir/pg_arch
备份目录(postgres用户读写权限
) /data1/pg_backup
日志目录(postgres用户读写权限
) /data1/pg_run_log ; ln -s /data1/pg_run_log /var/log/pg_run_log
df
/dev/sdb1 670G 48G 589G 8% /data1
10.0.0.20:/data1/archivedir 670G 66G 571G 11% /data/recoverydir
/etc/fstab
10.0.0.20:/data1/archivedir /data/recoverydir nfs rw,rsize=8192,wsize=8192,noatime 1 3
/etc/exports
/data1/archivedir 10.0.0.20/32(rw,sync,wdelay,no_root_squash,anonuid=0,anongid=0)
exportfs -av
固定nfs监听端口,如修改 /etc/services
# Local services
mountd 845/tcp #rpc.mountd
mountd 842/udp #rpc.mountd
rquotad 790/tcp #rpc.rquotad
rquotad 787/udp #rpc.rquotad
备节点 10.0.0.20
远程NFS归档目录(postgres用户读写权限
) /data/recoverydir/pg_arch
本地归档目录(postgres用户读写权限
) /data1/archivedir/pg_arch
备份目录(postgres用户读写权限
) /data1/pg_backup
日志目录(postgres用户读写权限
) /data1/pg_run_log ; ln -s /data1/pg_run_log /var/log/pg_run_log
df
/dev/sdb1 670G 48G 589G 8% /data1
10.0.0.10:/data1/archivedir 670G 66G 571G 11% /data/recoverydir
/etc/fstab
10.0.0.10:/data1/archivedir /data/recoverydir nfs rw,rsize=8192,wsize=8192,noatime 1 3
/etc/exports
/data1/archivedir 10.0.0.10/32(rw,sync,wdelay,no_root_squash,anonuid=0,anongid=0)
固定nfs监听端口,如修改 /etc/services
# Local services
mountd 845/tcp #rpc.mountd
mountd 842/udp #rpc.mountd
rquotad 790/tcp #rpc.rquotad
rquotad 787/udp #rpc.rquotad
# On 备库节点,配置rsync
# Rsyncd On Standby Database Server & Backup Server
cat /etc/rsyncd.postgres.conf
# Rsyncd On Standby Database Server & Backup Server
port = 873
hosts deny = 0.0.0.0/0
read only = false
write only = false
gid = 0
uid = 0
use chroot = no
max connections = 10
pid file = /var/run/rsync.pid
lock file = /var/run/rsync.lock
log file = /var/log/rsync.log
[pgdata]
path = /data1/pg_data
comment = Building Database Dir.
hosts allow = 10.0.0.20,10.0.0.10
[pgbackup]
path = /data1/pg_backup
comment = Database Backup Dir.
hosts allow = 10.0.0.20,10.0.0.10
# On 备库节点,开启rsync后台进程,如果配置修改的话重新执行一遍以下程序,表示reload
rsync -v --daemon --config=/etc/rsyncd.postgres.conf
# On 主库节点,开启归档
fsync = on
full_page_writes = on
archive_mode = on
archive_command = 'cp -f %p /data1/archivedir/pg_arch/%f 2>>/var/log/pg_run_log/archive_cp_5432.log'
archive_timeout = 300
# 如果原来archive_mode = off , 修改后需要重启数据库,否则只需要reload配置
# On 备库节点,新建standby, 1.删除已经存在的数据文件目录,(用于重新建立)
rm -rf /data1/pg_data
# On 主库节点,同步数据文件到备库
psql -h 127.0.0.1 postgres postgres -c "select pg_start_backup(now()::text);"
rsync -acvz --exclude=pg_xlog /data1/pg_data/* 10.0.0.20::pgdata
psql -h 127.0.0.1 postgres postgres -c "select pg_stop_backup();"
# On 备库节点,新建standby, 2.配置standby
# recovery.conf
restore_command = 'pg_standby -d -s 2 -t /tmp/pgsql.trigger.5432 /data/recoverydir/pg_arch %f %p 2>>/var/log/pg_run_log/restore_standby_5432.log'
recovery_target_timeline = 'latest'
recovery_end_command = 'rm -f /tmp/pgsql.trigger.5432'
# 新建目录
mkdir /data1/pg_data/pg_xlog
rm -f /data1/pg_data/recovery.done
chown -R postgres:postgres /data1/pg_data
chmod -R 700 /data1/pg_data
# 启动standby库,进入recovery模式
pg_ctl start -D /data1/pg_data
# 至此,standby就建立好了.
下面来谈谈怎么做备份 :
# On 主库节点,备份数据库,一周一次,crontab: 1 3 * * 2 /usr/local/postgres/backupsh/backup_database_5432.sh >>/var/log/pg_run_log/backup_database_5432.log 2>&1
vi /usr/local/postgres/backupsh/backup_database_5432.sh
chmod 500 /usr/local/postgres/backupsh/backup_database_5432.sh
#!/bin/bash
DATE=`date +%Y%m%d`
TIME=`date +%F`
echo -e "$TIME : select pg_start_backup();\n"
psql -h 127.0.0.1 postgres postgres -c "select pg_start_backup(now()::text);"
RESULT=$?
echo -e "TIME:selectpgstartbackup();result:RESULT\n"
if [ $RESULT -ne 0 ]; then
exit $RESULT
fi
echo -e "$TIME : Backup Database use rsync.\n"
rsync -acvz --exclude=pg_xlog /data1/pg_data/* 10.0.0.20::pgbackup/data_backup_$DATE
RESULT=$?
echo -e "TIME:BackupDatabaseusersync.result:RESULT\n"
echo -e "$TIME : select pg_stop_backup();\n"
psql -h 127.0.0.1 postgres postgres -c "select pg_stop_backup();"
RESULT=$?
echo -e "TIME:selectpgstopbackup();result:RESULT\n"
exit $RESULT
# On 主库节点,备份归档日志,一天一次,crontab: 1 1 * * * /usr/local/postgres/backupsh/backup_archive_5432.sh >>/var/log/pg_run_log/backup_archive_5432.log 2>&1
vi /usr/local/postgres/backupsh/backup_archive_5432.sh
chmod 500 /usr/local/postgres/backupsh/backup_archive_5432.sh
#!/bin/bash
TIME=`date +%F`
echo -e "$TIME : Backup archivelog use rsync.\n"
rsync -acvz /data1/archivedir/pg_arch/* 10.0.0.20::pgbackup/arch_backup
RESULT=$?
echo -e "TIME:Backuparchivelogusersync.result:RESULT\n"
exit $RESULT
# 至此,备份就做好了
下面来谈谈怎么做一个策略,定期清理历史备份.假设清理15天前的备份.
# On 备库节点,清理超过保留时间窗口的数据,一天一次,crontab: 1 6 * * * /usr/local/postgres/backupsh/purge_backup.sh >>/var/log/pg_run_log/purge_backup_5432.log 2>&1
vi /usr/local/postgres/backupsh/purge_backup_5432.sh
chmod 500 /usr/local/postgres/backupsh/purge_backup_5432.sh
#!/bin/bash
TIME=`date +%F`
echo -e "$TIME : Delete Archivelog From Primary Host Dir.\n"
find /data/recoverydir/pg_arch/* -mtime +15 -exec rm -rf {} \;
echo -e "$TIME : Delete Archivelog From Backup Host Dir.\n"
find /data1/pg_backup/arch_backup/* -mtime +15 -exec rm -rf {} \;
echo -e "$TIME : Delete Datafile From Backup Host Dir.\n"
for i in `ls -1rt /data1/pg_backup/|grep data_backup_|head --lines=-1`
do
rm -rf /data1/pg_backup/$i
done
# 至此,清楚历史备份也部署好了.
下面来谈谈PITR,基于时间点的恢复测试.
# On 备库节点,数据恢复测试,数据恢复
mkdir /data1/pg_backup/data_backup_$DATE/pg_xlog
chown -R postgres:postgres /data1/pg_backup
chmod 700 /data1/pg_backup/data_backup_$DATE
rm -f /data1/pg_backup/data_backup_$DATE/recovery.done
# 编辑配置文件postgresql.conf,修改监听端口与本地已经存在的端口分开.
port = 5433
archive_command = 'cp -f %p /data1/archivedir/pg_arch/%f 2>>/var/log/pg_run_log/archive_cp_5433.log'
# 编辑配置文件,recovery.conf
restore_command = 'pg_standby -d -s 2 -t /tmp/pgsql.trigger.5433 /data/recoverydir/pg_arch %f %p 2>>/var/log/pg_run_log/restore_standby_5433.log'
# recovery_target_timeline = 'latest'
# 以下时间来自数据库select now()这种的输出,然后推算;
recovery_target_time = '2011-07-20 11:13:15.64642+02'
recovery_target_inclusive = 'true'
recovery_end_command = 'rm -f /tmp/pgsql.trigger.5433'
# 启动备库开始恢复
pg_ctl start -D /data1/pg_backup/data_backup_$DATE
# 触发激活(三选一,空文件默认smart,fast表示立刻激活,不找下一个WAL文件.smart表示自动找下一个wal文件,找到就APPLY,直到没有下一个WAL了就激活)
touch /tmp/pgsql.trigger.5433
echo "fast" > /tmp/pgsql.trigger.5433
echo "smart" > /tmp/pgsql.trigger.5433
# On 备库节点,数据恢复测试,reindex HASH索引
# 至此,PITR的测试结束,
下面谈谈应该监控哪些日志.
# 监控日志文件
# 主库节点
/var/log/pg_run_log/archive_cp_5432.log
/var/log/pg_run_log/backup_database_5432.log
/var/log/pg_run_log/backup_archive_5432.log
# 备节点
/var/log/pg_run_log/restore_standby_5432.log
/var/log/rsync.log
/var/log/pg_run_log/purge_backup_5432.log
【note】
由于备份对系统的IO有一定影响,建议在执行备份脚本的时候或者脚本里面的rsync前面加上nice -n 19。这样对系统影响最小.
Nicenesses range from -20 (most favorable scheduling) to 19 (least favorable).
【补充】
其他测试点:
数据文件备份crontab脚本测试
归档文件备份crontab脚本测试
清理超过保留时间窗口的数据crontab脚本测试
时间点恢复测试,激活时smart和fast对比,是否达到一致效果
standby激活测试
存储空间测算
监控日志文件
时间线文件删掉之后是否可以做恢复,是否可以做standby等