MongoDB在启用复制集(Replica Set)功能后,原先一个简单的索引添加操作会变得相对复杂,尤其是在数据量巨大的时候,需要考虑尽可能将性能影响降低到最小。基于此,我们需要采取逐个节点创建索引的方式来达成,具体如本文所述。
一、复制集索引创建的过程
在MongoDB复制集中,从节点上索引的创建通常是在主节点索引创建完成之后才开始。
在分片集群环境中,mongos将发送createIndex()命令到每一个shard的主成员节点,
当主副本成员完成索引创建后,辅助副本开始创建索引。
二、如何最小化复制集上索引创建的影响?
确保你的oplog size足够大,以允许索引创建或重建索引操作完成,而不至于使从节点出现严重的滞后。
在索引创建期间,让一个辅助成员脱离复制集,这样在某一个时间仅仅影响复制集的一个成员,而不是同时影响到所有的复制集成员。
在2.6版本之前,后台索引创建操作在复制集从节点成员上会变成前台索引操作。2.6版本后则是后台创建索引。
三、复制集最小化索引创建影响的步骤
1、停止一个复制集从节点
停止一个复制集从节点mongod进程,并重启这个mongod(启动时不使用--replSet选项,且使用一个不同的端口)
即将从节点复制集实例变成一个单实例
假定当前的某个复制集实例运行在缺省端口27017且使用了--replSet
停止后启动这个实例
mongod --port 47017(使用不同端口的目的是防止在索引创建期间有其他进程直接连接到本实例)
2、创建索引
db.records.createIndex( { username: 1 } )
3、重启mongod实例
当索引创建完成后,重启mongod实例,恢复到复制集状态,并且使得该副本追上主副本
mongod --port 27017 --replSet rs0
4、在所有的从节点上按照步骤1-3的方式来创建索引
5、在主复制集节点上创建索引(以下2种方式可选)
在主节点以后台的方式创建索引
在mongo shell中使用rs.stepDown()平滑地停止主节点从而引发选举,以此产生一个新的主节点
然后重复上面的步骤1-3的方式来创建索引
四、演示复制集创建索引
//演示环境描述
//一主二从
//192.168.1.244:27000(PRIMARY)
//192.168.1.244:27001(SECONDARY)
//192.168.1.244:27002(SECONDARY)
1、主节点上复制集相关信息
repSetTest:PRIMARY> db.version()
3.2.11
//当前的users集合记录有100w
repSetTest:PRIMARY> db.users.count()
1000000
//当前的users集合无自定义索引
repSetTest:PRIMARY> db.users.getIndexes()
[
{
"v" : 1,
"key" : {
"_id" : 1
},
"name" : "_id_",
"ns" : "test.users"
}
]
//整个复制集的状态信息
repSetTest:PRIMARY> rs.status()
{
"set" : "repSetTest",
"date" : ISODate("2017-03-10T06:43:37.545Z"),
"myState" : 1,
"term" : NumberLong(1),
"heartbeatIntervalMillis" : NumberLong(2000),
"members" : [
{
"_id" : 0,
"name" : "localhost:27000",
"health" : 1,
"state" : 1,
"stateStr" : "PRIMARY",
.........
},
{
"_id" : 1,
"name" : "localhost:27001",
"health" : 1,
"state" : 2,
"stateStr" : "SECONDARY",
........
},
{
"_id" : 2,
"name" : "localhost:27002",
"health" : 1,
"state" : 2,
"stateStr" : "SECONDARY",
.......
}
],
"ok" : 1
}
//oplog
repSetTest:PRIMARY> rs.printReplicationInfo()
configured oplog size: 128MB
log length start to end: 201secs (0.06hrs)
oplog first event time: Fri Mar 10 2017 14:17:57 GMT+0800 (CST)
oplog last event time: Fri Mar 10 2017 14:21:18 GMT+0800 (CST)
now: Fri Mar 10 2017 14:44:00 GMT+0800 (CST)
2、从节点27001上添加索引
//关闭从节点27001
# mongo localhost:27001
MongoDB shell version: 3.2.11
connecting to: localhost:27001/test
repSetTest:SECONDARY> use admin
switched to db admin
repSetTest:SECONDARY> db.shutdownServer()
server should be down...
//使用一个新端口(27100),不带复制集方式启动
# mongod --dbpath /app/data/n2 --logpath /app/data/n2/n2.log --port 27100 \
> --smallfiles --oplogSize 128 --fork
# mongo localhost:27100
MongoDB shell version: 3.2.11
connecting to: localhost:27100/test
> db.users.count()
1000000
> db.users.find({},{_id:0}).limit(1)
{ "i" : 0, "username" : "user0", "age" : 109, "created" : ISODate("2017-03-10T06:17:19.168Z") }
//为集合users添加索引
> db.users.createIndex({username:1,created:1})
{
"createdCollectionAutomatically" : false,
"numIndexesBefore" : 1,
"numIndexesAfter" : 2,
"ok" : 1
}
//添加索引期间,立马在主节点插入数据(以下部分在主节点执行)
# mongo localhost:27000
MongoDB shell version: 3.2.11
connecting to: localhost:27000/test
//为原集合在主节点新增数据
repSetTest:PRIMARY> load('filling200w.js')
//在从节点27100(原27001)查看索引创建完毕后的大小
> db.stats()
{
"db" : "test",
"collections" : 3,
"objects" : 1039732,
"avgObjSize" : 89.7096626823066,
"dataSize" : 93274007,
"storageSize" : 29040640,
"numExtents" : 0,
"indexes" : 5,
"indexSize" : 28250112, //当前所有索引的大小
"ok" : 1
}
//关闭创建完索引的从节点(27100)
> use admin
switched to db admin
> db.shutdownServer()
//将创建完索引的节点切回到从
# mongod --replSet repSetTest --dbpath /app/data/n2 --logpath /app/data/n2/n2.log \
> --port 27001 --smallfiles --oplogSize 128 --fork
# mongo localhost:27001
MongoDB shell version: 3.2.11
connecting to: localhost:27001/test
repSetTest:SECONDARY> rs.slaveOk()
repSetTest:SECONDARY> db.users.count() //可以看到当前的27001从节点记录的增加
1000038 //也就是从节点已经开始apply oplog
repSetTest:SECONDARY> db.users.count()
1000040 //集合上的文档在持续被同步
repSetTest:SECONDARY> db.stats()
{
"db" : "test",
"collections" : 3,
"objects" : 1039773,
"avgObjSize" : 89.70963469911221,
"dataSize" : 93277656,
"storageSize" : 29040640,
"numExtents" : 0,
"indexes" : 5,
"indexSize" : 28291072, //索引的尺寸在增加,也就是说新增的记录都被添加了索引
"ok" : 1
}
3、从节点27002上添加索引
//关闭从节点27002
# mongo localhost:27002
repSetTest:SECONDARY> use admin
repSetTest:SECONDARY> db.shutdownServer()
//使用新的端口27200,不带复制集启动从节点
# mongod --dbpath /app/data/n3 --logpath /app/data/n3/n3.log --port 27200 \
> --smallfiles --oplogSize 128 --fork
# mongo localhost:27200
//此时当前节点上的数据为1000054
> db.users.count()
1000054
//查看索引信息,没有自定义的索引
> db.users.getIndexes()
[
{
"v" : 1,
"key" : {
"_id" : 1
},
"name" : "_id_",
"ns" : "test.users"
}
]
//为该节点集合users添加索引
> db.users.createIndex({username:1,created:1})
{
"createdCollectionAutomatically" : false,
"numIndexesBefore" : 1,
"numIndexesAfter" : 2,
"ok" : 1
}
//创建完毕后,关闭数据库
> use admin
> db.shutdownServer()
//以复制集方式启动,切回从节点状态
# mongod --replSet repSetTest --dbpath /app/data/n3 --logpath /app/data/n3/n3.log \
> --port 27002 --smallfiles --oplogSize 128 --fork
//基于27002查看从节点状态
[root@ydq05 ~]# mongo localhost:27002
MongoDB shell version: 3.2.11
connecting to: localhost:27002/test
repSetTest:SECONDARY> rs.status()
{
"set" : "repSetTest",
"date" : ISODate("2017-03-10T08:16:29.440Z"),
"myState" : 2,
"term" : NumberLong(1),
"syncingTo" : "localhost:27001",
"heartbeatIntervalMillis" : NumberLong(2000),
"members" : [
{
"_id" : 0,
"name" : "localhost:27000",
"health" : 1,
"state" : 1,
"stateStr" : "PRIMARY",
"uptime" : 22,
......
},
{
"_id" : 1,
"name" : "localhost:27001",
"health" : 1,
"state" : 2,
"stateStr" : "SECONDARY",
..........
},
{
"_id" : 2,
"name" : "localhost:27002",
"health" : 1,
"state" : 2,
"stateStr" : "SECONDARY",
....
}
],
"ok" : 1
}
4、主从节点切换
//从另外一个客户端连接到主节点27000
//可以看到当前主节点27000上users集合文档在不停增加
repSetTest:PRIMARY> db.users.count()
1000129
//强制关闭该主节点
repSetTest:PRIMARY> use admin
switched to db admin
repSetTest:PRIMARY> db.shutdownServer()
server should be down...
//此时之前连接的主节点添加文档报错,如下
repSetTest:PRIMARY> load('filling200w.js')
2017-03-10T16:17:31.513+0800 I NETWORK [thread1] DBClientCursor::init call() failed
2017-03-10T16:17:31.514+0800 E QUERY [thread1] Error: Error: error doing query: failed :
DBCollection.prototype.insert@src/mongo/shell/collection.js:358:23
@filling200w.js:2:1
@(shell):1:1
//节点已经发生切换,如下
repSetTest:SECONDARY> rs.status()
{
"set" : "repSetTest",
"date" : ISODate("2017-03-10T08:18:29.368Z"),
"myState" : 2,
"term" : NumberLong(2),
"syncingTo" : "localhost:27001",
"heartbeatIntervalMillis" : NumberLong(2000),
"members" : [ //Author : Leshami
{ //Blog : http://blog.csdn.net/leshami
"_id" : 0,
"name" : "localhost:27000",
"health" : 0,
"state" : 8,
"stateStr" : "(not reachable/healthy)",//原主节点不可达
"uptime" : 0,
"optime" : {
"ts" : Timestamp(0, 0),
"t" : NumberLong(-1)
},
"optimeDate" : ISODate("1970-01-01T00:00:00Z"),
"lastHeartbeat" : ISODate("2017-03-10T08:18:27.674Z"),
"lastHeartbeatRecv" : ISODate("2017-03-10T08:17:28.709Z"),
"pingMs" : NumberLong(0),
"lastHeartbeatMessage" : "Connection refused",
"configVersion" : -1
},
{
"_id" : 1,
"name" : "localhost:27001",
"health" : 1,
"state" : 1,
"stateStr" : "PRIMARY", //当前为主节点
.....
},
{
"_id" : 2,
"name" : "localhost:27002",
"health" : 1,
"state" : 2,
"stateStr" : "SECONDARY",
...
}
],
"ok" : 1
}
//连接到新主节点27001
[root@ydq05 ~]# mongo localhost:27001
MongoDB shell version: 3.2.11
connecting to: localhost:27001/test
//当前的文档数为1000131,也就是说原主节点停机时刻最终的文档数为1000131
repSetTest:PRIMARY> db.users.count()
1000131
5、原主节点添加索引
//启动原主节点(方式同上,不带复制集方式)
# mongod --dbpath /app/data/n1 --logpath /app/data/n1/n1.log \
--port 27300 --smallfiles --oplogSize 128 --fork
//连接到27300
# mongo localhost:27300
MongoDB shell version: 3.2.11
connecting to: localhost:27300/test
> rs.slaveOk()
> db.users.count() //此时原主上的数据也为1000131
1000131
> db.users.getIndexes() //原主节点
[
{
"v" : 1,
"key" : {
"_id" : 1
},
"name" : "_id_",
"ns" : "test.users"
}
]
> db.users.createIndex({username:1,created:1})
{
"createdCollectionAutomatically" : false,
"numIndexesBefore" : 1,
"numIndexesAfter" : 2,
"ok" : 1
}
> use admin
> db.shutdownServer()
//将原主节点切回到复制集
[root@ydq05 ~]# mongod --replSet repSetTest --dbpath /app/data/n1 --logpath /app/data/n1/n1.log \
> --port 27000 --smallfiles --oplogSize 128 --fork
6、后记
a、本次演示模拟了索引创建期间依旧有客户端对原集合进行操作的场景,真实生产环境应尽量在业务低峰期操作
b、在发生主从切换时,应使用rs.stepDown()平滑切换,本次演示未按此方式(而是直接关闭了主节点)
c、发生切换期间可以采用VIP漂移或者暂停业务
d、如果启用了从库读,索引添加期间应考虑从节点逐个下线对读请求的影响