Ceph Luminous adds a CRUSH device class feature: each device can be assigned a class, and a CRUSH rule can then target that class directly when it is created.
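At a glance the whole workflow takes only a few commands; the pool name below is a placeholder, and each step is walked through in detail in the rest of this post:
# Tag a device with a class (an already-assigned class must be removed first, see step 3 below)
ceph osd crush rm-device-class osd.0
ceph osd crush set-device-class ssd osd.0
# Create a replicated rule that only selects OSDs of that class
ceph osd crush rule create-replicated rule-ssd default rack ssd
# Point a pool at the rule (mypool is hypothetical)
ceph osd pool set mypool crush_rule rule-ssd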
Test environment
[root@node1 ~]# cat /etc/redhat-release
CentOS Linux release 7.5.1804 (Core)
[root@node1 ~]# ceph --version
ceph version 12.2.9 (9e300932ef8a8916fb3fda78c58691a6ab0f4217) luminous (stable)
The test environment has no real SSD disks, so for this exercise we pretend that each host has one SSD and manually change the class label of the corresponding OSD.
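On real hardware this manual relabelling is normally unnecessary: a Luminous OSD reports whether its backing device is rotational and sets its own class (hdd or ssd) accordingly. As a quick sketch of how to see what an OSD reports (exact field names depend on the objectstore backend):
# "rotational": "0" in the metadata indicates a flash device
ceph osd metadata osd.0 | grep rotational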
Modifying the CRUSH class
1. Check the current OSD distribution
[root@node1 ~]# ceph osd tree
ID CLASS WEIGHT TYPE NAME STATUS REWEIGHT PRI-AFF
-1 0.74797 root default
-15 0.21999 rack rack1
-9 0.08800 host node1
3 hdd 0.04399 osd.3 up 1.00000 1.00000
10 hdd 0.04399 osd.10 up 0.98001 1.00000
11 hdd 0.04399 osd.11 up 0.96002 1.00000
-3 0.13199 host storage-0
4 hdd 0.04399 osd.4 up 0.96002 1.00000
5 hdd 0.04399 osd.5 up 0.96002 1.00000
14 hdd 0.04399 osd.14 up 0.98001 1.00000
-16 0.26399 rack rack2
-5 0.13199 host node2
0 hdd 0.04399 osd.0 up 0.98628 1.00000
6 hdd 0.04399 osd.6 up 1.00000 1.00000
16 hdd 0.04399 osd.16 up 1.00000 1.00000
-7 0.13199 host storage-1
2 hdd 0.04399 osd.2 up 1.00000 1.00000
8 hdd 0.04399 osd.8 up 1.00000 1.00000
12 hdd 0.04399 osd.12 up 1.00000 1.00000
-17 0.26399 rack rack3
-11 0.13199 host node3
1 hdd 0.04399 osd.1 up 1.00000 1.00000
7 hdd 0.04399 osd.7 up 1.00000 1.00000
15 hdd 0.04399 osd.15 up 1.00000 1.00000
-13 0.13199 host storage-2
9 hdd 0.04399 osd.9 up 1.00000 1.00000
13 hdd 0.04399 osd.13 up 1.00000 1.00000
17 hdd 0.04399 osd.17 up 0.96002 1.00000
2. List the cluster's current CRUSH classes
[root@node1 ~]# ceph osd crush class ls
[
"hdd",
]
3. Remove the class from osd.0, osd.1, osd.2, osd.3, osd.4 and osd.9
[root@node1 ~]# for i in 0 1 2 3 4 9 ; do ceph osd crush rm-device-class osd.$i ; done
done removing class of osd(s): 0
done removing class of osd(s): 1
done removing class of osd(s): 2
done removing class of osd(s): 3
done removing class of osd(s): 4
done removing class of osd(s): 9
[root@node1 ~]# ceph osd tree
ID CLASS WEIGHT TYPE NAME STATUS REWEIGHT PRI-AFF
-1 0.74797 root default
-15 0.21999 rack rack1
-9 0.08800 host node1
3 0.04399 osd.3 up 1.00000 1.00000
10 hdd 0.04399 osd.10 up 0.98001 1.00000
11 hdd 0.04399 osd.11 up 0.96002 1.00000
-3 0.13199 host storage-0
4 0.04399 osd.4 up 0.96002 1.00000
5 hdd 0.04399 osd.5 up 0.96002 1.00000
14 hdd 0.04399 osd.14 up 0.98001 1.00000
-16 0.26399 rack rack2
-5 0.13199 host node2
0 0.04399 osd.0 up 0.98628 1.00000
6 hdd 0.04399 osd.6 up 1.00000 1.00000
16 hdd 0.04399 osd.16 up 1.00000 1.00000
-7 0.13199 host storage-1
2 0.04399 osd.2 up 1.00000 1.00000
8 hdd 0.04399 osd.8 up 1.00000 1.00000
12 hdd 0.04399 osd.12 up 1.00000 1.00000
-17 0.26399 rack rack3
-11 0.13199 host node3
1 0.04399 osd.1 up 1.00000 1.00000
7 hdd 0.04399 osd.7 up 1.00000 1.00000
15 hdd 0.04399 osd.15 up 1.00000 1.00000
-13 0.13199 host storage-2
9 0.04399 osd.9 up 1.00000 1.00000
13 hdd 0.04399 osd.13 up 1.00000 1.00000
17 hdd 0.04399 osd.17 up 0.96002 1.00000
4. Set the class of osd.0, osd.1, osd.2, osd.3, osd.4 and osd.9 to ssd
[root@node1 ~]# for i in 0 1 2 3 4 9 ; do ceph osd crush set-device-class ssd osd.$i ; done
set osd(s) 0 to class 'ssd'
set osd(s) 1 to class 'ssd'
set osd(s) 2 to class 'ssd'
set osd(s) 3 to class 'ssd'
set osd(s) 4 to class 'ssd'
set osd(s) 9 to class 'ssd'
[root@node1 ~]# ceph osd tree
ID CLASS WEIGHT TYPE NAME STATUS REWEIGHT PRI-AFF
-1 0.74797 root default
-15 0.21999 rack rack1
-9 0.08800 host node1
10 hdd 0.04399 osd.10 up 0.98001 1.00000
11 hdd 0.04399 osd.11 up 0.96002 1.00000
3 ssd 0.04399 osd.3 up 1.00000 1.00000
-3 0.13199 host storage-0
5 hdd 0.04399 osd.5 up 0.96002 1.00000
14 hdd 0.04399 osd.14 up 0.98001 1.00000
4 ssd 0.04399 osd.4 up 0.96002 1.00000
-16 0.26399 rack rack2
-5 0.13199 host node2
6 hdd 0.04399 osd.6 up 1.00000 1.00000
16 hdd 0.04399 osd.16 up 1.00000 1.00000
0 ssd 0.04399 osd.0 up 0.98628 1.00000
-7 0.13199 host storage-1
8 hdd 0.04399 osd.8 up 1.00000 1.00000
12 hdd 0.04399 osd.12 up 1.00000 1.00000
2 ssd 0.04399 osd.2 up 1.00000 1.00000
-17 0.26399 rack rack3
-11 0.13199 host node3
7 hdd 0.04399 osd.7 up 1.00000 1.00000
15 hdd 0.04399 osd.15 up 1.00000 1.00000
1 ssd 0.04399 osd.1 up 1.00000 1.00000
-13 0.13199 host storage-2
13 hdd 0.04399 osd.13 up 1.00000 1.00000
17 hdd 0.04399 osd.17 up 0.96002 1.00000
9 ssd 0.04399 osd.9 up 1.00000 1.00000
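Because these classes were assigned by hand rather than detected from real SSDs, it is worth checking that they survive an OSD restart. Luminous has an osd_class_update_on_start option (default true) that lets an OSD set its device class when it boots; if the manual labels get changed after a restart, a ceph.conf snippet like the following should stop that (treat the option as an assumption and verify it against the documentation for your release):
# ceph.conf on the OSD hosts
[osd]
osd_class_update_on_start = false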
5. List the CRUSH classes again; an ssd class now appears
[root@node1 ~]# ceph osd crush class ls
[
"hdd",
"ssd"
]
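Beyond the class names, the membership of a class can also be listed directly; ceph osd crush class ls-osd prints the ids of the OSDs bound to a given class (in this environment it should return 0, 1, 2, 3, 4 and 9):
# List the OSDs that carry the ssd class
ceph osd crush class ls-osd ssd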
6. Create a CRUSH rule that places data only on ssd OSDs
[root@node1 ~]# ceph osd crush rule create-replicated rule-ssd default rack ssd
[root@node1 ~]# ceph osd crush rule dump rule-ssd
{
"rule_id": 1,
"rule_name": "rule-ssd",
"ruleset": 1,
"type": 1,
"min_size": 1,
"max_size": 10,
"steps": [
{
"op": "take",
"item": -30,
"item_name": "default~ssd"
},
{
"op": "chooseleaf_firstn",
"num": 0,
"type": "rack"
},
{
"op": "emit"
}
]
}
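The general form of the command is ceph osd crush rule create-replicated <rule-name> <root> <failure-domain> <device-class>. A matching rule for the remaining spinning disks can be created the same way, so that other pools can be pinned to the hdd class:
# Same syntax, different class: replicate across racks using only hdd OSDs
ceph osd crush rule create-replicated rule-hdd default rack hdd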
Verification
Method 1:
1. Get the CRUSH map
[root@node1 monmap]# ceph osd getcrushmap -o monmap
60
2. Decompile the CRUSH map
[root@node1 monmap]# crushtool -d monmap -o monmap.txt
[root@node1 monmap]# cat monmap.txt
…………
rule rule-ssd {
id 1
type replicated
min_size 1
max_size 10
step take default class ssd
step chooseleaf firstn 0 type rack
step emit
}
The CRUSH map now contains the additional rule-ssd CRUSH rule.
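Nothing needs to be written back here, since create-replicated already updated the live map; for reference, though, an edited text map would be recompiled and injected with the usual crushtool/setcrushmap round trip:
# Recompile the edited text map and load it into the cluster
crushtool -c monmap.txt -o monmap.new
ceph osd setcrushmap -i monmap.new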
3. Test the rule with crushtool
[root@node1 monmap]# crushtool -i monmap --test --min-x 0 --max-x 9 --num-rep 3 --ruleset 1 --show_mappings
CRUSH rule 1 x 0 [3,2,9]
CRUSH rule 1 x 1 [2,4,9]
CRUSH rule 1 x 2 [1,4,0]
CRUSH rule 1 x 3 [9,0,3]
CRUSH rule 1 x 4 [2,9,3]
CRUSH rule 1 x 5 [1,2,4]
CRUSH rule 1 x 6 [1,3,0]
CRUSH rule 1 x 7 [1,0,4]
CRUSH rule 1 x 8 [0,4,1]
CRUSH rule 1 x 9 [0,1,3]
In all 10 test mappings, every replica is placed on one of the OSDs whose class was set to ssd above.
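crushtool can also summarize how evenly the rule spreads data over the candidate OSDs; a sketch of that check (flag spelling per the crushtool man page, and the output format varies between versions):
# Simulate placements for rule 1 and print per-OSD utilization
crushtool -i monmap --test --num-rep 3 --ruleset 1 --show-utilization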
Method 2:
1. Create a pool named ssdtest and set its CRUSH rule to rule-ssd
[root@node1 monmap]# ceph osd pool create ssdtest 64 64 rule-ssd
pool 'ssdtest' created
[root@node1 ~]# ceph osd pool get ssdtest crush_rule
crush_rule: rule-ssd
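An existing pool can be moved onto the rule in the same way; note that changing crush_rule makes the pool's PGs migrate onto the ssd OSDs, so expect backfill traffic (mypool below is a placeholder):
# Switch an existing pool's placement rule to rule-ssd
ceph osd pool set mypool crush_rule rule-ssd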
2. Upload an object
[root@node1 ~]# rados -p ssdtest put init.txt init.sh
3. Look up the OSDs that hold the object
[root@node1 ~]# ceph osd map ssdtest init.txt
osdmap e286 pool 'ssdtest' (10) object 'init.txt' -> pg 10.66387d99 (10.19) -> up ([9,2,3], p9) acting ([9,2,3], p9)
The mapping shows that the OSDs holding the object are exactly OSDs whose class is ssd.
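To check the whole pool rather than a single object, the up/acting set of every PG in the pool can be compared against the ssd class membership. A rough sketch, assuming pool id 10 as shown above (the awk column positions for the UP and ACTING sets are an assumption; check the ceph pg dump pgs_brief header on your version):
# OSD ids bound to the ssd class
ceph osd crush class ls-osd ssd
# PG id, up set and acting set for every PG of pool 10 (ssdtest)
ceph pg dump pgs_brief 2>/dev/null | awk '$1 ~ /^10\./ {print $1, $3, $5}'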