简介
ceph从Luminous(L)版本开始新增了一个名为crush class的功能,又被称为磁盘智能分组。该功能会根据磁盘类型自动进行属性关联并完成分类,从而减少了很多人为操作。在有这个功能之前,如果我们需要对ssd和hdd进行分组,就需要大量修改crushmap,然后把不同的存储池绑定到不同的crush树上;而这个功能帮我们简化了这一过程。
ceph中的每个设备都可以选择一个class类型与之关联,通常有三种class类型:
- hdd
- ssd
- nvme
配置crush class
1. 创建ssd class
默认情况下,我们所有osd的class类型都是hdd:
root@ceph:~# ceph osd crush class ls
[
"hdd"
]
查看当前的osd布局:
root@ceph:~# ceph osd tree
ID CLASS WEIGHT TYPE NAME STATUS REWEIGHT PRI-AFF
-1 47.37482 root default
-3 11.84311 host ctnr.a1-56-11.pub.unp
0 hdd 1.81879 osd.0 up 1.00000 1.00000
25 hdd 1.81879 osd.25 up 1.00000 1.00000
26 hdd 1.81879 osd.26 up 1.00000 1.00000
27 hdd 1.81879 osd.27 up 1.00000 1.00000
28 hdd 1.81879 osd.28 up 1.00000 1.00000
29 hdd 1.81879 osd.29 up 1.00000 1.00000
30 hdd 0.46519 osd.30 up 1.00000 1.00000
31 hdd 0.46519 osd.31 up 1.00000 1.00000
-2 11.84430 host ctnr.a1-56-14.pub.unp
1 hdd 1.81898 osd.1 up 1.00000 1.00000
11 hdd 1.81898 osd.11 up 1.00000 1.00000
12 hdd 1.81898 osd.12 up 1.00000 1.00000
13 hdd 1.81898 osd.13 up 1.00000 1.00000
14 hdd 1.81898 osd.14 up 1.00000 1.00000
15 hdd 1.81898 osd.15 up 1.00000 1.00000
16 hdd 0.46519 osd.16 up 1.00000 1.00000
17 hdd 0.46519 osd.17 up 1.00000 1.00000
-7 11.84430 host ctnr.a1-56-15.pub.unp
2 hdd 1.81898 osd.2 up 1.00000 1.00000
3 hdd 1.81898 osd.3 up 1.00000 1.00000
5 hdd 1.81898 osd.5 up 1.00000 1.00000
6 hdd 1.81898 osd.6 up 1.00000 1.00000
7 hdd 1.81898 osd.7 up 1.00000 1.00000
8 hdd 1.81898 osd.8 up 1.00000 1.00000
9 hdd 0.46519 osd.9 up 1.00000 1.00000
10 hdd 0.46519 osd.10 up 1.00000 1.00000
-11 11.84311 host ctnr.a1-56-16.pub.unp
4 hdd 1.81879 osd.4 up 1.00000 1.00000
18 hdd 1.81879 osd.18 up 1.00000 1.00000
19 hdd 1.81879 osd.19 up 1.00000 1.00000
20 hdd 1.81879 osd.20 up 1.00000 1.00000
21 hdd 1.81879 osd.21 up 1.00000 1.00000
22 hdd 1.81879 osd.22 up 1.00000 1.00000
23 hdd 0.46519 osd.23 up 1.00000 1.00000
24 hdd 0.46519 osd.24 up 1.00000 1.00000
可以看到,当前有四个osd节点,每个节点上有8个osd,我们假设说每个节点上的最后两个osd为ssd磁盘。现在需要为其创建ssd的class。我们需要先将所有的ssd的osd从hdd class中删除:
for i in 30 31 16 17 9 10 23 24; do ceph osd crush rm-device-class osd.$i;done
这个时候,如果我们再次使用ceph osd tree查看osd布局,会看到被我们指定的osd前面不再有hdd标识,CLASS一列变为空。
此时可通过如下指令将这些osd添加至ssd class:
for i in 30 31 16 17 9 10 23 24; do ceph osd crush set-device-class ssd osd.$i;done
添加完成之后,我们再次查看osd布局:
root@ceph:~# ceph osd tree
ID CLASS WEIGHT TYPE NAME STATUS REWEIGHT PRI-AFF
-1 47.37482 root default
-3 11.84311 host ctnr.a1-56-11.pub.unp
0 hdd 1.81879 osd.0 up 1.00000 1.00000
25 hdd 1.81879 osd.25 up 1.00000 1.00000
26 hdd 1.81879 osd.26 up 1.00000 1.00000
27 hdd 1.81879 osd.27 up 1.00000 1.00000
28 hdd 1.81879 osd.28 up 1.00000 1.00000
29 hdd 1.81879 osd.29 up 1.00000 1.00000
30 ssd 0.46519 osd.30 up 1.00000 1.00000
31 ssd 0.46519 osd.31 up 1.00000 1.00000
-2 11.84430 host ctnr.a1-56-14.pub.unp
1 hdd 1.81898 osd.1 up 1.00000 1.00000
11 hdd 1.81898 osd.11 up 1.00000 1.00000
12 hdd 1.81898 osd.12 up 1.00000 1.00000
13 hdd 1.81898 osd.13 up 1.00000 1.00000
14 hdd 1.81898 osd.14 up 1.00000 1.00000
15 hdd 1.81898 osd.15 up 1.00000 1.00000
16 ssd 0.46519 osd.16 up 1.00000 1.00000
17 ssd 0.46519 osd.17 up 1.00000 1.00000
-7 11.84430 host ctnr.a1-56-15.pub.unp
2 hdd 1.81898 osd.2 up 1.00000 1.00000
3 hdd 1.81898 osd.3 up 1.00000 1.00000
5 hdd 1.81898 osd.5 up 1.00000 1.00000
6 hdd 1.81898 osd.6 up 1.00000 1.00000
7 hdd 1.81898 osd.7 up 1.00000 1.00000
8 hdd 1.81898 osd.8 up 1.00000 1.00000
9 ssd 0.46519 osd.9 up 1.00000 1.00000
10 ssd 0.46519 osd.10 up 1.00000 1.00000
-11 11.84311 host ctnr.a1-56-16.pub.unp
4 hdd 1.81879 osd.4 up 1.00000 1.00000
18 hdd 1.81879 osd.18 up 1.00000 1.00000
19 hdd 1.81879 osd.19 up 1.00000 1.00000
20 hdd 1.81879 osd.20 up 1.00000 1.00000
21 hdd 1.81879 osd.21 up 1.00000 1.00000
22 hdd 1.81879 osd.22 up 1.00000 1.00000
23 ssd 0.46519 osd.23 up 1.00000 1.00000
24 ssd 0.46519 osd.24 up 1.00000 1.00000
可以看到我们选定的osd的class都变为了ssd。
然后我们再次查看crush class,也多出了一个名为ssd的class:
root@ceph:~# ceph osd crush class ls
[
"hdd",
"ssd"
]
2. 创建基于ssd的class rule
创建一个class rule,取名为ssd_rule,使用ssd的osd:
ceph osd crush rule create-replicated ssd_rule default host ssd
查看集群rule:
root@ceph:~# ceph osd crush rule ls
replicated_rule
ssd_rule
通过如下方式查看详细的crushmap信息:
root@ceph:~# ceph osd getcrushmap -o crushmap
172
root@ceph:~# crushtool -d crushmap -o crushmap.txt
root@ceph:~# cat crushmap.txt
# begin crush map
tunable choose_local_tries 0
tunable choose_local_fallback_tries 0
tunable choose_total_tries 50
tunable chooseleaf_descend_once 1
tunable chooseleaf_vary_r 1
tunable straw_calc_version 1
tunable allowed_bucket_algs 54
# devices
device 0 osd.0 class hdd
device 1 osd.1 class hdd
device 2 osd.2 class hdd
device 3 osd.3 class hdd
device 4 osd.4 class hdd
device 5 osd.5 class hdd
device 6 osd.6 class hdd
device 7 osd.7 class hdd
device 8 osd.8 class hdd
device 9 osd.9 class ssd
device 10 osd.10 class ssd
device 11 osd.11 class hdd
device 12 osd.12 class hdd
device 13 osd.13 class hdd
device 14 osd.14 class hdd
device 15 osd.15 class hdd
device 16 osd.16 class ssd
device 17 osd.17 class ssd
device 18 osd.18 class hdd
device 19 osd.19 class hdd
device 20 osd.20 class hdd
device 21 osd.21 class hdd
device 22 osd.22 class hdd
device 23 osd.23 class ssd
device 24 osd.24 class ssd
device 25 osd.25 class hdd
device 26 osd.26 class hdd
device 27 osd.27 class hdd
device 28 osd.28 class hdd
device 29 osd.29 class hdd
device 30 osd.30 class ssd
device 31 osd.31 class ssd
# types
type 0 osd
type 1 host
type 2 chassis
type 3 rack
type 4 row
type 5 pdu
type 6 pod
type 7 room
type 8 datacenter
type 9 region
type 10 root
# buckets
host ctnr.a1-56-11.pub.unp {
id -3 # do not change unnecessarily
id -4 class hdd # do not change unnecessarily
id -9 class ssd # do not change unnecessarily
# weight 11.843
alg straw2
hash 0 # rjenkins1
item osd.0 weight 1.819
item osd.25 weight 1.819
item osd.26 weight 1.819
item osd.27 weight 1.819
item osd.28 weight 1.819
item osd.29 weight 1.819
item osd.30 weight 0.465
item osd.31 weight 0.465
}
host ctnr.a1-56-15.pub.unp {
id -7 # do not change unnecessarily
id -8 class hdd # do not change unnecessarily
id -10 class ssd # do not change unnecessarily
# weight 11.844
alg straw2
hash 0 # rjenkins1
item osd.2 weight 1.819
item osd.3 weight 1.819
item osd.5 weight 1.819
item osd.6 weight 1.819
item osd.7 weight 1.819
item osd.8 weight 1.819
item osd.9 weight 0.465
item osd.10 weight 0.465
}
host ctnr.a1-56-14.pub.unp {
id -2 # do not change unnecessarily
id -5 class hdd # do not change unnecessarily
id -13 class ssd # do not change unnecessarily
# weight 11.844
alg straw2
hash 0 # rjenkins1
item osd.1 weight 1.819
item osd.11 weight 1.819
item osd.12 weight 1.819
item osd.13 weight 1.819
item osd.14 weight 1.819
item osd.15 weight 1.819
item osd.16 weight 0.465
item osd.17 weight 0.465
}
host ctnr.a1-56-16.pub.unp {
id -11 # do not change unnecessarily
id -12 class hdd # do not change unnecessarily
id -14 class ssd # do not change unnecessarily
# weight 11.843
alg straw2
hash 0 # rjenkins1
item osd.4 weight 1.819
item osd.18 weight 1.819
item osd.19 weight 1.819
item osd.20 weight 1.819
item osd.21 weight 1.819
item osd.22 weight 1.819
item osd.23 weight 0.465
item osd.24 weight 0.465
}
root default {
id -1 # do not change unnecessarily
id -6 class hdd # do not change unnecessarily
id -15 class ssd # do not change unnecessarily
# weight 47.375
alg straw2
hash 0 # rjenkins1
item ctnr.a1-56-11.pub.unp weight 11.843
item ctnr.a1-56-15.pub.unp weight 11.844
item ctnr.a1-56-14.pub.unp weight 11.844
item ctnr.a1-56-16.pub.unp weight 11.843
}
# rules
rule replicated_rule {
id 0
type replicated
min_size 1
max_size 10
step take default
step chooseleaf firstn 0 type host
step emit
}
rule ssd_rule {
id 1
type replicated
min_size 1
max_size 10
step take default class ssd
step chooseleaf firstn 0 type host
step emit
}
# end crush map
3. 创建基于ssd_rule规则的存储池
- 创建一个基于该ssd_rule规则的存储池:
ceph osd pool create cache 64 64 ssd_rule
# 查看cache的信息可以看到使用的crush_rule为ssd_rule(即上文crushmap中id为1的规则)
root@ceph:~# ceph osd pool get cache crush_rule
crush_rule: ssd_rule
- 将一个现有的池迁移至ssd的osd上:
ceph osd pool set cephfs_metadata crush_rule ssd_rule
root@ceph:~# ceph osd pool get cephfs_metadata crush_rule
crush_rule: ssd_rule
4. 测试基于ssd的池
root@ceph:~# echo "hello world" > test.txt
root@ceph:~# rados -p cache put test test.txt
root@ceph:~# rados -p cache get test test.txt.out
root@ceph:~# rados -p cache ls |grep test
test
root@ceph:~# ceph osd map cache test
osdmap e3156 pool 'cache' (4) object 'test' -> pg 4.40e8aab5 (4.35) -> up ([23,30,16], p23) acting ([23,30,16], p23)