背景
Ceph 集群在运行一段时间后,常会出现 OSD 数据不均衡的情况:有的 OSD 使用率超过 80%,有的甚至不足 60%。一般有两种方法可以均衡各个 OSD 间的数据:
OSD Reweight
其实就是给各个 OSD 设置一个用于均衡的 reweight 值(区别于 OSD 的 CRUSH weight:后者是根据容量设置的固定权重)。
调整使用率超过阈值的 OSD 的权重。阈值默认为 120,即使用率超过集群平均值 1.2 倍的 OSD 会被下调权重。
ceph osd reweight-by-utilization [threshold]
若要预览效果,则可以使用以下命令:
ceph osd test-reweight-by-utilization [threshold]
当然,也可以根据每个 OSD 上的 PG 数量来调整,阈值默认也是 120。
ceph osd reweight-by-pg [threshold]
若要预览效果,则可以使用以下命令:
ceph osd test-reweight-by-pg [threshold]
Ceph Balancer
从 Luminous 开始,Ceph 新增了 balancer 模块,可以将 PG 在各个 OSD 间自动迁移,以达到均衡的效果。推荐使用。
1)查看balancer 模块状态
ceph mgr module ls # 查看各个模块的状态
ceph balancer status
2)启用balancer 模块(默认enable)
ceph mgr module enable balancer
3)启用均衡(默认active 是false)
ceph balancer on
4)设置模式(修改PG mapping)
ceph balancer mode upmap
或设置模式(修改weight)
ceph balancer mode crush-compat
其中 upmap 模式要求所有客户端的兼容版本不低于 Luminous,需先执行以下命令:
ceph osd set-require-min-compat-client luminous
在 Luminous 上如需手动控制,可按以下步骤生成、评估并执行均衡计划:
ceph balancer eval #评估当前集群(所有 pool)的数据分布并给出得分(分值越低越均衡);针对特定 pool 则使用:
ceph balancer eval <POOL_NAME>
ceph balancer optimize plan2 #生成一个调优配置,或指定存储池ceph balancer optimize plan2 default.rgw.buckets.data
ceph balancer eval plan2 #评估执行该计划后预期的数据分布得分
ceph balancer show plan2 #查看调优计算最终的结果
ceph balancer execute plan2 #根据上面模拟计算的结果,执行最终的权重调整
5)查看调整结果
ceph osd df
6)关闭自动调整
ceph balancer off
ceph balancer status
OSD PG 数统计脚本:按 pool 和 OSD 排序,输出各 pool 的平均 PG 数、PG 数最多的 OSD 编号及其超过平均值的百分比、PG 数最少的 OSD 编号及其低于平均值的百分比。
该脚本用于辅助查看集群各个 OSD 的 PG 分布情况,参考武汉-磨渣的博客《查询osd上的pg数》。
ceph pg dump | awk '
# Prints a table of PG counts per OSD (rows) and per pool (columns), built
# from the UP set of every PG in the "ceph pg dump" output, followed by
# per-pool statistics: total, number of OSDs holding PGs, average, the
# fullest and emptiest OSD and how far each deviates from the average.
#
# Requires gawk >= 4.0 (asorti with a sort specification).
# Note: this program sits inside a single-quoted shell string, so it must
# not contain a single-quote character anywhere, including comments.

# Print the horizontal rule: 8 dashes per pool column plus a fixed tail.
function rule(ncols,    c) {
    for (c = 1; c <= ncols; c++) printf("--------")
    printf("----------------\n")
}

# Deviation of value from mean, in percent; 0 when the mean is 0.
function pct_off(value, mean) {
    return mean ? 100 * (value - mean) / mean : 0
}

# Label for an OSD id, or "-" when the pool has no OSD at all.
function osd_label(id) {
    return (id == "") ? "-" : "osd." id
}

# Header row: locate the UP column. Matching is case-insensitive so that
# releases printing a lowercase header (pg_stat / up) are handled too.
toupper($0) ~ /^PG_STAT/ {
    col = 1
    # Bounded by NF so a header without an UP field cannot loop forever.
    while (col <= NF && toupper($col) != "UP") col++
    # Data rows are read one field to the right of the header position.
    # Presumably a timestamp field before UP contains a space and splits
    # into two fields on data rows -- TODO confirm for your Ceph release.
    col++
}

# PG rows: "<pool>.<pgid> ...". The dot is escaped so that the pool/OSD
# summary rows at the end of the dump ("1 123 ...") are not counted.
/^[0-9a-f]+\.[0-9a-f]+/ {
    if (!col) next                      # no header seen, column unknown
    match($0, /^[0-9a-f]+/)
    pool = substr($0, RSTART, RLENGTH)
    poollist[pool] = 0

    # Extract every OSD id from the UP set, e.g. "[3,12,7]".
    up = $col
    delete osds
    n = 0
    while (match(up, /[0-9]+/) > 0) {
        osds[++n] = substr(up, RSTART, RLENGTH)
        up = substr(up, RSTART + RLENGTH)
    }
    for (k = 1; k <= n; k++) {
        array[osds[k], pool]++
        osdlist[osds[k]]
    }
}

END {
    printf("\n")

    # Sort ids numerically so that osd.2 comes before osd.10.
    npools = asorti(poollist, newpoollist, "@ind_num_asc")
    nosds  = asorti(osdlist,  newosdlist,  "@ind_num_asc")

    printf("pool :\t")
    for (j = 1; j <= npools; j++) printf("%s\t", newpoollist[j])
    printf("| SUM\n")
    rule(npools)

    # One row per OSD; per-pool statistics are accumulated on the way.
    for (i = 1; i <= nosds; i++) {
        osd = newosdlist[i]
        printf("osd.%d\t", osd)
        sum = 0
        for (j = 1; j <= npools; j++) {
            cnt = array[osd, newpoollist[j]] + 0
            printf("%d\t", cnt)
            sum      += cnt
            total[j] += cnt
            if (cnt > maxcnt[j] + 0) {
                # strict ">" keeps the first OSD on ties
                maxcnt[j] = cnt
                maxosd[j] = osd
            }
            if (cnt > 0) {
                hosts[j]++
                # "<=" keeps the last OSD on ties; OSDs without any PG of
                # this pool are ignored for the minimum.
                if (!(j in mincnt) || cnt <= mincnt[j]) {
                    mincnt[j] = cnt
                    minosd[j] = osd
                }
            }
        }
        printf("| %d\n", sum)
    }
    rule(npools)

    # Average PGs per OSD, counting only OSDs that hold PGs of the pool.
    # Guarded: a pool whose PGs have empty UP sets has hosts[j] == 0.
    for (j = 1; j <= npools; j++)
        avg[j] = hosts[j] ? total[j] / hosts[j] : 0

    printf("SUM :\t");   for (j = 1; j <= npools; j++) printf("%d\t", total[j] + 0);              printf("|\n")
    printf("Osd :\t");   for (j = 1; j <= npools; j++) printf("%d\t", hosts[j] + 0);              printf("|\n")
    printf("AVE :\t");   for (j = 1; j <= npools; j++) printf("%.2f\t", avg[j]);                  printf("|\n")
    printf("Max :\t");   for (j = 1; j <= npools; j++) printf("%d\t", maxcnt[j] + 0);             printf("|\n")
    printf("Osdid :\t"); for (j = 1; j <= npools; j++) printf("%s\t", osd_label(maxosd[j]));      printf("|\n")
    printf("per:\t");    for (j = 1; j <= npools; j++) printf("%.1f%%\t", pct_off(maxcnt[j] + 0, avg[j])); printf("|\n")
    rule(npools)
    printf("min :\t");   for (j = 1; j <= npools; j++) printf("%d\t", mincnt[j] + 0);             printf("|\n")
    printf("osdid :\t"); for (j = 1; j <= npools; j++) printf("%s\t", osd_label(minosd[j]));      printf("|\n")
    printf("per:\t");    for (j = 1; j <= npools; j++) printf("%.1f%%\t", pct_off(mincnt[j] + 0, avg[j])); printf("|\n")
}'