一、直接上shell代码:
#!/usr/bin/env bash
#
# slow_recovery.sh - throttle Ceph recovery/backfill on every OSD.
#
# Steps:
#   1. Set the cluster-wide noscrub and nodeep-scrub flags.
#   2. Inject the recovery/backfill limits below into each OSD reported by
#      `ceph osd ls`.
#
# NOTE: injectargs only changes the running daemons; put the same values in
# ceph.conf if they must survive an OSD restart. The scrub flags stay set
# until they are cleared with `ceph osd unset noscrub` and
# `ceph osd unset nodeep-scrub`.
#
# Exit status: 0 if every step succeeded, 1 otherwise.

set -uo pipefail

# All four limits are sent in a single injectargs call per OSD instead of
# one call per option per OSD.
readonly RECOVERY_ARGS='--osd_recovery_max_active 3 --osd_recovery_op_priority 1 --osd_recovery_max_single_start 1 --osd_max_backfills 1'

# Print an error message to stderr and abort the script.
die() {
  printf 'slow_recovery: %s\n' "$*" >&2
  exit 1
}

ceph osd set noscrub || die 'failed to set the noscrub flag'
ceph osd set nodeep-scrub || die 'failed to set the nodeep-scrub flag'

# List the OSD ids once; a failing or empty listing must not be mistaken
# for "nothing to do".
osd_list=$(ceph osd ls) || die 'failed to list OSDs'
[[ -n "$osd_list" ]] || die 'ceph osd ls returned no OSDs'
mapfile -t osd_ids <<<"$osd_list"

failed=0
for osd_id in "${osd_ids[@]}"; do
  # Keep going when one OSD cannot be reached so the remaining OSDs still
  # get throttled; the failure is reported through the exit status.
  if ! ceph tell "osd.${osd_id}" injectargs "$RECOVERY_ARGS"; then
    printf 'slow_recovery: injectargs failed on osd.%s\n' "$osd_id" >&2
    failed=1
  fi
done

exit "$failed"
二、参数含义解析:
pg的recovery模式分为两种:
- 基于pg log的recovery,osd故障时间不长,需要恢复的数据可以通过pg log回放找回来。
- backfill recovery:无法通过pg log回放找全数据,只能通过全量回填(backfill)拷贝来恢复。
"mds_max_file_recover": "32",
"osd_min_recovery_priority": "0",
"osd_allow_recovery_below_min_size": "true",
"osd_recovery_threads": "1",
"osd_recover_clone_overlap": "true",
"osd_recover_clone_overlap_limit": "10",
"osd_recovery_thread_timeout": "30",
"osd_recovery_thread_suicide_timeout": "300",
"osd_recovery_delay_start": "0",
"osd_recovery_max_active": "10", #一个osd上承载多个pg, 可能好几个pg都需要recover,这个值限定该osd最多同时有多少pg做recover。
"osd_recovery_max_single_start": "5", #这个值限定了每个pg可以启动recover操作的最大数。
"osd_recovery_max_chunk": "8388608",
"osd_recovery_forget_lost_objects": "false",
"osd_recovery_op_priority": "4",
"osd_recovery_op_warn_multiple": "16",

"osd_max_backfills": "4", #一个osd上承载了多个pg。可能很多pg都需要做第二种recovery,即backfill。 设定这个参数来指明在一个osd上最多能有多少个pg同时做backfill。
"osd_backfill_full_ratio": "0.85",
"osd_backfill_retry_interval": "10",
"osd_backfill_scan_min": "64",
"osd_backfill_scan_max": "512",
"osd_kill_backfill_at": "0",
"osd_debug_skip_full_check_in_backfill_reservation": "false",
"osd_debug_reject_backfill_probability": "0",
三、如何调整recovery限流值?
默认值并不能很好地适应线上情况,通常会导致后端osd的请求量和并发度过高,影响心跳并导致客户端请求挂起。结果表现为:客户端读写受影响,正常的OSD因为没有心跳回应而被标记为down。
在更换SATA盘时,限流建议值如下:
osd_max_backfills:1
osd_recovery_max_active:3
osd_recovery_max_single_start:1
实验结果表明: 更换的SATA盘在做backfill时,磁盘util保持在40~70%的合理区间。
全SSD集群更换磁盘时,参数值应该可以适当调大;但在没有实践结果支持的情况下,建议保守设置为跟SATA盘一样的限流值。
还有一个额外的参数:将osd_recovery_op_priority从默认值10改到1,把recovery优先级降到最低,也会降低对客户端读写的影响。