zoukankan      html  css  js  c++  java
  • Redis Sentinel集群双机房容灾实施步骤

    概要目标
    在双机房部署的情况下,当任一个机房完全无法提供服务时,保证Redis仍能继续对外提供服务。
    架构设计
    A、B两机房,其中A机房有一Master一Slave和两个Sentinel,B机房只有2个Slave,如下图。

    初始规划
    A机房
    192.168.71.213 S+哨兵
    192.168.71.214 M+哨兵
    B机房
    192.168.70.214 S
    192.168.70.215 S

    目录创建
    --redis软件目录
    mkdir -p /home/redis
    --pidfile文件存放目录
    mkdir -p /home/redis/redisrun/
    将redis解压到 /home/redis

    集群配置
    【Master】
    选择71.214作为Master
    [root@node-71 redis]# vi /home/redis/redis.conf
    #后台启动
    daemonize yes
    pidfile "/home/redis/redisrun/redis_6379.pid"
    port 6379
    timeout 0
    tcp-keepalive 0
    loglevel notice
    logfile "/home/redis/redis.log"
    databases 16
    save 900 1
    save 300 10
    save 60 10000
    stop-writes-on-bgsave-error yes
    rdbcompression yes
    rdbchecksum yes
    dbfilename "dump.rdb"
    dir "/home/redis/redisdb"
    #如果做故障切换,不论主从节点都要填写密码且要保持一致
    masterauth "123456"
    slave-serve-stale-data yes
    slave-read-only yes
    repl-disable-tcp-nodelay no
    slave-priority 98
    #当前redis密码
    requirepass "123456"
    appendonly yes
    # appendfsync always
    appendfsync everysec
    # appendfsync no
    no-appendfsync-on-rewrite no
    auto-aof-rewrite-percentage 100
    auto-aof-rewrite-min-size 64mb
    lua-time-limit 5000
    slowlog-log-slower-than 10000
    slowlog-max-len 128
    notify-keyspace-events ""
    hash-max-ziplist-entries 512
    hash-max-ziplist-value 64
    list-max-ziplist-entries 512
    list-max-ziplist-value 64
    set-max-intset-entries 512
    zset-max-ziplist-entries 128
    zset-max-ziplist-value 64
    activerehashing yes
    client-output-buffer-limit normal 0 0 0
    client-output-buffer-limit slave 256mb 64mb 60
    client-output-buffer-limit pubsub 32mb 8mb 60
    hz 10
    aof-rewrite-incremental-fsync yes
    # Generated by CONFIG REWRITE


    【Slave】
    选择其余3个节点作为Slave
    [root@node-71 redis]# vi /home/redis/redis.conf
    daemonize yes
    pidfile "/home/redis/redisrun/redis_6379.pid"
    port 6379
    timeout 0
    tcp-keepalive 0
    loglevel notice
    logfile "/home/redis/redis.log"
    databases 16
    save 900 1
    save 300 10
    save 60 10000
    stop-writes-on-bgsave-error yes
    rdbcompression yes
    rdbchecksum yes
    dbfilename "dump.rdb"
    dir "/home/redis/redisdb"
    #主节点密码
    masterauth "123456"
    slave-serve-stale-data yes
    slave-read-only yes
    repl-disable-tcp-nodelay no
    slave-priority 98
    requirepass "123456"
    appendonly yes
    # appendfsync always
    appendfsync everysec
    # appendfsync no
    no-appendfsync-on-rewrite no
    auto-aof-rewrite-percentage 100
    auto-aof-rewrite-min-size 64mb
    lua-time-limit 5000
    slowlog-log-slower-than 10000
    slowlog-max-len 128
    notify-keyspace-events ""
    hash-max-ziplist-entries 512
    hash-max-ziplist-value 64
    list-max-ziplist-entries 512
    list-max-ziplist-value 64
    set-max-intset-entries 512
    zset-max-ziplist-entries 128
    zset-max-ziplist-value 64
    activerehashing yes
    client-output-buffer-limit normal 0 0 0
    client-output-buffer-limit slave 256mb 64mb 60
    client-output-buffer-limit pubsub 32mb 8mb 60
    hz 10
    aof-rewrite-incremental-fsync yes
    # Generated by CONFIG REWRITE
    #配置主节点信息
    slaveof 192.168.71.214 6379


    --检查修正
    daemonize yes
    pidfile "/home/redis/redisrun//redis_6379.pid"
    logfile "/home/redis/redis.log"


    【sentinel.conf】
    选择A机房2节点作为sentinel
    vi /home/redis/sentinel.conf
    port 26379
    #1表示在sentinel集群中只要有1个sentinel节点检测到redis主节点出故障就进行切换,单sentinel节点无效(自己测试发现的)
    #如果3s内mymaster无响应,则认为mymaster宕机了
    #如果10秒后,mymaster仍没活过来,则启动failover
    sentinel monitor mymaster 192.168.71.214 6379 1
    sentinel down-after-milliseconds mymaster 3000
    sentinel failover-timeout mymaster 10000
    daemonize yes
    #指定工作目录
    dir "/home/redis/sentinel-work"
    protected-mode no
    logfile "/home/redis/sentinellog/sentinel.log"
    #redis主节点密码
    sentinel auth-pass mymaster 123456
    # Generated by CONFIG REWRITE


    --检查修正
    sentinel monitor mymaster 192.168.71.214 6379 1
    dir "/home/redis/sentinel-work"
    logfile "/home/redis/sentinellog/sentinel.log"

    启动检查
    【启动集群与日志监控】
    每个节点都执行
    cd /home/redis/src/
    ./redis-server /home/redis/redis.conf

    tail -f /home/redis/redis.log

    只在sentinel节点执行
    cd /home/redis/src/
    ./redis-sentinel /home/redis/sentinel.conf

    tail -f /home/redis/sentinellog/sentinel.log

    【Master检查】
    cd /home/redis/src/
    [root@localhost src]# ./redis-cli -h 192.168.70.214 -p 6379 -a 123456
    192.168.70.214:6379> info Replication
    # Replication
    role:master
    connected_slaves:3
    slave0:ip=192.168.71.213,port=6379,state=online,offset=1107595,lag=1
    slave1:ip=192.168.70.214,port=6379,state=online,offset=1107742,lag=0
    slave2:ip=192.168.70.215,port=6379,state=online,offset=1107889,lag=0
    master_repl_offset:1107889
    repl_backlog_active:1
    repl_backlog_size:1048576
    repl_backlog_first_byte_offset:59314
    repl_backlog_histlen:1048576
    192.168.70.214:6379> set test zgy
    OK
    192.168.70.214:6379> get test
    "zgy"
    192.168.70.214:6379>

    【Slave检查,只读】
    192.168.71.214:6379> get test
    "zgy"
    192.168.71.214:6379> set test zgy2
    (error) READONLY You can't write against a read only slave.
    192.168.71.214:6379> info Replication
    # Replication
    role:slave
    master_host:192.168.70.214
    master_port:6379
    master_link_status:up
    master_last_io_seconds_ago:1
    master_sync_in_progress:0
    slave_repl_offset:42385
    slave_priority:100
    slave_read_only:1
    connected_slaves:0
    master_repl_offset:0
    repl_backlog_active:0
    repl_backlog_size:1048576
    repl_backlog_first_byte_offset:0
    repl_backlog_histlen:0
    192.168.71.214:6379>


    断网断电测试
    断网
    通过开启防火墙来模拟
    service iptables status
    --service iptables start
    --70网段2节点的防火墙配置
    [root@localhost redis]# cat /etc/sysconfig/iptables
    # Firewall configuration written by system-config-firewall
    # Manual customization of this file is not recommended.
    *filter
    :INPUT ACCEPT [0:0]
    :FORWARD ACCEPT [0:0]
    :OUTPUT ACCEPT [0:0]
    #屏蔽A机房2个节点
    -I INPUT -s 192.168.71.213 -j DROP
    -I INPUT -s 192.168.71.214 -j DROP
    -A INPUT -m state --state ESTABLISHED,RELATED -j ACCEPT
    -A INPUT -p icmp -j ACCEPT
    -A INPUT -i lo -j ACCEPT
    -A INPUT -m state --state NEW -m tcp -p tcp --dport 22 -j ACCEPT
    -A INPUT -j REJECT --reject-with icmp-host-prohibited
    -A FORWARD -j REJECT --reject-with icmp-host-prohibited
    COMMIT

    断网
    B机房断网前
    --前
    192.168.71.214:6379> info Replication
    # Replication
    role:master
    connected_slaves:3
    slave0:ip=192.168.71.213,port=6379,state=online,offset=12825868,lag=1
    slave1:ip=192.168.70.214,port=6379,state=online,offset=12825868,lag=1
    slave2:ip=192.168.70.215,port=6379,state=online,offset=12826015,lag=0
    master_repl_offset:12826162
    repl_backlog_active:1
    repl_backlog_size:1048576
    repl_backlog_first_byte_offset:11777587
    repl_backlog_histlen:1048576
    192.168.71.214:6379>

    --后
    --明显找不到70网段的那2个节点啦
    192.168.71.214:6379> info Replication
    # Replication
    role:master
    connected_slaves:1
    slave0:ip=192.168.71.213,port=6379,state=online,offset=12909588,lag=1
    master_repl_offset:12909588
    repl_backlog_active:1
    repl_backlog_size:1048576
    repl_backlog_first_byte_offset:11861013
    repl_backlog_histlen:1048576
    192.168.71.214:6379>

    而Master还能继续对外提供服务

    A机房断网前、后

    192.168.71.214:6379> info Replication
    # Replication
    role:master
    connected_slaves:3
    slave0:ip=192.168.71.213,port=6379,state=online,offset=12942691,lag=1
    slave1:ip=192.168.70.214,port=6379,state=online,offset=12942691,lag=1
    slave2:ip=192.168.70.215,port=6379,state=online,offset=12942838,lag=0
    master_repl_offset:12942838
    repl_backlog_active:1
    repl_backlog_size:1048576
    repl_backlog_first_byte_offset:11894263
    repl_backlog_histlen:1048576

    后,出现2个Master??
    192.168.71.214:6379> info Replication
    # Replication
    role:master
    connected_slaves:0
    master_repl_offset:12957363
    repl_backlog_active:1
    repl_backlog_size:1048576
    repl_backlog_first_byte_offset:11908788
    repl_backlog_histlen:1048576
    192.168.71.214:6379>

    192.168.71.213:6379> info replication
    # Replication
    role:master
    connected_slaves:0
    master_repl_offset:12943881
    repl_backlog_active:0
    repl_backlog_size:1048576
    repl_backlog_first_byte_offset:0
    repl_backlog_histlen:0
    192.168.71.213:6379>

    断电
    通过kill redis进程来模拟
    ps -ef|grep redis
    断电前
    192.168.71.213:6379> info replication
    # Replication
    role:master
    connected_slaves:3
    slave0:ip=192.168.70.215,port=6379,state=online,offset=13091227,lag=0
    slave1:ip=192.168.70.214,port=6379,state=online,offset=13091227,lag=0
    slave2:ip=192.168.71.214,port=6379,state=online,offset=13091080,lag=1
    master_repl_offset:13091227
    repl_backlog_active:1
    repl_backlog_size:1048576
    repl_backlog_first_byte_offset:13087442
    repl_backlog_histlen:3786

    192.168.71.214:6379> info Replication
    # Replication
    role:master
    connected_slaves:1
    slave0:ip=192.168.71.213,port=6379,state=online,offset=13096642,lag=1
    master_repl_offset:13096642
    repl_backlog_active:1
    repl_backlog_size:1048576
    repl_backlog_first_byte_offset:13092272
    repl_backlog_histlen:4371
    192.168.71.214:6379>


    断电后
    192.168.70.214:6379> info Replication
    # Replication
    role:slave
    master_host:192.168.71.214
    master_port:6379
    master_link_status:down
    master_last_io_seconds_ago:-1
    master_sync_in_progress:0
    slave_repl_offset:13159324
    master_link_down_since_seconds:18
    slave_priority:100
    slave_read_only:1
    connected_slaves:0
    master_repl_offset:0
    repl_backlog_active:0
    repl_backlog_size:1048576
    repl_backlog_first_byte_offset:0
    repl_backlog_histlen:0

    192.168.70.215:6379> info Replication
    # Replication
    role:slave
    master_host:192.168.71.214
    master_port:6379
    master_link_status:down
    master_last_io_seconds_ago:-1
    master_sync_in_progress:0
    slave_repl_offset:13159324
    master_link_down_since_seconds:28
    slave_priority:100
    slave_read_only:1
    connected_slaves:0
    master_repl_offset:0
    repl_backlog_active:0
    repl_backlog_size:1048576
    repl_backlog_first_byte_offset:0
    repl_backlog_histlen:0

    70网段都变成Slave无法正常提供服务了。。。

    此时,需要修改其中一个节点的配置来向外提供服务
    先Kill掉redis进程,再修改其中一个节点(如70.215)的redis参数,使其slaveof指向另一个节点(如70.214);并检查被指向的那一台(70.214),删除其配置中的slaveof这一项;最后重启2个节点,即可对外正常提供服务
    vi /home/redis/redis.conf
    slaveof 192.168.70.214 6379

    [root@localhost src]# ./redis-cli -h 192.168.70.214 -p 6379 -a 123456
    192.168.70.214:6379> info Replication
    # Replication
    role:master
    connected_slaves:1
    slave0:ip=192.168.70.215,port=6379,state=online,offset=15,lag=1
    master_repl_offset:15
    repl_backlog_active:1
    repl_backlog_size:1048576
    repl_backlog_first_byte_offset:2
    repl_backlog_histlen:14
    192.168.70.214:6379>

    【还原初始】
    修改71.214 之外的参数
    vi /home/redis/redis.conf
    slaveof 192.168.71.214 6379

    vi /home/redis/sentinel.conf
    sentinel monitor mymaster 192.168.71.214 6379 1
    并删除最后几行(即 # Generated by CONFIG REWRITE 之后由sentinel自动写入的内容)

    数据校验
    Master执行更新数据会同步Slave
    注意事项
    见每步后面

  • 相关阅读:
    【原】泛型委托
    【原】web页面登陆验证
    【原】在一般处理程序中设置session
    16Aspx.com-PHP企业整站源码 景观石材大理石类织梦模板 含手机移动端 完整源码 APP+PC
    16Aspx.com-将15位身份证转换成18位
    16Aspx.com-书通网中小学生免费在线学习网站源码 带采集带手机版帝国cms内核
    16Aspx.com源码2014年7月详细
    Web电子商务网(三层)V2.0源码
    毫秒级百万数据分页存储过程
    C#做的一个加密/解密的类
  • 原文地址:https://www.cnblogs.com/ritchy/p/9983485.html
Copyright © 2011-2022 走看看