zoukankan      html  css  js  c++  java
  • MySQL集群搭建(6)-双主+keepalived高可用

    双主 + keepalived 是一个比较简单的 MySQL 高可用架构,适用于中小 MySQL 集群,今天就说说怎么用 keepalived 做 MySQL 的高可用。

    1 概述

    1.1 keepalived 简介

    简单地说,keepalived 就是通过管理 VIP 来实现机器的高可用的,在使用 keepalived 的情况下,只有一台服务器能够提供服务(通过 VIP 来实现),当 Master 主机宕机后,VIP 会自动飘移到另一台服务器

    keepalived 采用 Master/Slave 模式, 在 Master 上设置配置文件的 VIP,当 Master 宕机后,VIP 自动漂移到另一台 keepalived 服务器上

    keepalived 可以用来做各种软件的高可用集群,它会一直检测服务器的状态,如果有一台服务器宕机,或工作出现故障,keepalived 将检测到,并将有故障的服务器从系统中剔除,同时使用其他服务器代替该服务器的工作,当服务器工作正常后 keepalived 自动将服务器加入到服务器群中。

    1.2 keepalived 配合双主

    keepalived 使用默认配置只能做到主机级别的高可用,但是我们的 MySQL 要做高可用至少要增加以下功能

    • 能够检测 MySQL 服务状态
    • 主节点 read_only=0,备节点 read_only=1
    • 切换时,备节点要等待主节点同步完成

    所以,keepalived 实现 MySQL 高可用需要使用自定义脚本来进行扩展

    2 环境准备

    2.1 数据库环境

    操作前已经准备好了一套主主架构数据库,搭建方法参考 MySQL集群搭建(2)-主主从模式

    节点信息

    IP 系统 端口 MySQL版本 节点 读写 说明
    10.0.0.247 Centos6.5 3306 5.7.9 Master 读写 主节点
    10.0.0.248 Centos6.5 3306 5.7.9 Standby 只读,可切换为读写 备主节点

    VIP 信息

    简称 VIP 类型
    RW-VIP 10.0.0.237 读写VIP

    Master 参考配置

    [client]
    port = 3306
    default-character-set=utf8mb4
    socket = /data/mysql_db/test_db/mysql.sock
    
    [mysqld]
    datadir = /data/mysql_db/test_db
    basedir = /usr/local/mysql57
    tmpdir = /tmp
    socket = /data/mysql_db/test_db/mysql.sock
    pid-file = /data/mysql_db/test_db/mysql.pid
    skip-external-locking = 1
    skip-name-resolve = 1
    port = 3306
    server_id = 2473306
    
    default-storage-engine = InnoDB
    character-set-server = utf8mb4
    default_password_lifetime=0
    
    auto_increment_offset = 1
    auto_increment_increment = 2
    
    #### log ####
    log_timestamps=system
    log_bin = /data/mysql_log/test_db/mysql-bin
    log_bin_index = /data/mysql_log/test_db/mysql-bin.index
    binlog_format = row
    relay_log_recovery=ON
    relay_log=/data/mysql_log/test_db/mysql-relay-bin
    relay_log_index=/data/mysql_log/test_db/mysql-relay-bin.index
    log_error = /data/mysql_log/test_db/mysql-error.log
    
    #### replication ####
    log_slave_updates = 1
    replicate_wild_ignore_table = information_schema.%,performance_schema.%,sys.%
    
    #### semi sync replication settings #####
    plugin_dir=/usr/local/mysql57/lib/plugin
    plugin_load = "rpl_semi_sync_master=semisync_master.so;rpl_semi_sync_slave=semisync_slave.so"
    loose_rpl_semi_sync_master_enabled = 1
    loose_rpl_semi_sync_slave_enabled = 1
    

    Slave 参考配置

    [client]
    port = 3306
    default-character-set=utf8mb4
    socket = /data/mysql_db/test_db/mysql.sock
    
    [mysqld]
    datadir = /data/mysql_db/test_db
    basedir = /usr/local/mysql57
    tmpdir = /tmp
    socket = /data/mysql_db/test_db/mysql.sock
    pid-file = /data/mysql_db/test_db/mysql.pid
    skip-external-locking = 1
    skip-name-resolve = 1
    port = 3306
    server_id = 2483306
    
    default-storage-engine = InnoDB
    character-set-server = utf8mb4
    default_password_lifetime=0
    
    auto_increment_offset = 2
    auto_increment_increment = 2
    
    #### log ####
    log_timestamps=system
    log_bin = /data/mysql_log/test_db/mysql-bin
    log_bin_index = /data/mysql_log/test_db/mysql-bin.index
    binlog_format = row
    relay_log_recovery=ON
    relay_log=/data/mysql_log/test_db/mysql-relay-bin
    relay_log_index=/data/mysql_log/test_db/mysql-relay-bin.index
    log_error = /data/mysql_log/test_db/mysql-error.log
    
    #### replication ####
    log_slave_updates = 1
    replicate_wild_ignore_table = information_schema.%,performance_schema.%,sys.%
    
    #### semi sync replication settings #####
    plugin_dir=/usr/local/mysql57/lib/plugin
    plugin_load = "rpl_semi_sync_master=semisync_master.so;rpl_semi_sync_slave=semisync_slave.so"
    loose_rpl_semi_sync_master_enabled = 1
    loose_rpl_semi_sync_slave_enabled = 1
    

    2.2 创建监控用的账号

    - 由于是测试环境,账号密码设置比较随便
    create user monitor@'localhost' identified by 'monitor';
    grant all on *.* to monitor@'localhost';
    flush privileges;
    

    2.3 安装 keepalived

    我们在 Master 和 Slave 上部署 keepalived

    1). yum 安装

    如果有对应的 yum 源,直接安装就可以了

    yum install -y keepalived
    

    2). 源码安装

    下载安装包, 下载地址 keepalived, 使用 1.2.24 版本举例

    # 安装依赖
    yum install -y gcc popt-devel openssl openssl-devel libssl-dev libnl-devel popt-devel libnfnetlink-devel
    
    # 下载包
    wget http://www.keepalived.org/software/keepalived-1.2.24.tar.gz
    
    # 解压安装
    tar -xvz -f  keepalived-1.2.24.tar.gz
    cd keepalived-1.2.24
    ./configure --prefix=/usr/local/keepalived
    make && make install
    
    cp /usr/local/keepalived/sbin/keepalived /usr/sbin/
    cp /usr/local/keepalived/etc/rc.d/init.d/keepalived /etc/init.d/
    cp /usr/local/keepalived/etc/sysconfig/keepalived /etc/sysconfig/
    mkdir /etc/keepalived/
    cp /usr/local/keepalived/etc/keepalived/keepalived.conf /etc/keepalived/
    

    3 配置高可用

    3.1 keepalived 配置

    打开 /etc/keepalived/keepalived.conf 文件, 按照实际情况加上下面的配置

    global_defs {
       router_id MYSQL_MM  # 标识
       vrrp_skip_check_adv_addr
       vrrp_strict        # 严格执行 VRRP 协议规范
       vrrp_garp_interval 0
       vrrp_gna_interval 0
    }
    
    vrrp_script check_mysql {
        script "/bin/sh /etc/keepalived/keepalived_mysql_check.sh"  # 检查脚本
        interval 10  # 检查周期
    }
    
    vrrp_instance MYSQL_MM {
        state BACKUP            # 都设为 BACKUP,避免起来后抢占
        interface eth0          # 网卡名称,根据实际情况填写
        virtual_router_id 243   # 用来区分 VRRP 组播的标记,取值 0-255
        priority 100
        advert_int 1
        nopreempt               # 设为非抢占
        authentication {
            auth_type PASS
            auth_pass 1111
        }
    
        # Master 节点可以注释掉下面语句,防止启动 keepalived 的时候执行脚本
        notify_master "/bin/sh /etc/keepalived/keepalived_mysql_start.sh"  # 变为 MASTER 时执行
    
        virtual_ipaddress {
            10.0.0.237
        }
    
        # Slave 节点可以注释下面检查脚本,Slave 没有必要一直检查
        track_script {
            check_mysql
        }
    }
    

    3.2 配置检查脚本

    打开 /etc/keepalived/keepalived_mysql_check.sh, 写入检测脚本

    #!/bin/sh
    # @Author: chengqm
    # MySQL 检测脚本
    MyPath=$(cd $(dirname $0); pwd)
    cd $MyPath
    
    ThisTime=`date '+%F %T'`
    log_file='/var/log/keepalived_mysql.log'
    
    # MySQL 连接方式,根据实际情况调整
    export MYSQL_PWD='monitor'
    MYSQL_USER='monitor'
    MYSQL_SOCKET="/data/mysql_db/test_db/mysql.sock"
    mysql_connect="mysql -u${MYSQL_USER} -S${MYSQL_SOCKET} "
    
    # 美化输出
    function techo() {
        message=$1
        message_level=$2
        if [ -e $message_level ];then
            message_level='info'
        fi
        echo "`date '+%F %T'` - [${message_level}] $message" >> $log_file
    }
    
    # 检查函数, 正常返回 0
    function check {
        ret=`$mysql_connect -N -e 'select 1 as value'`
        if [ $? -ne 0 ] || [ $ret -ne '1' ];then
            return 1
        else
            return 0
        fi
    }
    
    function read_only {
        param=$1
        $mysql_connect -e "set global read_only = ${param}"
        techo "设置是否只读 read_only ${param}"
    }
    
    # 失效转移
    function failover {
        techo "开始执行失效转移"
        # 1. 停止 keepalived
        killall keepalived
    
        # 2. 如果还能执行的话,设为 read_only
        read_only 1
    
        if [ $? -eq 0 ];then
            # 3. 如果还能执行,kill 所有的连接
            $mysql_connect -e "select concat('KILL ',id,';') from information_schema.processlist where user!='root' AND db is not null into outfile '/tmp/kill.txt.${ThisTime}';"
            if [ $? -eq 0 ];then
                $mysql_connect -e "source /tmp/kill.txt.${ThisTime};"
            fi
        fi
    
        # 4. 其他操作,比如说自动关机
    
        techo "失效转移执行成功,当前数据库关闭访问"
    }
    
    # 有问题检查 4 次
    for ((i=1; i<=4; i ++))  
    do  
        check
        if [ $? -eq 0 ];then
            techo "MySQL is ok"
            # 正常退出脚本
            exit 0
        else
            techo "Connection failed $i time(s)"
            sleep 1
        fi
    done
    
    techo '无法连接当前数据库'
    
    # 失效转移
    failover
    

    注意:脚本没有经过严格测试,需要根据实际情况调整

    3.3 配置提升为 Master 时执行的脚本

    打开 /bin/sh /etc/keepalived/keepalived_mysql_start.sh", 写入脚本内容

    #!/bin/sh
    # @Author: chengqm
    # keepalived 变为 Master 时执行
    MyPath=$(cd $(dirname $0); pwd)
    cd $MyPath
    
    ThisTime=`date '+%F %T'`
    log_file='/var/log/keepalived_mysql.log'
    
    # MySQL 连接方式,根据实际情况调整
    export MYSQL_PWD='monitor'
    MYSQL_USER='monitor'
    MYSQL_SOCKET="/data/mysql_db/test_db/mysql.sock"
    mysql_connect="mysql -u${MYSQL_USER} -S${MYSQL_SOCKET} "
    
    # 美化输出
    function techo() {
        message=$1
        message_level=$2
        if [ -e $message_level ];then
            message_level='info'
        fi
        echo "`date '+%F %T'` - [${message_level}] $message" >> $log_file
    }
    
    # 检查函数, 正常返回 0
    function check {
        ret=`$mysql_connect -N -e 'select 1 as value'`
        if [ $? -ne 0 ] || [ $ret -ne '1' ];then
            return 1
        else
            return 0
        fi
    }
    
    # 获取 slave status 的信息
    function slave_info() {
        tmp_file=/tmp/slave_info.tmp
        $mysql_connect -e 'show slave statusG' > /tmp/slave_info.tmp
        slave_sql=`grep 'Slave_SQL_Running:' $tmp_file | sed 's/s*//g' | tr "A-Z" "a-z"  | awk -F":" '{print $2}'`
        seconds_behind_master=`grep 'Seconds_Behind_Master:' $tmp_file | sed 's/s*//g' | tr "A-Z" "a-z"  | awk -F":" '{print $2}'`
    
        master_log_file=`grep 'Master_Log_File:' $tmp_file | head -1 | sed 's/s*//g' | tr "A-Z" "a-z"  | awk -F":" '{print $2}'`
        master_log_pos=`grep 'Read_Master_Log_Pos:' $tmp_file | sed 's/s*//g' | tr "A-Z" "a-z"  | awk -F":" '{print $2}'`
    
        relay_master_log_file=`grep 'Relay_Master_Log_File:' $tmp_file | sed 's/s*//g' | tr "A-Z" "a-z"  | awk -F":" '{print $2}'`
        exec_master_log_pos=`grep 'Exec_Master_Log_Pos:' $tmp_file | sed 's/s*//g' | tr "A-Z" "a-z"  | awk -F":" '{print $2}'`
    
    }
    
    # 设置是否可读
    function read_only {
        param=$1
        $mysql_connect -e "set global read_only = ${param}"
        techo "设置是否只读 read_only ${param}"
    }
    
    # 处理数据同步
    function sync_master_log() {
        # 如果是数据一致性优先,等待同步完毕。如果是服务可用性优先,可以注销下面的代码
        slave_info
        if [ $slave_sql == "yes" ];then
            techo "当前同步位置 Master ${master_log_file} ${master_log_pos}"
            techo "等待同步到 Master ${master_log_file} ${master_log_pos}"
            $mysql_connect -e "select master_pos_wait('$master_log_file', $master_log_pos);" > /dev/null
            techo "同步完毕"
        fi
    }
    
    techo "当前数据库提升为主库"
    
    check
    if [ $? -ne 0 ];then
        techo "无法连接当前数据库"
        exit 1
    fi
    
    # 等待同步
    sync_master_log
    
    # 设为可写
    read_only 0
    

    注意:脚本没有经过严格测试,需要根据实际情况调整

    3.4 启动 keepalived

    由于配置了 BACKUP 模式,所以两个 keepalived 先起来的是主,先后在主备节点执行

    /etc/init.d/keepalived start
    

    检查 /var/log/message 日志,确认 keepalived 没有报错

    检查 Master IP 状态, 确认设置了 VIP

    [root@cluster01 shell]# ip addr
    1: lo: <LOOPBACK,UP,LOWER_UP> mtu 16436 qdisc noqueue state UNKNOWN 
        link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00
        inet 127.0.0.1/8 scope host lo
        inet6 ::1/128 scope host 
           valid_lft forever preferred_lft forever
    2: eth0: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc pfifo_fast state UP qlen 1000
        link/ether fa:16:3e:de:80:33 brd ff:ff:ff:ff:ff:ff
        inet 10.0.0.247/16 brd 10.0.255.255 scope global eth0
        inet 10.0.0.237/32 scope global eth0
        inet6 fe80::f816:3eff:fede:8033/64 scope link 
           valid_lft forever preferred_lft forever
    

    检查 MySQL 检测脚本执行情况,确认正常运行

    [root@cluster01 ~]# tail -f /var/log/keepalived_mysql.log 
    ...
    2019-01-28 15:04:18 - [info] MySQL is ok
    2019-01-28 15:04:28 - [info] MySQL is ok
    

    4 失效转移测试

    mytest 库里新建 nowdate 测试表,只有 idctime 字段,然后每秒插入一条数据

    [root@cluster03 ~]# while true; do date;mysql -h10.0.0.237 -P3306 -umytest -e 'use mytest;insert into nowdate values (null, now());'; sleep 1;done
    Mon Jan 28 15:04:26 CST 2019
    Mon Jan 28 15:04:27 CST 2019
    ...
    

    kill 掉 Master 进程

    killall mysqld
    

    查看旧 Master 日志

    2019-01-28 15:04:48 - [info] MySQL is ok
    2019-01-28 15:04:58 - [info] Connection failed 1 time(s)
    2019-01-28 15:04:59 - [info] Connection failed 2 time(s)
    2019-01-28 15:05:00 - [info] Connection failed 3 time(s)
    2019-01-28 15:05:01 - [info] Connection failed 4 time(s)
    2019-01-28 15:05:02 - [info] 无法连接当前数据库
    2019-01-28 15:05:02 - [info] 开始执行失效转移
    2019-01-28 15:05:02 - [info] 设置是否只读 read_only 1
    2019-01-28 15:05:02 - [info] 失效转移执行成功,当前数据库关闭访问
    

    查看新 Master 日志

    2019-01-28 15:05:04 - [info] 当前数据库提升为主库
    2019-01-28 15:05:04 - [info] 当前同步位置 Master mysql-bin.000015 32338
    2019-01-28 15:05:04 - [info] 等待同步到 Master mysql-bin.000015 32338
    2019-01-28 15:05:04 - [info] 同步完毕
    2019-01-28 15:05:04 - [info] 设置是否只读 read_only 0
    2019-01-28 15:05:05 - [info] MySQL is ok
    

    查看新 Master IP,确认 VIP 已经飘过来了

    [root@cluster02 ~]# ip addr
    1: lo: <LOOPBACK,UP,LOWER_UP> mtu 16436 qdisc noqueue state UNKNOWN 
        link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00
        inet 127.0.0.1/8 scope host lo
        inet6 ::1/128 scope host 
           valid_lft forever preferred_lft forever
    2: eth0: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc pfifo_fast state UP qlen 1000
        link/ether fa:16:3e:66:7e:e8 brd ff:ff:ff:ff:ff:ff
        inet 10.0.0.248/16 brd 10.0.255.255 scope global eth0
        inet 10.0.0.237/32 scope global eth0
        inet6 fe80::f816:3eff:fe66:7ee8/64 scope link 
           valid_lft forever preferred_lft forever
    

    查看插入数据执行情况,大概有 12 秒是不可用的

    Mon Jan 28 15:04:51 CST 2019
    ERROR 2003 (HY000): Can't connect to MySQL server on '10.0.0.237' (111)
    Mon Jan 28 15:04:52 CST 2019
    ERROR 2003 (HY000): Can't connect to MySQL server on '10.0.0.237' (111)
    Mon Jan 28 15:04:53 CST 2019
    ERROR 2003 (HY000): Can't connect to MySQL server on '10.0.0.237' (111)
    Mon Jan 28 15:04:54 CST 2019
    ERROR 2003 (HY000): Can't connect to MySQL server on '10.0.0.237' (111)
    Mon Jan 28 15:04:55 CST 2019
    ERROR 2003 (HY000): Can't connect to MySQL server on '10.0.0.237' (111)
    Mon Jan 28 15:04:56 CST 2019
    ERROR 2003 (HY000): Can't connect to MySQL server on '10.0.0.237' (111)
    Mon Jan 28 15:04:57 CST 2019
    ERROR 2003 (HY000): Can't connect to MySQL server on '10.0.0.237' (111)
    Mon Jan 28 15:04:58 CST 2019
    ERROR 2003 (HY000): Can't connect to MySQL server on '10.0.0.237' (111)
    Mon Jan 28 15:05:00 CST 2019
    ERROR 2003 (HY000): Can't connect to MySQL server on '10.0.0.237' (111)
    Mon Jan 28 15:05:01 CST 2019
    ERROR 2003 (HY000): Can't connect to MySQL server on '10.0.0.237' (111)
    Mon Jan 28 15:05:02 CST 2019
    ERROR 2003 (HY000): Can't connect to MySQL server on '10.0.0.237' (111)
    Mon Jan 28 15:05:03 CST 2019
    

    失效切换成功

    5 总结

    使用双主 + keepalived 的优点是部署简单,双主加半同步情况下,理论上不会丢数据,适用于中小型 MySQL 集群。缺点也比较明显,就是增加从节点的情况下,从节点不会主动切换同步对象,而且脚本需要自己实现,有一定风险。

  • 相关阅读:
    Linux内存初始化
    linux PCI设备初始化过程
    Linux网络地址转换分析
    Linux内核中流量控制
    IPSEC实现
    ip_conntrack 实现
    module_init宏解析
    IP隧道基础研究
    IPV6介绍
    Golang的接口
  • 原文地址:https://www.cnblogs.com/sanduzxcvbnm/p/12938043.html
Copyright © 2011-2022 走看看