zoukankan      html  css  js  c++  java
  • Mysql

    一、概述

    本文将介绍mysql的MM+Keepalived方案。该方案由两个mysql服务器组成,这两个mysql互为主备。其中一台主作为写服务器,另一台主作为读服务器。通过keepalived软件管理写vip,当承担写服务器的mysql出现故障时,将写vip漂移到读服务器上,实现高可用。


    二、节点介绍

    本次实验采用2台虚拟机,操作系统版本Centos6.10,mysql版本5.7.25
    node1    10.40.16.61  主库  提供写服务
    node2    10.40.16.62  主库  提供读服务

    还须预留1个vip,现在不用配置,这里先提一下,后面的安装步骤用得到
    10.40.16.71  写vip


    三、安装

    1. 配置双主架构

    安利一个自己写的mysql一键安装脚本https://www.cnblogs.com/ddzj01/p/10678296.html
    mysql搭建完成后,就可以配置互为主备的架构了。

    image

    image

    这样node1和node2就互为主备了

    在node2上将数据库设置为只读模式
    (root@localhost)[(none)]> set global read_only = 1;

    2. 安装keepalive软件

    node1&node2:
    yum install -y keepalived


    四、修改配置文件

    1. node1

    编辑配置文件/etc/keepalived/keepalived.conf

    ! Configuration File for keepalived
    
    vrrp_script chk_mysql {
        script "/etc/keepalived/check_mysql.sh" # 自定义检查脚本
        interval 30                             # 设置检查间隔时长,可自行设定
    }
    
    vrrp_instance VI_1 {
        state BACKUP                            # BACKUP状态,具体意思后面介绍
        interface eth0
        virtual_router_id 51
        priority 100
        advert_int 1
        nopreempt                               # 防止主库切换到从库后,主库恢复后自动切换回主库
        authentication {
            auth_type PASS
            auth_pass 1111
        }
        track_script {
            chk_mysql
        }
        virtual_ipaddress {
            10.40.16.71/24                      # vip
        }
    }

    编辑检查mysql主库的脚本文件/etc/keepalived/check_mysql.sh

    #!/bin/bash
    source /root/.bash_profile
    ###填数据库相关信息###
    DB_USER='root'
    DB_PASSWD='root'
    U_EMAIL='xxxx@163.com'
    ######################
    
    ###判断如果上次检查的脚本还没执行完,则退出此次执行
    if [ `ps -ef | grep -w "$0" | grep -v "grep" | wc -l` -gt 2 ]; then
        exit 0
    fi
    mysql_con="mysql -u$DB_USER -p$DB_PASSWD"
    error_log="/etc/keepalived/logs/check_mysql.err"
    
    ###如果error_log目录不存在则创建目录
    if [ -d /etc/keepalived/logs ]; then
        usleep
    else
        mkdir -p /etc/keepalived/logs
    fi
    
    ###定义一个简单判断mysql是否可用的函数
    function execute_query {
        $mysql_con -e "select 1;" 2>> $error_log
    }
    
    ###定义无法执行查询,且mysql服务异常时的处理函数
    function service_error {
        echo -e "`date "+%F %H:%M:%S"`    ----mysql service error, now stop keepalived----" >> $error_log
        service keepalived stop >> $error_log 2>&1
        echo "master1 keepalived stopped" | mail -s "master1 keepalived stopped, please take notice!" $U_EMAIL 2>> $error_log
        echo -e "
    ---------------------------------------------------------
    " >> $error_log
    }
    
    ###定义无法执行查询,但mysql服务正常的处理函数
    function query_error {
        echo -e "`date "+%F %H:%M:%S"`    ----query error, but mysql service ok, retry after 30s----" >> $error_log
        sleep 30
        execute_query
        if [ $? -ne 0 ]; then
            echo -e "`date "+%F %H:%M:%S"`    ----still can't execute query----" >> $error_log
    
            ###关闭本机mysql
            echo -e "`date "+%F %H:%M:%S"`    ----stop mysql service----" >> $error_log
            service mysql stop &>> $error_log
            sleep 2    ###给执行和缓冲时间
    
            ###关闭本机keepalived
            echo -e "`date "+%F %H:%M:%S"`    ----stop keepalived----" >> $error_log
            service keepalived stop &>> $error_log
            echo "master1 keepalived stopped" | mail -s "master1 keepalived stopped, please take notice!" $U_EMAIL 2>> $error_log
            echo -e "
    ---------------------------------------------------------
    " >> $error_log
        else
            echo -e "`date "+%F %H:%M:%S"`    ----query ok after 30s----" >> $error_log
            echo -e "
    ---------------------------------------------------------
    " >> $error_log
        fi
    }
    
    ###检查开始: 执行查询
    execute_query
    if [ $? -ne 0 ]; then
        service mysql status &> /dev/null
        if [ $? -ne 0 ]; then
            service_error
        else
            query_error
        fi
    fi
    
    
    View Code

    chmod +x /etc/keepalived/check_mysql.sh
    该脚本的作用有两个
    a. mysql进程挂掉了,直接关闭keepalived服务
    b. mysql进程正常,但是无法执行查询,比如进程卡死了等等原因。则同时关闭mysql和keepalived

    2. node2

    编辑配置文件/etc/keepalived/keepalived.conf

    ! Configuration File for keepalived
    
    vrrp_instance VI_1 {
        state BACKUP                # BACKUP状态,具体意思后面介绍
        interface eth0
        virtual_router_id 51
        priority 90                 # 优先级设置为90,这个值设置比节点1低
        advert_int 1
        authentication {
            auth_type PASS
            auth_pass 1111
        }
        notify_master /etc/keepalived/notify_master_mysql.sh
        virtual_ipaddress {
            10.40.16.71/24          # vip
        }
    }

    编辑检查mysql从库的脚本文件/etc/keepalived/notify_master_mysql.sh

    #!/bin/bash
    source /root/.bash_profile
    ###当keepalived监测到本机转为MASTER状态时,执行该脚本
    ###填数据库相关信息###
    DB_USER='root'
    DB_PASSWD='root'
    U_EMAIL='xxxx@163.com'
    ######################
    
    change_log='/etc/keepalived/logs/state_change.log'
    mysql_con="mysql -u$DB_USER -p$DB_PASSWD"
    echo -e "`date "+%F %H:%M:%S"`    ----master2 keepalived change to MASTER----" >> $change_log
    
    ###如果error_log目录不存在则创建目录
    if [ -d /etc/keepalived/logs ]; then
        usleep
    else
        mkdir -p /etc/keepalived/logs
    fi
    
    slave_info() {
        ###检查从库状态
        slave_stat=`$mysql_con -e "show slave statusG"`
        Slave_IO_Running=`echo $slave_stat | egrep -w "Slave_IO_Running" | awk '{print $2}'`
        Slave_SQL_Running=`echo $slave_stat | egrep -w "Slave_SQL_Running" | awk '{print $2}'`
        Master_Log_File=`echo $slave_stat | egrep -w "Master_Log_File" | awk '{print $2}'`
        Read_Master_Log_Pos=`echo $slave_stat | egrep -w "Read_Master_Log_Pos" | awk '{print $2}'`
        Relay_Master_Log_File=`echo $slave_stat | egrep -w "Relay_Master_Log_File" | awk '{print $2}'`
        Exec_Master_Log_Pos=`echo $slave_stat | egrep -w "Exec_Master_Log_Pos" | awk '{print $2}'`
    }
    
    action() {
        ###解除read_only属性
        echo -e "`date "+%F %H:%M:%S"`    ----set read_only = 0 on master2----" >> $change_log
        $mysql_con -e "set global read_only = 0;" 2>> $change_log
        echo "master2 keepalived change to MASTER,线上数据库切换至master2" | mail -s "master2 keepalived change to MASTER" $U_EMAIL 2>> $change_log
        echo -e "---------------------------------------------------------
    " >> $change_log
    }
    
    slave_info
    if [ $Slave_SQL_Running == 'Yes' ]; then
        i=0    #一个计数器
        ###判断从master接收到的binlog是否全部在本地执行(这样仍无法完全确定从库已追上主库,因为无法完全保证io_thread没有延时(但由网络传输问题导致的从库落后的概率很小)
        until [ $Master_Log_File == $Relay_Master_Log_File -a $Read_Master_Log_Pos == $Exec_Master_Log_Pos ]
         do
            if [ $i -lt 10 ]; then    #将等待exec_pos追上read_pos的时间限制为20s
                echo -e "`date "+%F %H:%M:%S"`    ----Relay_Master_Log_File=$Relay_Master_Log_File, Exec_Master_Log_Pos=$Exec_Master_Log_Pos is behind Master_Log_File=$Master_Log_File, Read_Master_Log_Pos=$Read_Master_Log_Pos, wait......" >> $change_log    #输出消息到日志,等待exec_pos=read_pos
                i=$(($i+1))
                sleep 2
                slave_info
            else
                echo -e "The waits time is more than 20s,now force change. Master_Log_File=$Master_Log_File Read_Master_Log_Pos=$Read_Master_Log_Pos Relay_Master_Log_File=$Relay_Master_Log_File Exec_Master_Log_Pos=$Exec_Master_Log_Pos" >> $change_log
                action
                exit 0
            fi
         done
        action
    else
        echo -e "master2's slave status is not running,now force change. Master_Log_File=$Master_Log_File Read_Master_Log_Pos=$Read_Master_Log_Pos Relay_Master_Log_File=$Relay_Master_Log_File Exec_Master_Log_Pos=$Exec_Master_Log_Pos" >> $change_log
        action
    fi
    View Code

    chmod +x /etc/keepalived/notify_master_mysql.sh
    该脚本的作用有三个
    a. slave sql线程没有运行,直接将从库只读关闭
    c. slave sql线程正在运行,如果从库没有延迟,直接将从库只读关闭
    b. slave sql线程正在运行,如果从库有延迟,等待一段时间(这个自己设置)再将从库只读关闭

    这里解释下keepalived.conf的"state BACKUP"的意思,在Keepalived中有两种模式,分别是master->backup模式和backup->backup模式,这两种模式有什么区别呢?
    在master->backup模式下,一旦主库宕掉,虚拟IP会自动漂移到从库,当主库修复后,keepalived启动后,还会把虚拟IP抢过来,即使你设置nopreempt(不抢占)的方式抢占IP的动作也会发生。
    在backup->backup模式下,当主库宕掉后虚拟IP会自动漂移到从库上,当原主恢复之后重启keepalived服务,并不会抢占新主的虚拟IP,即使是原主优先级高于从库的优先级别,也不会抢占虚拟IP。
    所以,为了减少虚拟IP的漂移次数,生产中我们通常是把修复好的主库当做新主库的备库。因而采用backup->backup模式居多。


    五、启动Keepailived

    node1&node2:
    service keepalived start
    注意因为我们使用的是backup->backup模式,所以启动keepalived的顺序需要先启动node1,再启动node2,这样vip才会在node1上。如果先启动node2,再启动node1,node1并不会把虚拟IP抢过来。

    image


    六、测试

    1. 模拟主库宕机的情况

    关闭node1的mysql数据库
    service mysql stop

    查看node1的keepalived日志/etc/keepalived/logs/check_mysql.err
    image

    查看node1和node2的vip,发现vip已经转移到了node2
    image

    node2的read_only参数也变成了off      
    image

    node2的切换日志可以查看/etc/keepalived/logs/state_change.log
    看看有没有收到邮件,呵呵:-)
    failover正常!

    2. 模拟主库正常,但是无法查询

    重新打开node1的mysql数据库和keepalived服务
    service mysql start
    service keepalived start

    重启node2的keepalived服务
    service keepalived restart

    在node2上将数据库设置为只读模式
    (root@localhost)[(none)]> set global read_only = 1;

    这样就跟最初的状态一致了,主库是node1,从库是node2,vip在node1上。

    在node1上将参数max_connections设置得足够小
    (root@localhost)[(none)]> set global max_connections = 2;

    在node1上然后多开几个连接,直到出现无法连接的情况。以此来模拟无法查询的情况。
    image

    查看node1的keepalived日志/etc/keepalived/logs/check_mysql.err
    image

    查看node1,发现keepalived和mysql服务都已经停止
    [root@mysqla ~]# service mysql status
    MySQL is not running                                    
    [root@mysqla ~]# service keepalived status
    keepalived is stopped

    node2的read_only参数也变成了off
    image

    vip也已经漂移过来了
    [root@mysqlb ~]# ip a
    image

    failover正常!


    七、手动切换

    还有一个场景就是,如何手工切换。举个例子,node2目前是主库,但是node1通过各种办法修复好了,我想让node1当主库。

    重启node1的mysql服务
    service mysql restart

    这样node2是主库,node1是备库,vip在node2上

    在node2上创建手工切换的脚本vi /etc/keepalived/manual_switch_to_master

    #!/bin/bash
    source /root/.bash_profile
    ###在master2上手动执行将主库切换回master1的操作
    ###填数据库相关信息###
    DB_USER='root'
    DB_PASSWD='root'
    U_EMAIL='xxxx@163.com'
    MASTER1='10.40.16.61'
    MASTER2='10.40.16.62'
    REPL_USER='repl'
    REPL_PASSWD='123456'
    MASTER1_MYSQL_PATH='/usr/local/mysql/bin'
    ######################
    
    ###如果error_log目录不存在则创建目录
    if [ -d /etc/keepalived/logs ]; then
        usleep
    else
        mkdir -p /etc/keepalived/logs
    fi
    
    mysql_con="mysql -u$DB_USER -p$DB_PASSWD"
    
    echo -e "`date "+%F %H:%M:%S"`    ----change to BACKUP manually----" >> /etc/keepalived/logs/state_change.log
    echo -e "`date "+%F %H:%M:%S"`    ----set read_only = 1 on master2----" >> /etc/keepalived/logs/state_change.log
    $mysql_con -e "set global read_only = 1;" >> /etc/keepalived/logs/state_change.log
    
    ###kill掉当前客户端连接
    echo -e "`date "+%F %H:%M:%S"`    ----kill current client thread----" >> /etc/keepalived/logs/state_change.log
    if [ -e /tmp/kill.sql ]; then
        rm -f /tmp/kill.sql &> /dev/null
    fi
    ###这里其实是一个批量kill线程的小技巧
    $mysql_con -e 'select concat("kill ",id,";") from information_schema.processlist where command="Query" or command="Execute" into outfile "/tmp/kill.sql";'
    $mysql_con -e "source /tmp/kill.sql" 2>> /etc/keepalived/logs/state_change.log
    sleep 2    ###给kill一个执行和缓冲时间
    
    slave_info() {
        ###检查从库状态
        slave_stat=`mysql -u$REPL_USER -p$REPL_PASSWD -h$MASTER1 -e "show slave statusG"`
        Master_Log_File=`echo $slave_stat | egrep -w "Master_Log_File" | awk '{print $2}'`
        Read_Master_Log_Pos=`echo $slave_stat | egrep -w "Read_Master_Log_Pos" | awk '{print $2}'`
        Relay_Master_Log_File=`echo $slave_stat | egrep -w "Relay_Master_Log_File" | awk '{print $2}'`
        Exec_Master_Log_Pos=`echo $slave_stat | egrep -w "Exec_Master_Log_Pos" | awk '{print $2}'`
    }
    
    slave_info
    until [ $Read_Master_Log_Pos = $Exec_Master_Log_Pos -a $Master_Log_File = $Relay_Master_Log_File ]
    do
        echo -e "`date "+%F %H:%M:%S"`    ----Relay_Master_Log_File=$Relay_Master_Log_File, Exec_Master_Log_Pos=$Exec_Master_Log_Pos is behind Master_Log_File=$Master_Log_File, Read_Master_Log_Pos=$Read_Master_Log_Pos, wait......" >> /etc/keepalived/logs/state_change.log
        sleep 2
        slave_info
    done
    
    ###然后解除master1的read_only属性并打开keepalived服务
    echo -e "`date "+%F %H:%M:%S"`    ----set read_only = 0 on master1----" >> /etc/keepalived/logs/state_change.log
    ssh $MASTER1 "$MASTER1_MYSQL_PATH/mysql -u$DB_USER -p$DB_PASSWD -e 'set global read_only = 0;' && /etc/init.d/keepalived start" 2>> /etc/keepalived/logs/state_change.log
    
    ###重启master2的keepalived服务,使VIP漂移到master1
    echo -e "`date "+%F %H:%M:%S"`    ----make VIP move to master1----" >> /etc/keepalived/logs/state_change.log
    /etc/init.d/keepalived restart &>> /etc/keepalived/logs/state_change.log
    echo "master2 keepalived restart,vip change to master1" | mail -s "master2 keepalived change to BACKUP" $U_EMAIL 2>> /etc/keepalived/logs/state_change.log
    echo -e "
    --------------------------------------------------
    " >> /etc/keepalived/logs/state_change.log
    
    View Code

    在node2上执行脚本
    chmod +x /etc/keepalived/manual_switch_to_master
    [root@mysqlb keepalived]# sh /etc/keepalived/manual_switch_to_master
    image

    在node2上查看ip,发现vip不见了
    image

    在node2上查看read_only参数
    image

    在node1上查看vip,发现vip已经漂移过来了
    image

    手工切换完成!


    八、总结

    mm+keepalive的配置简单,相对于传统的主从架构,能实现比较简单的写库故障转移。
    文章参考:https://www.cnblogs.com/ivictor/p/5522383.html

  • 相关阅读:
    XGBoost参数
    算法
    Python2 和Python3 的区别
    解决ubuntu上ifconfig没有eth0/ens33且无法上网的问题
    Ubuntu 忘记root user密码 关闭图形界面
    rabbitmq消息队列
    CMDB 数据加密 最终整合API验证+AES数据加密
    CMDB API验证
    使用Python生成ASCII字符画
    google 技巧
  • 原文地址:https://www.cnblogs.com/ddzj01/p/11558135.html
Copyright © 2011-2022 走看看