、恢复MHA故障
1.手动修复
1)修复挂掉的数据库
[root@db01 ~]# systemctl start mysqld
2)找到主从语句
[root@db03 ~]# grep 'CHANGE MASTER TO' /service/mha/manager
Mon Nov 9 20:14:17 2020 - [info] All other slaves should start replication from here. Statement should be: CHANGE MASTER TO MASTER_HOST='172.16.1.52', MASTER_PORT=3306, MASTER_LOG_FILE='mysql-bin.000007', MASTER_LOG_POS=120, MASTER_USER='rep', MASTER_PASSWORD='xxx';
3)修复的数据库执行change语句
#修改一下语句中的密码,执行即可
mysql> CHANGE MASTER TO MASTER_HOST='172.16.1.52', MASTER_PORT=3306, MASTER_LOG_FILE='mysql-bin.000007', MASTER_LOG_POS=120, MASTER_USER='rep', MASTER_PASSWORD='123';
Query OK, 0 rows affected, 2 warnings (0.01 sec)
mysql> start slave;
Query OK, 0 rows affected (0.00 sec)
4)查看主从状态
mysql> show slave statusG
*************************** 1. row ***************************
Slave_IO_State: Waiting for master to send event
Master_Host: 172.16.1.52
Master_User: rep
Master_Port: 3306
Connect_Retry: 60
Master_Log_File: mysql-bin.000007
Read_Master_Log_Pos: 211
Relay_Log_File: db01-relay-bin.000002
Relay_Log_Pos: 374
Relay_Master_Log_File: mysql-bin.000007
Slave_IO_Running: Yes
Slave_SQL_Running: Yes
5)修复MHA配置
[root@db03 ~]# vim /service/mha/app1.cnf
[server default]
manager_log=/service/mha/manager
manager_workdir=/service/mha/app1
master_binlog_dir=/usr/local/mysql/data
password=mha
ping_interval=2
repl_password=123
repl_user=rep
ssh_user=root
user=mha
[server1]
hostname=172.16.1.51
port=3306
[server2]
hostname=172.16.1.52
port=3306
[server3]
hostname=172.16.1.53
port=3306
6)重新启动MHA
[root@db03 ~]# nohup masterha_manager --conf=/service/mha/app1.cnf --remove_dead_master_conf --ignore_last_failover < /dev/null > /service/mha/app1/manager.log 2>&1 &
2.脚本修复
1)创建脚本目录
[root@db02 ~]# mkdir /scripts
2)写脚本
[root@db02 ~]# vim /scripts/start_mha.sh
[root@db02 ~]# cat /scripts/start_mha.sh
#!/bin/bash
#1.启动数据库
systemctl start mysqld
#2.获取配置主从语句
change=`ssh 172.16.1.53 "grep 'CHANGE MASTER TO' /service/mha/manager | tail -1 | sed 's#xxx#123#g'" | awk -F: '{print $4}'`
#3.执行主从语句并启动线程
mysql -e "$change; start slave"
#4.替换MHA配置文件
ssh 172.16.1.53 "cp /service/mha/app1.bak /service/mha/app1.cnf"
#5.启动MHA
ssh 172.16.1.53 "nohup masterha_manager --conf=/service/mha/app1.cnf --remove_dead_master_conf --ignore_last_failover < /dev/null > /service/mha/app1/manager.log 2>&1 &"
3)加上判断
[root@db02 ~]# cat /scripts/start_mha.sh
#!/bin/bash
#1.mysql进程数赋值
mysqlpid=`ps -ef | grep [m]ysql | wc -l`
#2.判断MySQL是否假死,如果假死杀掉重启,如果关闭则启动
if [ $mysqlpid -eq 0 ];then
systemctl start mysqld
else
pkill mysqld
systemctl start mysqld
fi
#3.获取配置主从语句
change=`ssh 172.16.1.53 "grep 'CHANGE MASTER TO' /service/mha/manager | tail -1 | sed 's#xxx#123#g'" | awk -F: '{print $4}'`
#4.执行主从语句并启动线程
mysql -e "$change; start slave"
#5.替换MHA配置文件
ssh 172.16.1.53 "cp /service/mha/app1.bak /service/mha/app1.cnf"
#6.启动MHA
ssh 172.16.1.53 "nohup masterha_manager --conf=/service/mha/app1.cnf --remove_dead_master_conf --ignore_last_failover < /dev/null > /service/mha/app1/manager.log 2>&1 &"
三、MHA切换机制
1.MHA切换机制
1.读取配置中指定优先级的配置
candidate_master=1
check_repl_delay=0
2.如果没有配置优先级,读取数据最新的
3.如果数据量相同,读取主机标签,值越小越优先
2.测试标签优先级
#1.停掉MHA
[root@db03 ~]# masterha_stop --conf=/service/mha/app1.cnf
#2.配置MHA
[root@db03 ~]# vim /service/mha/app1.cnf
... ...
[server1]
hostname=172.16.1.51
port=3306
[server2]
hostname=172.16.1.52
port=3306
[server3]
hostname=172.16.1.53
port=3306
#3.重启MHA
[root@db03 ~]# nohup masterha_manager --conf=/service/mha/app1.cnf --remove_dead_master_conf --ignore_last_failover < /dev/null > /service/mha/app1/manager.log 2>&1 &
#4.停掉主库
[root@db02 ~]# systemctl stop mysqld
#5.查看主从
[root@db01 ~]# mysql
mysql> show slave statusG
*************************** 1. row ***************************
Slave_IO_State: Waiting for master to send event
Master_Host: 172.16.1.53
Master_User: rep
Master_Port: 3306
Connect_Retry: 60
Master_Log_File: mysql-bin.000008
Read_Master_Log_Pos: 120
Relay_Log_File: db01-relay-bin.000002
Relay_Log_Pos: 283
Relay_Master_Log_File: mysql-bin.000008
Slave_IO_Running: Yes
Slave_SQL_Running: Yes
3.指定优先级测试
#配置优先级
[root@db03 ~]# vim /service/mha/app1.cnf
... ...
[server3]
candidate_master=1
check_repl_delay=0
hostname=172.16.1.53
port=3306
#重启MHA
[root@db03 ~]# nohup masterha_manager --conf=/service/mha/app1.cnf --remove_dead_master_conf --ignore_last_failover < /dev/null > /service/mha/app1/manager.log 2>&1 &
#停止主库
[root@db01 ~]# systemctl stop mysqld
4.测试数据最新的优先级
1)去掉优先级配置
[root@db03 ~]# masterha_stop --conf=/service/mha/app1.cnf
#去掉优先级配置
#candidate_master=1
#check_repl_delay=0
#重启mha
[root@db03 ~]# nohup masterha_manager --conf=/service/mha/app1.cnf --remove_dead_master_conf --ignore_last_failover < /dev/null > /service/mha/app1/manager.log 2>&1 &
2)主库建库建表
mysql> create database youxianji;
Query OK, 1 row affected (0.00 sec)
mysql> use youxianji
Database changed
mysql> create table linux10(id int not null primary key auto_increment,name varchar(10));
Query OK, 0 rows affected (0.02 sec)
3)编写脚本插入数据
[root@db03 ~]# mkdir /scripts
[root@db03 ~]# vim /scripts/insert.sh
#!/bin/bash
while true;do
mysql -e "use youxianji;insert linux10(name) values('qiudao')"
done
4)停掉db01的IO线程
[root@db01 ~]# mysql
mysql> stop slave io_thread;
5)停掉主库
[root@db03 ~]# systemctl stop mysqld
6)查看从库状态
[root@db01 ~]# mysql
mysql> show slave statusG
*************************** 1. row ***************************
Slave_IO_State: Waiting for master to send event
Master_Host: 172.16.1.52
Master_User: rep
Master_Port: 3306
Connect_Retry: 60
Master_Log_File: mysql-bin.000010
Read_Master_Log_Pos: 1524539
Relay_Log_File: db01-relay-bin.000002
Relay_Log_Pos: 283
Relay_Master_Log_File: mysql-bin.000010
Slave_IO_Running: Yes
Slave_SQL_Running: Yes
四、主库断电,binlog如何保存
1.配置binlog-server
[root@db03 ~]# cat /service/mha/app1.cnf
... ...
[server1]
hostname=172.16.1.51
port=3306
[server2]
hostname=172.16.1.52
port=3306
[server3]
hostname=172.16.1.53
port=3306
[binlog1]
no_master=1
hostname=172.16.1.53
master_binlog_dir=/data/mysql/binlog/
2.创建存放binlog的目录
[root@db03 ~]# mkdir /data/mysql/binlog/ -p
3.手动执行实时备份binlog的命令
[root@db03 ~]# cd /data/mysql/binlog/
#备份binlog命令
[root@db03 /data/mysql/binlog]# mysqlbinlog -R --host=172.16.1.51 --user=mha --password=mha --raw --stop-never mysql-bin.000001 &
#参数详解
-R # 从远端服务器获取binlog
--host=172.16.1.51 # 指定远端的主机
--user=mha # 数据库mha用户
--password=mha # 数据库mha用户的密码
--raw # binlog获取时的一种格式
--stop-never mysql-bin.000001 # 从mysql-bin.000001开始不停的备份binlog
4.启动mha
#启动mha
nohup masterha_manager --conf=/service/mha/app1.cnf --remove_dead_master_conf --ignore_last_failover < /dev/null > /service/mha/app1/manager.log 2>&1 &
5.加入恢复MHA进程脚本
[root@db02 ~]# cat /scripts/start_mha.sh
#!/bin/bash
#1.mysql进程数赋值
mysqlpid=`ps -ef | grep [m]ysql | wc -l`
#2.判断MySQL是否假死,如果假死杀掉重启,如果关闭则启动
if [ $mysqlpid -eq 0 ];then
systemctl start mysqld
else
pkill mysqld
systemctl start mysqld
fi
#3.获取配置主从语句
change=`ssh 172.16.1.53 "grep 'CHANGE MASTER TO' /service/mha/manager | tail -1 | sed 's#xxx#123#g'" | awk -F: '{print $4}'`
#4.执行主从语句并启动线程
mysql -e "$change; start slave"
#5.获取主节点IP
master_ip=`ssh 172.16.1.53 "mysql -e 'show slave statusG'" | awk 'NR==3 {print $2}'`
#6.启动实时获取binlog进程
ssh 172.16.1.53 "cd /data/mysql/binlog && mysqlbinlog -R --host=$master_ip --user=mha --password=mha --raw --stop-never mysql-bin.000001 &> /dev/null &"
#7.替换MHA配置文件
ssh 172.16.1.53 "cp /service/mha/app1.bak /service/mha/app1.cnf"
#8.启动MHA
ssh 172.16.1.53 "nohup masterha_manager --conf=/service/mha/app1.cnf --remove_dead_master_conf --ignore_last_failover < /dev/null > /service/mha/app1/manager.log 2>&1 &"
6.拆成两个脚本
[root@db01 ~]# cat /scripts/start_mha.sh
#!/bin/bash
#1.mysql进程数赋值
mysqlpid=`ps -ef | grep [m]ysql | wc -l`
#2.判断MySQL是否假死,如果假死杀掉重启,如果关闭则启动
if [ $mysqlpid -eq 0 ];then
systemctl start mysqld
else
pkill mysqld
systemctl start mysqld
fi
sleep 3
#3.获取配置主从语句
change=`ssh 172.16.1.53 "grep 'CHANGE MASTER TO' /service/mha/manager | tail -1 | sed 's#xxx#123#g'" | awk -F: '{print $4}'`
#4.执行主从语句并启动线程
mysql -e "$change; start slave" 2>&1
#5.远程执行启动mha脚本
ssh 172.16.1.53 "sh /scripts/recovery.sh"
[root@db03 ~]# cat /scripts/recovery.sh
#1.获取主节点IP
master_ip=`mysql -e 'show slave statusG' | awk 'NR==3 {print $2}'`
#2.进入保存binlog目录
cd /data/mysql/binlog
#3.启动实时获取binlog进程
mysqlbinlog -R --host=$master_ip --user=mha --password=mha --raw --stop-never mysql-bin.000001&> /dev/null &
#4.替换MHA配置文件
/usr/bin/cp /service/mha/app1.bak /service/mha/app1.cnf
#8.启动MHA
nohup masterha_manager --conf=/service/mha/app1.cnf --remove_dead_master_conf --ignore_last_failover < /dev/null > /service/mha/app1/manager.log 2>&1 &