故障场景:昨天自建机房突然断电,检查一下数据库状态以及主从复制状态
(crmDB)root@localhost [(none)]> show slave statusG
*************************** 1. row ***************************
Slave_IO_State:
Master_Host: 172.16.117.247
Master_User: slave
Master_Port: 3306
Connect_Retry: 60
Master_Log_File: mysql-bin.000262
Read_Master_Log_Pos: 342439502
Relay_Log_File: relay-bin.000042
Relay_Log_Pos: 342439665
Relay_Master_Log_File: mysql-bin.000262
Slave_IO_Running: No
Slave_SQL_Running: Yes
Replicate_Do_DB:
Replicate_Ignore_DB:
Replicate_Do_Table:
Replicate_Ignore_Table:
Replicate_Wild_Do_Table:
Replicate_Wild_Ignore_Table:
Last_Errno: 0
Last_Error:
Skip_Counter: 0
Exec_Master_Log_Pos: 342439502
Relay_Log_Space: 342439879
Until_Condition: None
Until_Log_File:
Until_Log_Pos: 0
Master_SSL_Allowed: No
Master_SSL_CA_File:
Master_SSL_CA_Path:
Master_SSL_Cert:
Master_SSL_Cipher:
Master_SSL_Key:
Seconds_Behind_Master: NULL
Master_SSL_Verify_Server_Cert: No
Last_IO_Errno: 1236
Last_IO_Error: Got fatal error 1236 from master when reading data from binary log: 'Client requested master to start replication from position > file size; the first event 'mysql-bin.000262' at 342439502, the last event read from '/data/data/mysql-bin.000262' at 4, the last byte read from '/data/data/mysql-bin.000262' at 4.'
Last_SQL_Errno: 0
Last_SQL_Error:
Replicate_Ignore_Server_Ids:
Master_Server_Id: 247
Master_UUID: db5c1497-9cf4-11e3-8259-77a6dd604747
Master_Info_File: /data/3306/data/master.info
SQL_Delay: 0
SQL_Remaining_Delay: NULL
Slave_SQL_Running_State: Slave has read all relay log; waiting for the slave I/O thread to update it
Master_Retry_Count: 86400
Master_Bind:
Last_IO_Error_Timestamp: 150717 09:59:39
Last_SQL_Error_Timestamp:
Master_SSL_Crl:
Master_SSL_Crlpath:
Retrieved_Gtid_Set:
Executed_Gtid_Set:
Auto_Position: 0
1 row in set (0.00 sec)
(crmDB)root@localhost [(none)]>
根据报错信息,看看主库binlog发生了啥
mysqlbinlog --start-position=342439502 /data/data/mysql-bin.000262 > /tmp/binlog.tx
[root@crmdbM data]# cat /tmp/binlog.txt
/*!50530 SET @@SESSION.PSEUDO_SLAVE_MODE=1*/;
/*!40019 SET @@session.max_insert_delayed_threads=0*/;
/*!50003 SET @OLD_COMPLETION_TYPE=@@COMPLETION_TYPE,COMPLETION_TYPE=0*/;
DELIMITER /*!*/;
# at 4
#150716 4:10:02 server id 247 end_log_pos 120 CRC32 0x0b64a00d Start: binlog v 4, server v 5.6.14-log created 150716 4:10:02
BINLOG '
Gr6mVQ/3AAAAdAAAAHgAAAAAAAQANS42LjE0LWxvZwAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
AAAAAAAAAAAAAAAAAAAAAAAAEzgNAAgAEgAEBAQEEgAAXAAEGggAAAAICAgCAAAACgoKGRkAAQ2g
ZAs=
'/*!*/;
DELIMITER ;
# End of log file
ROLLBACK /* added by mysqlbinlog */;
/*!50003 SET COMPLETION_TYPE=@OLD_COMPLETION_TYPE*/;
/*!50530 SET @@SESSION.PSEUDO_SLAVE_MODE=0*/;
################################
没有看到这个位置,这是mysql-bin.000262最开头的位置,
[root@crmdbM data]# mysqlbinlog /data/data/mysql-bin.000262 | head -n 10
/*!50530 SET @@SESSION.PSEUDO_SLAVE_MODE=1*/;
/*!40019 SET @@session.max_insert_delayed_threads=0*/;
/*!50003 SET @OLD_COMPLETION_TYPE=@@COMPLETION_TYPE,COMPLETION_TYPE=0*/;
DELIMITER /*!*/;
# at 4
#150716 4:10:02 server id 247 end_log_pos 120 CRC32 0x0b64a00d Start: binlog v 4, server v 5.6.14-log created 150716 4:10:02
BINLOG '
Gr6mVQ/3AAAAdAAAAHgAAAAAAAQANS42LjE0LWxvZwAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
AAAAAAAAAAAAAAAAAAAAAAAAEzgNAAgAEgAEBAQEEgAAXAAEGggAAAAICAgCAAAACgoKGRkAAQ2g
ZAs=
'/*!*/;
既然没有,那么看一下整个binlog最后部分pos点信息,(报错是说slave IO接受到的pos点信息比主库的还大)
那么我们看一下解析出来的binlog最后几行信息
mysqlbinlog /data/data/mysql-bin.000262 > /tmp/binlog1.txt
[root@crmdbM data]# tail -n 20 /tmp/binlog1.txt
#150717 1:21:06 server id 247 end_log_pos 342403057 CRC32 0xbac8e194 Table_map: `crm`.`sky_consultant_statis` mapped to number 7724
# at 342403057
#150717 1:21:06 server id 247 end_log_pos 342403240 CRC32 0x38327494 Write_rows: table id 7724 flags: STMT_END_F
BINLOG '
AuinVRP3AAAAaQAAAPGnaBQAACweAAAAAAEAA2NybQAVc2t5X2NvbnN1bHRhbnRfc3RhdGlzACII
DwoRAwMDAwMDAwMDAwMDAwMDAwMDAwMDAwMDAwP2AwMDBYcAAAsC8P///wOU4ci6
AuinVR73AAAAtwAAAKioaBQAACweAAAAAAEAAgAi//////8AAAAM/Eg+AAAAAAAACmNoZW5qdW5o
dWnwvg9Vp9UAAAAAABoAAAAAAAAAoyMAAI8AAAAKAAAABAAAAAYAAAAAAAAAAAAAAAAAAAAAAAAA
AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAIUAAAAEAAAAgQAAAAAAAAAAAAAAgAAAAAAAAAAA
AAAAAAAAAACUdDI4
'/*!*/;
# at 342403240
#150717 1:21:06 server id 247 end_log_pos 342403271 CRC32 0xd0b514e1 Xid = 2027427035
COMMIT/*!*/;
DELIMITER ;
# End of log file
ROLLBACK /* added by mysqlbinlog */;
/*!50003 SET COMPLETION_TYPE=@OLD_COMPLETION_TYPE*/;
/*!50530 SET @@SESSION.PSEUDO_SLAVE_MODE=0*/;
看到没有,主库binlog最大才342403240 ,报错信息是342439502 相差36262,既然从库说已经超过了主库的binlog,SQL线程已经执行到这个位置了
Exec_Master_Log_Pos: 342439502,而且主库断电重启,重新生成了binlog,那么我们重新指定下一个binlog文件开始
[root@crmdbM data]# mysqlbinlog /data/data/mysql-bin.000263 | head -n 20
/*!50530 SET @@SESSION.PSEUDO_SLAVE_MODE=1*/;
/*!40019 SET @@session.max_insert_delayed_threads=0*/;
/*!50003 SET @OLD_COMPLETION_TYPE=@@COMPLETION_TYPE,COMPLETION_TYPE=0*/;
DELIMITER /*!*/;
# at 4
#150717 2:29:45 server id 247 end_log_pos 120 CRC32 0xa89426a5 Start: binlog v 4, server v 5.6.14-log created 150717 2:29:45 at startup
ROLLBACK/*!*/;
BINLOG '
GfinVQ/3AAAAdAAAAHgAAAAAAAQANS42LjE0LWxvZwAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
AAAAAAAAAAAAAAAAAAAZ+KdVEzgNAAgAEgAEBAQEEgAAXAAEGggAAAAICAgCAAAACgoKGRkAAaUm
lKg=
'/*!*/;
# at 120
#150717 2:40:45 server id 247 end_log_pos 199 CRC32 0xe654b6a6 Query thread_id=35 exec_time=0 error_code=0
SET TIMESTAMP=1437072045/*!*/;
SET @@session.pseudo_thread_id=35/*!*/;
SET @@session.foreign_key_checks=1, @@session.sql_auto_is_null=0, @@session.unique_checks=1, @@session.autocommit=1/*!*/;
SET @@session.sql_mode=1075838976/*!*/;
SET @@session.auto_increment_increment=1, @@session.auto_increment_offset=1/*!*/;
/*!C utf8 *//*!*/;
[root@crmdbM data]#
binlog第一个pos点是4,就可以指定pos点了
(crmDB)root@localhost [(none)]> change master to master_host='172.16.117.247',master_user='slave',master_password='slave',master_log_file='mysql-bin.000263',master_log_pos=4;
Query OK, 0 rows affected, 2 warnings (0.05 sec)
(crmDB)root@localhost [(none)]> start slave;
Query OK, 0 rows affected (0.00 sec)
(crmDB)root@localhost [(none)]> show slave statusG
*************************** 1. row ***************************
Slave_IO_State: Waiting for master to send event
Master_Host: 172.16.117.247
Master_User: slave
Master_Port: 3306
Connect_Retry: 60
Master_Log_File: mysql-bin.000265
Read_Master_Log_Pos: 60234822
Relay_Log_File: relay-bin.000005
Relay_Log_Pos: 18238150
Relay_Master_Log_File: mysql-bin.000265
Slave_IO_Running: Yes
Slave_SQL_Running: Yes
Replicate_Do_DB:
Replicate_Ignore_DB:
Replicate_Do_Table:
Replicate_Ignore_Table:
Replicate_Wild_Do_Table:
Replicate_Wild_Ignore_Table:
Last_Errno: 0
Last_Error:
Skip_Counter: 0
Exec_Master_Log_Pos: 18237987
Relay_Log_Space: 60235199
Until_Condition: None
Until_Log_File:
Until_Log_Pos: 0
Master_SSL_Allowed: No
Master_SSL_CA_File:
Master_SSL_CA_Path:
Master_SSL_Cert:
Master_SSL_Cipher:
Master_SSL_Key:
Seconds_Behind_Master: 2584
Master_SSL_Verify_Server_Cert: No
Last_IO_Errno: 0
Last_IO_Error:
Last_SQL_Errno: 0
Last_SQL_Error:
Replicate_Ignore_Server_Ids:
Master_Server_Id: 247
Master_UUID: db5c1497-9cf4-11e3-8259-77a6dd604747
Master_Info_File: /data/3306/data/master.info
SQL_Delay: 0
SQL_Remaining_Delay: NULL
Slave_SQL_Running_State: System lock
Master_Retry_Count: 86400
Master_Bind:
Last_IO_Error_Timestamp:
Last_SQL_Error_Timestamp:
Master_SSL_Crl:
Master_SSL_Crlpath:
Retrieved_Gtid_Set:
Executed_Gtid_Set:
Auto_Position: 0
1 row in set (0.00 sec)
(crmDB)root@localhost [(none)]>
没有主键冲突,
然后写个脚本,在凌晨,用pt-table-checksum检测一下库是否有主从差异
########
最终解决方法,主库这两个参数
(crmDB247)root@localhost [(none)]> show global variables like '%sync_binlog%';
+---------------+-------+
| Variable_name | Value |
+---------------+-------+
| sync_binlog | 0 |
+---------------+-------+
1 row in set (0.00 sec)
(crmDB247)root@localhost [(none)]>
mysql> show global variables like '%at_trx%';
+--------------------------------+-------+
| Variable_name | Value |
+--------------------------------+-------+
| innodb_flush_log_at_trx_commit | 2 |
+--------------------------------+-------+
1 row in set (0.00 sec)
mysql>
主从复制必须为双1
sync_binlog = 1
innodb_flush_log_at_trx_commit = 1
从库必须配置为###
relay_log = /data/3306/logs/relay-bin
log_slave_updates = 1
skip_slave_start = 1
relay_log_recovery =1
####及时同步sync_master_info sync_relay_log_info
master_info_repository = TABLE
relay-log-info-repository = TABLE