最近自己写了一些 AlwaysOn 相关的监控,是通过存储过程实现的,做个笔记如下:
-- AlwaysOn availability-group health checks (T-SQL, intended to be wrapped in a
-- stored procedure). Each check PRINTs a warning when it finds a problem; the
-- runtime messages are kept exactly as in the original notes.

-- Replica expected to hold the primary role; a different primary means a failover happened.
DECLARE @expected_primary sysname = N'ALWAYSON241';
-- log_send_queue_size is reported in KB, so 300 MB = 300 * 1024 KB (matches the warning text).
DECLARE @log_send_limit_kb bigint = 300 * 1024;
-- redo_queue_size is also reported in KB.
-- NOTE(review): the original threshold of 300 (i.e. 300 KB) is kept because the
-- warning text names no size; raise it if 300 MB was intended here as well.
DECLARE @redo_limit_kb bigint = 300;

-- Is the AlwaysOn feature enabled on this instance?
DECLARE @isenabled sql_variant;
SELECT @isenabled = SERVERPROPERTY('IsHadrEnabled');
IF @isenabled = 1
BEGIN
    PRINT 'is enabled(alwayson启用状态)';
END;

-- Cluster membership: any member whose member_state is not 1 is reported.
IF EXISTS (SELECT * FROM sys.dm_hadr_cluster_members WHERE member_state <> 1)
BEGIN
    PRINT 'alwayson集群有拖机机器,请查看';
END;

-- Replica synchronization health: anything other than 2 is reported.
IF EXISTS (SELECT * FROM sys.dm_hadr_availability_replica_states WHERE synchronization_health <> 2)
BEGIN
    PRINT 'alwayson集群同步异常,请查看';
END;

-- Database synchronization state. SYNCHRONIZING is the normal state of an
-- asynchronous-commit database, so it is accepted alongside SYNCHRONIZED
-- (the original listed 'SYNCHRONIZED' twice).
IF EXISTS (
    SELECT *
    FROM sys.dm_hadr_database_replica_states
    WHERE synchronization_state_desc NOT IN ('SYNCHRONIZING', 'SYNCHRONIZED')
)
BEGIN
    PRINT 'alwayson有数据库同步异常,请查看';
END;

-- TCP listeners that are not ONLINE.
IF EXISTS (SELECT * FROM sys.dm_tcp_listener_states WHERE state_desc NOT IN ('ONLINE'))
BEGIN
    PRINT 'alwayson侦听器异常,请查看';
END;

-- Primary/secondary lag: log not yet sent to a secondary.
-- The select list is irrelevant to EXISTS; it is kept so the inner query can be
-- run on its own to see which replica/database is behind.
IF EXISTS (
    SELECT
        ag.name                 AS ag_name,
        ar.replica_server_name  AS ag_replica_server,
        dr_state.database_id    AS database_id,
        dr_state.log_send_queue_size,
        CASE WHEN ar_state.is_local = 1 THEN N'LOCAL' ELSE 'REMOTE' END AS is_ag_replica_local,
        CASE WHEN ar_state.role_desc IS NULL THEN N'DISCONNECTED' ELSE ar_state.role_desc END AS ag_replica_role
    FROM sys.availability_groups AS ag
    JOIN sys.availability_replicas AS ar
        ON ag.group_id = ar.group_id
    JOIN sys.dm_hadr_availability_replica_states AS ar_state
        ON ar.replica_id = ar_state.replica_id
    JOIN sys.dm_hadr_database_replica_states AS dr_state
        ON ag.group_id = dr_state.group_id
       AND dr_state.replica_id = ar_state.replica_id
    WHERE dr_state.log_send_queue_size > @log_send_limit_kb
)
BEGIN
    PRINT 'alwayson主体有超过300M的日志没有同步,请尽快查看';
END;

-- Primary/secondary lag: log received by a secondary but not yet redone.
IF EXISTS (
    SELECT
        ag.name                 AS ag_name,
        ar.replica_server_name  AS ag_replica_server,
        dr_state.database_id    AS database_id,
        dr_state.redo_queue_size,
        CASE WHEN ar_state.is_local = 1 THEN N'LOCAL' ELSE 'REMOTE' END AS is_ag_replica_local,
        CASE WHEN ar_state.role_desc IS NULL THEN N'DISCONNECTED' ELSE ar_state.role_desc END AS ag_replica_role
    FROM sys.availability_groups AS ag
    JOIN sys.availability_replicas AS ar
        ON ag.group_id = ar.group_id
    JOIN sys.dm_hadr_availability_replica_states AS ar_state
        ON ar.replica_id = ar_state.replica_id
    JOIN sys.dm_hadr_database_replica_states AS dr_state
        ON ag.group_id = dr_state.group_id
       AND dr_state.replica_id = ar_state.replica_id
    WHERE dr_state.redo_queue_size > @redo_limit_kb
)
BEGIN
    PRINT 'alwayson副本有较多日志未重做,请登录查看';
END;

-- Failover detection: find the replica that currently has role = 1 (primary).
-- replica_server_name is nvarchar(256), so the variable uses sysname instead of
-- the original VARCHAR(20), which could silently truncate the name.
-- NOTE(review): if several availability groups exist, more than one row can match
-- and the variable keeps only one of them - confirm this is acceptable.
DECLARE @MASTER sysname;

SELECT @MASTER = a.replica_server_name
FROM sys.dm_hadr_availability_replica_cluster_states AS a
JOIN sys.dm_hadr_availability_replica_states AS b
    ON a.replica_id = b.replica_id
WHERE b.role = 1;  -- optionally also: AND a.replica_server_name = 'ALWAYSON241'

PRINT @MASTER;

IF @MASTER IS NULL
BEGIN
    -- No replica reports the primary role from this instance's point of view.
    PRINT '主从同步异常';
END
ELSE IF @MASTER <> @expected_primary
BEGIN
    PRINT 'alwayson进行了切换,现在主库是' + @MASTER;
END;

-- Database state: report databases whose database_state is not 0 or whose
-- synchronization_state is neither 1 nor 2.
IF EXISTS (
    SELECT *
    FROM sys.dm_hadr_database_replica_states AS a
    WHERE a.database_state <> 0
       OR a.synchronization_state NOT IN (1, 2)
)
BEGIN
    PRINT 'alwayson有数据库异常';
END;

-- E-mail alerting (disabled in the original notes; the @body text is cut off there).
----declare @name nvarchar(222)
----declare @mailbody nvarchar(4000);
----declare @maillabel nvarchar(100);
----set @maillabel='作业失败'
--EXEC msdb.dbo.sp_send_dbmail @profile_name = 'sendmail',
--    @recipients = '729975475@qq.com',
--    @subject = 'alwayson预警',
--    @body = 'alwayson故障
与 AlwaysOn 相关的系统视图主要涉及以下这些(这些系统视图能让我们很直观地了解可用性组的状态):
-- Reference queries against the AlwaysOn-related server properties and system
-- views. They are ad-hoc inspection queries, so SELECT * is intentional.

-- Status of the AlwaysOn availability manager.
SELECT SERVERPROPERTY('HadrManagerStatus');

-- Whether AlwaysOn availability groups are enabled on this instance.
SELECT SERVERPROPERTY('IsHadrEnabled');

-- Cluster information for the server.
SELECT * FROM sys.dm_hadr_cluster;
SELECT * FROM sys.dm_hadr_cluster_members;
SELECT * FROM sys.dm_hadr_availability_replica_cluster_nodes;
SELECT * FROM sys.dm_hadr_name_id_map;

-- Availability groups.
SELECT * FROM sys.availability_groups;
SELECT * FROM sys.availability_groups_cluster;
SELECT * FROM sys.dm_hadr_availability_group_states;

-- Availability replicas.
SELECT * FROM sys.availability_replicas;

-- Read-only routing lists.
SELECT * FROM sys.availability_read_only_routing_lists;

-- Availability replica states.
SELECT * FROM sys.dm_hadr_availability_replica_cluster_states;
-- synchronization_health must be 2 to count as healthy; this should be monitored.
SELECT * FROM sys.dm_hadr_availability_replica_states;

-- Availability databases.
SELECT * FROM sys.availability_databases_cluster;
SELECT *
FROM sys.databases
WHERE name IN (SELECT database_name FROM sys.availability_databases_cluster);

-- Rows for the most recent automatic page-repair attempts on a primary/secondary
-- database, at most 100 rows per database; if it holds rows, review them manually.
SELECT * FROM sys.dm_hadr_auto_page_repair;

-- Databases in any state other than SYNCHRONIZING / SYNCHRONIZED
-- (the original listed 'SYNCHRONIZED' twice, which flagged healthy
-- asynchronous-commit databases).
SELECT *
FROM sys.dm_hadr_database_replica_states
WHERE synchronization_state_desc NOT IN ('SYNCHRONIZING', 'SYNCHRONIZED');

-- Availability group listeners.
SELECT * FROM sys.availability_group_listener_ip_addresses WHERE state_desc = 'ONLINE';
SELECT * FROM sys.availability_group_listeners;

-- Online listener endpoints; wrap this in NOT EXISTS to alert when none is online.
SELECT dns_name, port, ip_address
FROM sys.availability_group_listener_ip_addresses AS A
JOIN sys.availability_group_listeners AS B
    ON A.listener_id = B.listener_id
WHERE A.state_desc = 'ONLINE';

-- TCP listeners that are not ONLINE.
SELECT * FROM sys.dm_tcp_listener_states WHERE state_desc NOT IN ('ONLINE');