zoukankan      html  css  js  c++  java
  • Nagios监控Oracle

    一、本文说明:

       本文是监控本地的Oracle,其实监控远端的Oracle也是跟下面的步骤差不多的。

    二、安装Nagios、Nagios插件、NRPE软件:

        安装步骤可以参考《Linux下Nagios的安装与配置》。

        注意点:
        1、由于nagios脚本需要读取oracle相关文件,所以运行nagios的用户需要定义为Oracle服务用户,并且修改/etc/xinetd.d/nrpe中的配置(user和group改为oracle用户及其所属组)。

    [oracle@rhel5 libexec]$ cat /etc/xinetd.d/nrpe 
    # default: on
    # description: NRPE (Nagios Remote Plugin Executor)
    service nrpe
    {
               flags           = REUSE
            socket_type     = stream    
        port        = 5666    
               wait            = no
            user            = oracle
        group        = oinstall
               server          = /usr/local/nagios/bin/nrpe
            server_args     = -c /usr/local/nagios/etc/nrpe.cfg --inetd
               log_on_failure  += USERID
            disable         = no
        only_from       = 127.0.0.1 192.168.11.149
    }

        2、修改check_oracle脚本,将$ORACLE_HOME以及$PATH手动加入。

    [oracle@rhel5 libexec]$ cat /usr/local/nagios/libexec/check_oracle 
    #! /bin/sh
    #
    # latigid010@yahoo.com
    # 01/06/2000
    #
    #  This Nagios plugin was created to check Oracle status
    #
    
    
    ORACLE_HOME=/u01/app/oracle/product/11.2.0/db_1
    PATH=$PATH:/u01/app/oracle/product/11.2.0/db_1/bin

    三、配置nrpe服务:

        修改/usr/local/nagios/etc/nrpe.cfg文件。加入以下内容:

    [oracle@rhel5 libexec]$ cat /usr/local/nagios/etc/nrpe.cfg
    
    #Check Oracle  
     
    command[check_oracle_tns]=/usr/local/nagios/libexec/check_oracle --tns orcl jack jack  
     
    command[check_oracle_db]=/usr/local/nagios/libexec/check_oracle --db orcl  
     
    command[check_oracle_login]=/usr/local/nagios/libexec/check_oracle --login orcl jack jack
     
    command[check_oracle_cache]=/usr/local/nagios/libexec/check_oracle --cache orcl system oracle 80 90  
     
    command[check_oracle_tablespace]=/usr/local/nagios/libexec/check_oracle --tablespace orcl jack jack jack  90 80  

        具体参数写法参考 check_oracle -help

    [oracle@rhel5 libexec]$ ./check_oracle -help
    Usage:
      check_oracle --tns <Oracle Sid or Hostname/IP address>
      check_oracle --db <ORACLE_SID>
      check_oracle --login <ORACLE_SID>
      check_oracle --cache <ORACLE_SID> <USER> <PASS> <CRITICAL> <WARNING>
      check_oracle --tablespace <ORACLE_SID> <USER> <PASS> <TABLESPACE> <CRITICAL> <WARNING>
      check_oracle --oranames <Hostname>
      check_oracle --help
      check_oracle --version

        添加nrpe端口号

    [oracle@rhel5 libexec]$ tail -4 /etc/services 
    iqobject    48619/tcp            # iqobject
    iqobject    48619/udp            # iqobject
    # Local services
    nrpe            5666/tcp                        #nrpe

        配置完成后,重启xinetd服务

    [oracle@rhel5 libexec]$ service xinetd restart

    四、配置Nagios:
        1、在nagios服务器端添加nrpe命令配置。修改/usr/local/nagios/etc/objects/commands.cfg文件:

    [oracle@rhel5 etc]$ tail -10 objects/commands.cfg
    define command{
        command_name    process-service-perfdata
        command_line    /usr/bin/printf "%b" "$LASTSERVICECHECK$	$HOSTNAME$	$SERVICEDESC$	$SERVICESTATE$	$SERVICEATTEMPT$	$SERVICESTATETYPE$	$SERVICEEXECUTIONTIME$	$SERVICELATENCY$	$SERVICEOUTPUT$	$SERVICEPERFDATA$
    " >> /usr/local/nagios/var/service-perfdata.out
        }
    
    #'check_nrpe' command definition
      define command{
                command_name   check_nrpe
                command_line   $USER1$/check_nrpe -H $HOSTADDRESS$ -c $ARG1$
                }

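        2、添加hosts.cfg和services.cfg(新增的文件需要在nagios.cfg中通过cfg_file引入才会生效,例如:cfg_file=/usr/local/nagios/etc/hosts.cfg 和 cfg_file=/usr/local/nagios/etc/services.cfg)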

    [oracle@rhel5 etc]$ cat hosts.cfg 
    define host{
            use                     linux-server2
            host_name               oracle
            alias                   Nagios-node2
            address                 192.168.11.149
            }
    
    define hostgroup{      
            hostgroup_name          bsmart-servers      
            alias                   bsmart servers        
            members                 oracle
            }
    [oracle@rhel5 etc]$ cat services.cfg 
     
    define service {  
     
    use generic-service  
     
    host_name oracle  
     
    service_description TNS Check  
     
    check_command check_nrpe!check_oracle_tns  
     
    }  
     
    define service {  
     
    use generic-service  
     
    host_name oracle  
     
    service_description DB Check  
     
    check_command check_nrpe!check_oracle_db  
     
    }  
     
    define service {  
     
    use generic-service  
     
    host_name oracle  
     
    service_description Login Check  
     
    check_command check_nrpe!check_oracle_login  
     
    }  
     
    define service {  
     
    use generic-service  
     
    host_name oracle  
     
    service_description Cache Check  
     
    check_command check_nrpe!check_oracle_cache  
    
    notifications_enabled 0
     
    }  
     
    define service {  
     
    use generic-service  
     
    host_name oracle  
     
    service_description Tablespace Check  
     
    check_command check_nrpe!check_oracle_tablespace  
     
    }  

        3、在templates.cfg中添加如下内容:

    define host{
            name                            linux-server2    ; The name of this host template
            use                             generic-host    ; This template inherits other values from the generic-host template
            check_period                    24x7            ; By default, Linux hosts are checked round the clock
            check_interval                  5               ; Actively check the host every 5 minutes
            retry_interval                  1               ; Schedule host check retries at 1 minute intervals
            max_check_attempts              10              ; Check each Linux host 10 times (max)
            check_command                   check-host-alive ; Default command to check Linux hosts
            notification_period             workhours       ; Linux admins hate to be woken up, so we only notify during the day
                                                            ; Note that the notification_period variable is being overridden from
                                                            ; the value that is inherited from the generic-host template!
            notification_interval           120             ; Resend notifications every 2 hours
            notification_options            d,u,r           ; Only send notifications for specific host states
            contact_groups                  admins          ; Notifications get sent to the admins by default
            register                        0               ; DONT REGISTER THIS DEFINITION - ITS NOT A REAL HOST, JUST A TEMPLATE!
            }

    五、重点说明:

        由于nagios的用户是oracle,所以在nagios启动的命令应该使用:

        [oracle@rhel5 etc]$ /usr/local/nagios/bin/nagios -d /usr/local/nagios/etc/nagios.cfg

        关闭命令使用:

        [oracle@rhel5 etc]$ killall nagios

    [oracle@rhel5 etc]$ ll
    总计 148
    -rw-rw-r-- 1 oracle oinstall 11437 09-27 19:26 cgi.cfg
    -rw-r--r-- 1 oracle oinstall 11408 09-27 19:20 cgi.cfg.bak
    -rw-r--r-- 1 oracle oinstall   382 09-27 19:59 hosts.cfg
    -rw-r--r-- 1 oracle oinstall    44 09-27 17:17 htpasswd
    -rw-r--r-- 1 oracle oinstall    44 09-27 19:20 htpasswd.bak
    -rw-rw-r-- 1 oracle oinstall 43863 09-27 20:18 nagios.cfg
    -rw-r--r-- 1 oracle oinstall 43774 09-27 19:20 nagios.cfg.bak
    -rw-r--r-- 1 oracle oinstall  7834 09-27 21:12 nrpe.cfg
    drwxrwxr-x 2 oracle oinstall  4096 09-27 21:35 objects
    -rw-rw---- 1 oracle oinstall  1340 09-27 16:42 resource.cfg
    -rw-r----- 1 oracle oinstall  1340 09-27 19:20 resource.cfg.bak
    -rw-r--r-- 1 oracle oinstall   805 09-27 21:16 services.cfg
    [oracle@rhel5 etc]$ ll objects/
    总计 100
    -rw-rw-r-- 1 oracle oinstall  7891 09-27 19:44 commands.cfg
    -rw-r--r-- 1 oracle oinstall  7716 09-27 19:19 commands.cfg.bak
    -rw-rw-r-- 1 oracle oinstall  2153 09-27 19:24 contacts.cfg
    -rw-r--r-- 1 oracle oinstall  2166 09-27 19:19 contacts.cfg.bak
    -rw-rw-r-- 1 oracle oinstall  5386 09-27 19:22 localhost.cfg
    -rw-r--r-- 1 oracle oinstall  5403 09-27 19:19 localhost.cfg.bak
    -rw-rw-r-- 1 oracle oinstall  3124 09-27 16:42 printer.cfg
    -rw-r--r-- 1 oracle oinstall  3124 09-27 19:19 printer.cfg.bak
    -rw-rw-r-- 1 oracle oinstall  3293 09-27 16:42 switch.cfg
    -rw-r--r-- 1 oracle oinstall  3293 09-27 19:19 switch.cfg.bak
    -rw-rw-r-- 1 oracle oinstall 12360 09-27 20:00 templates.cfg
    -rw-r--r-- 1 oracle oinstall 10812 09-27 19:19 templates.cfg.bak
    -rw-rw-r-- 1 oracle oinstall  3208 09-27 16:42 timeperiods.cfg
    -rw-r--r-- 1 oracle oinstall  3208 09-27 19:20 timeperiods.cfg.bak
    -rw-rw-r-- 1 oracle oinstall  4019 09-27 16:42 windows.cfg
    -rw-r--r-- 1 oracle oinstall  4019 09-27 19:20 windows.cfg.bak

    六、nagios网页截图:

  • 相关阅读:
    Flink实例(117):FLINK-SQL应用场景(16)以upsert的方式读写Kafka数据——以Flink1.12为例(二)
    Flink实例(116):FLINK-SQL应用场景(15)以upsert的方式读写Kafka数据——以Flink1.12为例(一)
    数据挖掘实践(17):基础理论(十七)数据挖掘基础(四)模型解释
    数据挖掘实践(16):基础理论(十六)数据挖掘基础(三)特征工程(二)性能度量与评估方法
    rust 可变变量
    Rust学习(32):智能指针-Rc<T>
    rust 高级编程
    rust 所有权
    rust智能指针
    Anbox:容器中的 Android,anboxandroid
  • 原文地址:https://www.cnblogs.com/Richardzhu/p/3343633.html
Copyright © 2011-2022 走看看