zoukankan      html  css  js  c++  java
  • nagios部署+短信和邮件报警

    操作系统 CentOS6.6 

    服务端:10.0.0.20

    客户端:10.0.0.50

    一.nagios的服务端安装部署

    1.nagios安装

    [root@manager src]# rz
    rz waiting to receive.
    Starting zmodem transfer. Press Ctrl+C to cancel.
    Transferring nagios-cn-3.2.3.tar.bz2...
    100% 9412 KB 9412 KB/sec 00:00:01 0 Errors

    tar xf nagios-cn-3.2.3.tar.bz2 
    cd nagios-cn-3.2.3
    useradd -m -s /bin/bash nagios
    groupadd nagcmd

      make install; make install-init;make install-commandmode;make install-config;make install-webconf
      ll /usr/local/nagios/  #出现以下文件代表成功

    drwxrwxr-x 2 nagios nagios 4096 Mar 12 08:08 bin
    drwxrwxr-x 3 nagios nagios 4096 Mar 12 08:08 etc
    drwxrwxr-x 2 nagios nagios 4096 Mar 12 08:08 libexec
    drwxrwxr-x 2 nagios nagios 4096 Mar 12 08:08 sbin
    drwxrwxr-x 9 nagios nagios 4096 Mar 12 08:08 share
    drwxrwxr-x 6 nagios nagios 4096 Mar 12 08:08 var

      chmod o+rwx /usr/local/nagios/var/rw

     2.nagios plugin 插件安装

    [root@manager src]# rz
    rz waiting to receive.
    Starting zmodem transfer.  Press Ctrl+C to cancel.
    Transferring nagios-plugins-1.4.13.tar.gz...
      100%    2226 KB    2226 KB/sec    00:00:01       0 Errors 
    [root@manager src]# tar xf nagios-plugins-1.4.13.tar.gz  
    [root@manager src]# cd nagios-plugins-1.4.13

    #安装依赖

    # Trailing backslashes keep the whole package list in ONE yum command
    # (without them every following line would run as its own command).
    # Wildcard patterns are quoted so that yum, not the shell, expands them;
    # package names that were listed twice are given once.
    yum install -y make 'apr*' autoconf automake curl curl-devel gcc gcc-c++ zlib-devel \
      openssl openssl-devel pcre-devel gd gd-devel kernel keyutils patch perl perl-devel \
      kernel-headers 'compat*' mpfr cpp glibc libgomp libstdc++-devel ppl \
      cloog-ppl keyutils-libs-devel libcom_err-devel libsepol-devel libselinux-devel \
      krb5-devel 'libXpm*' freetype 'libjpeg*' 'libpng*' php-common php-gd 'ncurses*' 'libtool*' libxml2 libxml2-devel
    ./configure --prefix=/usr/local/nagios --with-mysql=/usr/local/mysql/
    make
    make install

      file /usr/lib64/libxcb-reply.so.1.0.0 from install of compat-xcb-util-0.4.0-2.2.el6.x86_64 conflicts with file from package xcb-util-0.3.6-5.el6.x86_64

    有冲突卸载掉这个有冲突的

    yum remove -y xcb-util-0.3.6-5.el6.x86_64

    3.nrpe安装

    [root@manager src]# rz
    rz waiting to receive.
    Starting zmodem transfer.  Press Ctrl+C to cancel.
    Transferring nrpe-2.12.tar.gz...
      100%     396 KB     396 KB/sec    00:00:01       0 Errors 
    tar xf nrpe-2.12.tar.gz 
    cd nrpe-2.12
    # Configure, build and install the plugin, the daemon and its sample config;
    # && stops at the first step that fails.
    ./configure && make all && make install-plugin && make install-daemon && make install-daemon-config
    cp src/check_nrpe /usr/local/nagios/libexec/
    # Start the NRPE daemon now ...
    /usr/local/nagios/bin/nrpe -c /usr/local/nagios/etc/nrpe.cfg -d
    # ... and on every boot.
    echo '/usr/local/nagios/bin/nrpe -c /usr/local/nagios/etc/nrpe.cfg -d' >> /etc/rc.local
    要重启nrpe进程就先杀掉进程,然后重启
    kill `ps aux |grep nrpe |grep -v grep |awk '{print $2}'`
    /usr/local/nagios/bin/nrpe -c /usr/local/nagios/etc/nrpe.cfg -d
    本机测试下:
    /usr/local/nagios/libexec/check_nrpe -H localhost -c check_users

    加入系统服务

    加入系统服务并设为开机自动
    chkconfig --add nagios
    chkconfig nagios on
    chown nagios.nagios /usr/local/nagios/var/rw
    # 测试配置文件是否正确
    /usr/local/nagios/bin/nagios -v /usr/local/nagios/etc/nagios.cfg

    添加别名命令,方便测试配置文件

    vi ~/.bashrc
    在里面用alias 来自定义一个命令来代替,这里我用check
    alias check='/usr/local/nagios/bin/nagios -v /usr/local/nagios/etc/nagios.cfg'
    source ~/.bashrc
    此时可以用check命令来检测配置文件了

    安装 nginx  的FCGI模块

      cd /usr/local/src/
      rz
      tar xf FCGI-0.74.tar.gz 
      cd FCGI-0.74
      perl Makefile.PL 
      make && make install

    安装IO  和IO-ALL模块

    [root@manager src]# tar xf IO-1.25.tar.gz 
    [root@manager src]# cd IO-1.25
    [root@manager IO-1.25]# perl Makefile.PL
    [root@manager IO-1.25]# make && make install

    [root@manager src]# tar xf IO-All-0.81.tar.gz
    [root@manager src]# cd IO-All-0.81
    [root@manager IO-All-0.81]# perl Makefile.PL

    [root@manager IO-All-0.81]# make && make install

    上传fastcgi 的启动脚本。

    [root@manager src]# rz
    rz waiting to receive.
    Starting zmodem transfer. Press Ctrl+C to cancel.
    Transferring perl-fcgi.zip...
    100% 2 KB 2 KB/sec 00:00:01 0 Errors

    [root@manager src]# unzip perl-fcgi.zip
    Archive: perl-fcgi.zip
    inflating: perl-fcgi.pl
    [root@manager src]# cp perl-fcgi.pl /usr/local/nginx/

    [root@manager IO-All-0.81]# chmod 755 /usr/local/nginx/perl-fcgi.pl

    [root@manager nginx]# ./start_perl_cgi.sh start
    start perl-fcgi done

    [root@manager ~]# cat /usr/local/nginx/start_perl_cgi.sh
    #!/bin/bash
    # Control script for the perl-fcgi wrapper: accepts start | stop | restart
    # as its first argument.
    # Uncomment the next line to trace every command while debugging.
    #set -x
    # Base directory holding perl-fcgi.pl and the logs/ directory that the
    # functions below use for the log, pid file and socket.
    dir=/usr/local/nginx/
    # Stop the perl-fcgi daemon whose PID is recorded in $dir/logs/perl-fcgi.pid
    # and remove its pid file and socket.
    # Globals:  dir (read)
    # Outputs:  "stop perl-fcgi done" on stdout; diagnostics on stderr.
    stop ()
    {
      local pid_file="$dir/logs/perl-fcgi.pid"
      local sock_file="$dir/logs/perl-fcgi.sock"
      local pid=""

      if [[ -r "$pid_file" ]]; then
        pid=$(<"$pid_file")
      fi

      # Only signal when a numeric PID was actually read; calling kill with an
      # empty argument list just prints its usage text.
      if [[ "$pid" =~ ^[0-9]+$ ]]; then
        kill "$pid" 2>/dev/null || echo "perl-fcgi (pid $pid) was not running" >&2
      else
        echo "perl-fcgi: no valid PID in $pid_file, nothing to signal" >&2
      fi

      # -f: the files may legitimately be absent already.
      rm -f -- "$pid_file" "$sock_file"
      echo "stop perl-fcgi done"
    }
    # Launch perl-fcgi.pl as user "nobody" through a freshly generated launcher
    # script ($dir/now_start_perl_fcgi.sh).
    # Globals:  dir (read)
    # Outputs:  "start perl-fcgi done" on stdout.
    start ()
    {
      local launcher="$dir/now_start_perl_fcgi.sh"

      # The log, pid file and socket are all created under logs/, so the
      # unprivileged user has to own that directory.
      chown nobody:root "$dir/logs"

      # '>' truncates, so the launcher always holds exactly these two lines
      # even if an old copy could not be removed. The shebang makes the file
      # directly executable.
      printf '%s\n' \
        '#!/bin/sh' \
        "$dir/perl-fcgi.pl -l $dir/logs/perl-fcgi.log -pid $dir/logs/perl-fcgi.pid -S $dir/logs/perl-fcgi.sock" \
        > "$launcher"
      chown nobody:nobody "$launcher"
      chmod u+x "$launcher"
      sudo -u nobody "$launcher"
      echo "start perl-fcgi done"
    }
    # Dispatch on the first command-line argument.
    # Arguments: $1 - one of start | stop | restart
    # Returns:   status of the last action run; 2 (with a usage line on stderr)
    #            when the argument is missing or unknown.
    main ()
    {
      case "${1:-}" in
        stop)
          stop
          ;;
        start)
          start
          ;;
        restart)
          stop
          start
          ;;
        *)
          echo "Usage: $0 {start|stop|restart}" >&2
          return 2
          ;;
      esac
    }
    main "$@"

    启动fastcgi

    [root@manager nginx]# ./start_perl_cgi.sh start
    start perl-fcgi done

    # 重复启动会报这样的错,使用参数 restart,或者先 stop 再 start,就不会报这样的错。

    
    

    [root@manager nginx]# ./start_perl_cgi.sh start

    
    

    ERROR PID file /usr/local/nginx/logs/perl-fcgi.pid already exists

    
    

    start perl-fcgi done

     

    start_perl_cgi.sh文件中的nobody全部用nagios替换,nginx 目录上的用户

    [root@manager ~]# sed -i 's#nobody#nagios#g' /usr/local/nginx/start_perl_cgi.sh 
    [root@manager ~]# ps -ef |grep nagios
    nagios     1695      1  0 05:52 ?        00:00:00 /usr/local/nagios/bin/nrpe -c /usr/local/nagios/etc/nrpe.cfg -d
    nagios     1707      1  0 05:52 ?        00:00:00 /usr/local/nagios/bin/nagios -d /usr/local/nagios/etc/nagios.cfg
    # 取消用户认证(方便调试)
    vi /usr/local/nagios/etc/cgi.cfg
    找到use_authentication=1并把值改为0
    #修改联系人邮箱,修改为用于报警接收的邮件地址
    vi /usr/local/nagios/etc/objects/contacts.cfg

    email 13311802282@163.com (默认是nagios@localhost换成自己的邮箱)

    产生这样一个socket文件表示启动成功了

    [root@manager nginx]#  /etc/init.d/nagios start              
    Starting nagios: done.

    二,nagios的被监控端安装部署

    1.安装nagios插件

    [root@master ~]# groupadd nagios
    [root@master ~]# useradd nagios -M -s /sbin/nologin -g nagios
    [root@master ~]# rz
    rz waiting to receive.
    Starting zmodem transfer.  Press Ctrl+C to cancel.
    Transferring nagios-plugins-1.4.13.tar.gz...
      100%    2226 KB    2226 KB/sec    00:00:01       0 Errors  
    [root@master nagios-plugins-1.4.13]# ./configure --prefix=/usr/local/nagios --with-nagios-user=nagios --with-nagios-group=nagios --with-mysql=/usr/local/mysql && make && make install

    2.安装nrpe

    [root@master nagios-plugins-1.4.13]# cd
    [root@master ~]# rz
    rz waiting to receive.
    Starting zmodem transfer.  Press Ctrl+C to cancel.
    Transferring nrpe-2.12.tar.gz...
      100%     396 KB     396 KB/sec    00:00:01       0 Errors  
    [root@master nrpe-2.12]# ./configure && make all && make install-plugin && make install-daemon && make install-daemon-config

    [root@master nrpe-2.12]# echo $? (0未报错)
    0

    # 启动nrpe 

    [root@master nrpe-2.12]# /usr/local/nagios/bin/nrpe -c /usr/local/nagios/etc/nrpe.cfg -d
    [root@master nrpe-2.12]# ps -ef |grep nrp
    nagios 23592 1 0 09:28 ? 00:00:00 /usr/local/nagios/bin/nrpe -c /usr/local/nagios/etc/nrpe.cfg -d

    # 设置开机启动进程

    [root@master nrpe-2.12]# echo '/usr/local/nagios/bin/nrpe -c /usr/local/nagios/etc/nrpe.cfg -d' >> /etc/rc.local

    监控服务端本机:自己监控自己不需要配置nrpe,服务端的nrpe只用于获取客户端的nrpe传送过来的数据,在这里因为中文版的nagios已经默认有些配置,等会儿修改下直接用了

    监控客户端:监控的服务有:mysql、nginx、memory、ip连接数、僵死的进程、磁盘空间、磁盘IO、登录用户数、进程总数、cpu负载、PING、SSH

    有两个红色的不知道啥吊情况。

     3.操作数据库mysql

    mysql> create database nagios;
    Query OK, 1 row affected (0.07 sec)
    
    mysql> grant select on nagios.* to nagios@'%' identified by '123';
    Query OK, 0 rows affected (0.05 sec)
    
    mysql> flush privileges;
    Query OK, 0 rows affected (0.00 sec)
    
    mysql> select user,password,host from mysql.user;
    +--------+-------------------------------------------+-----------+
    | user   | password                                  | host      |
    +--------+-------------------------------------------+-----------+
    | root   | *23AE809DDACAF96AF0FD78ED04B6A265E05AA257 | localhost |
    | root   | *23AE809DDACAF96AF0FD78ED04B6A265E05AA257 | master    |
    | root   | *23AE809DDACAF96AF0FD78ED04B6A265E05AA257 | 127.0.0.1 |
    | root   | *23AE809DDACAF96AF0FD78ED04B6A265E05AA257 | ::1       |
    |        |                                           | localhost |
    |        |                                           | master    |
    | repl   | *23AE809DDACAF96AF0FD78ED04B6A265E05AA257 | 10.0.0.%  |
    | root   | *23AE809DDACAF96AF0FD78ED04B6A265E05AA257 | 10.0.0.%  |
    | nagios | *23AE809DDACAF96AF0FD78ED04B6A265E05AA257 | %         |
    +--------+-------------------------------------------+-----------+
    9 rows in set (0.03 sec)
    # 添加mysql库到系统搜索库
    vim /etc/ld.so.conf
    /usr/local/mysql/lib
    ldconfig
    # 要监控磁盘io,还得安装sysstat这个工具包
    yum install sysstat -y
    # 配置客户端上面的nrpe
    vim /usr/local/nagios/etc/nrpe.cfg

    # Hosts allowed to query this NRPE daemon: loopback plus the Nagios server (10.0.0.20).
    # The note is kept on its own line: a trailing ';' or comment after the value
    # would be read as part of the value.
    allowed_hosts=127.0.0.1,10.0.0.20
    command[check_users]=/usr/local/nagios/libexec/check_users -w 5 -c 10
    command[check_load]=/usr/local/nagios/libexec/check_load -w 15,10,5 -c 30,25,20
    # Stock example, left disabled: this machine has SCSI disks, so the devices are sd*, not hd*.
    #command[check_hda1]=/usr/local/nagios/libexec/check_disk -w 20% -c 10% -p /dev/hda1
    command[check_zombie_procs]=/usr/local/nagios/libexec/check_procs -w 5 -c 10 -s Z
    command[check_total_procs]=/usr/local/nagios/libexec/check_procs -w 150 -c 200

    #---------------------------------- added for this deployment ----------------------------------
    command[check_sda1]=/usr/local/nagios/libexec/check_disk -w 20% -c 10% -p /dev/sda1
    command[check_sda2]=/usr/local/nagios/libexec/check_disk -w 20% -c 10% -p /dev/sda2
    command[check_swap]=/usr/local/nagios/libexec/check_swap -w 20% -c 10%
    command[check_iostat]=/usr/local/nagios/libexec/check_iostat.sh -d sda -w 6 -c 10
    # The password must match the one set by the GRANT for the nagios MySQL user ('123');
    # the address is this monitored client (10.0.0.50).
    command[check_mysql]=/usr/local/nagios/libexec/check_mysql -H 10.0.0.50 -u nagios -p 123 -d nagios
    command[check_nginx]=/usr/local/nagios/libexec/check_nginx.sh -u 10.0.0.50 -p /status -w 4000 -c 5000
    command[check_mem]=/usr/local/nagios/libexec/check_memory.pl -f -w 20 -c 10
    command[check_ip_conn]=/usr/local/nagios/libexec/ip_conn.sh 200 250
    command[check_ssh]=/usr/local/nagios/libexec/check_tcp -p 22 -w 1.0 -c 10.0

    配置完重新启动进程

    [root@master ~]# kill `ps aux |grep nrpe |grep -v grep |awk '{print $2}'`
    [root@master ~]# /usr/local/nagios/bin/nrpe -c /usr/local/nagios/etc/nrpe.cfg -d
    [root@master ~]# ps -ef |grep nrpe
    nagios     1598      1  0 10:04 ?        00:00:00 /usr/local/nagios/bin/nrpe -c /usr/local/nagios/etc/nrpe.cfg -d
    root       1600   1491  0 10:04 pts/0    00:00:00 grep nrpe

    服务端配置:

    [root@manager ~]# cp /usr/local/nagios/etc/objects/localhost.cfg /usr/local/nagios/etc/objects/localhost.cfg.bak
    [root@manager ~]# > /usr/local/nagios/etc/objects/localhost.cfg
    [root@manager ~]# vim /usr/local/nagios/etc/objects/localhost.cfg
    #这是自己监控自己的配置文件
    define host{
            use                     linux-server
            host_name               localhost
            alias                   localhost
            address                 127.0.0.1
            icon_image              server.gif
            statusmap_image         server.gd2
            2d_coords               500,200
            3d_coords               500,200,100
            }
    define hostgroup{
            hostgroup_name          linux-servers         ; The name of the hostgroup
            alias                   Linux Servers         ; Long name of the group
            members                 *                     ; Comma separated list of hosts that belong to this group
            }
    define servicegroup{
            servicegroup_name       全部联通性检查
            alias                   联通性检查
            members                 localhost,PING,nagios-client,PING
            }
    define service{
            use                             local-service         ; Name of service template to use
            host_name                       *
            service_description             PING
            check_command           check_ping!100.0,20%!500.0,60%
            }
    define service{
            use                             local-service         ; Name of service template to use
            host_name                       localhost
            service_description             根分区
            check_command           check_local_disk!20%!10%!/
            }
    define service{
            use                             local-service         ; Name of service template to use
            host_name                       localhost
            service_description             登录用户数
            check_command           check_local_users!20!50
            }
    define service{
            use                             local-service         ; Name of service template to use
            host_name                       localhost
            service_description             进程总数
            check_command           check_local_procs!250!400!RSZDT
            }
    define service{
            use                             local-service         ; Name of service template to use
            host_name                       localhost
            service_description             系统负荷
            check_command           check_local_load!5.0,4.0,3.0!10.0,6.0,4.0
            }
    define service{
            use                             local-service         ; Name of service template to use
            host_name                       localhost
            service_description             交换空间利用率
            check_command           check_local_swap!20!10
            }
    define service{
            use                             local-service         ; Name of service template to use
            host_name                       localhost
            service_description             SSH
            check_command           check_tcp!22!1.0!10.0
            notifications_enabled       0
            }
    服务器端监控被监控主机的配置文件:
     
    cp /usr/local/nagios/etc/objects/localhost.cfg /usr/local/nagios/etc/objects/nagios-client.cfg
    vim /usr/local/nagios/etc/objects/nagios-client.cfg   修改完成后的配置如下
    define host{
            use                     linux-server
            host_name               nagios-client
            alias                   nagios-client
            address                 10.0.0.50
            icon_image              server.gif
            statusmap_image         server.gd2
            2d_coords       500,200
            3d_coords       500,200,100
            }
    define service{
            use                             local-service         ; Name of service template to use
            host_name                       *
            service_description             PING
            check_command           check_ping!100.0,20%!500.0,60%
            }
    define service{
            use                             local-service         ; Name of service template to use
            host_name                       nagios-client
            service_description             boot分区
            check_command           check_nrpe!check_sda1
            }
    define service{
            use                             local-service         ; Name of service template to use
            host_name                       nagios-client
            service_description             根分区
            check_command           check_nrpe!check_sda2
            }
    define service{
            use                             local-service         ; Name of service template to use
            host_name                       nagios-client
            service_description             登录用户数
            check_command           check_nrpe!check_users
            }
    define service{
            use                             local-service         ; Name of service template to use
            host_name                       nagios-client
            service_description             进程总数
            check_command           check_nrpe!check_total_procs
            }
    define service{
            use                             local-service         ; Name of service template to use
            host_name                       nagios-client
            service_description             CPU平均负载
            check_command           check_nrpe!check_load
            }
    define service{
            use                             local-service         ; Name of service template to use
            host_name                       nagios-client
            service_description             虚拟内存
            check_command           check_nrpe!check_swap
            }
    define service{
            use                             local-service         ; Name of service template to use
            host_name                       nagios-client
            service_description             SSH
            check_command           check_nrpe!check_ssh
            notifications_enabled       0
            }
    define service{
            use                             local-service         ; Name of service template to use
            host_name                       nagios-client
            service_description             僵死进程数
            check_command           check_nrpe!check_zombie_procs
            }
    define service{
            use                             local-service         ; Name of service template to use
            host_name                       nagios-client
            service_description             iostat
            check_command           check_nrpe!check_iostat
            }
    define service{
            use                             local-service         ; Name of service template to use
            host_name                       nagios-client
            service_description             mysql
            check_command           check_nrpe!check_mysql
            }
    define service{
            use                             local-service         ; Name of service template to use
            host_name                       nagios-client
            service_description             nginx
            check_command           check_nrpe!check_nginx
            }
    define service{
            use                             local-service         ; Name of service template to use
            host_name                       nagios-client
            service_description             memory
            check_command           check_nrpe!check_mem
            }
    define service{
            use                             local-service         ; Name of service template to use
            host_name                       nagios-client
            service_description             IP连接数
            check_command           check_nrpe!check_ip_conn
            }

    出现问题会自动发送邮件到你设置的邮箱

    这个部署得不太成功!网页没有显示。。。。明天重新再来一遍,再好好理解理解,这些安装包都特么好难找,这下我应该算是收集全了,再部署应该很快。

  • 相关阅读:
    一点小小的心得
    JavaScript的跳转脚本举例
    如何在Windows Server 2008 R2上开启Windows Power Shell ISE
    如何设置密码输错N次后自动锁住账户?
    IE的local intranet zone里有个URL删不掉, 怎么办?
    理解cookie机制
    ajaxpro.2.dll 简单应用
    cookie概述
    PetShop是如何兼容数据库的.NET教程,数据库应用
    .Net PetShop 4.0的分布式数据库设计.NET教程,.NET Framework
  • 原文地址:https://www.cnblogs.com/benjamin77/p/8552852.html
Copyright © 2011-2022 走看看