zoukankan      html  css  js  c++  java
  • nagios --centos7.3

    nagios --centos7.3
    准备一台虚拟机开始安装nagios
    安装前准备:
    1,主机名
    2,关闭firewalld,selinux
    3,关闭NetworkManager,并配置静态ip
    4,配置本地yum,epel源,163源
    5,时间同步
    安装步骤:
    1,搭建rpm版lamp(源码版lamp也可以,但nginx不行,因为后面nagios的web子配置文件里的语法都是apache的语法)
    # yum install httpd httpd-devel gd gd-devel php
    2,安装nagios
    # yum install "nagios*"
    安装完后确认用户
    # id nagios
    uid=988(nagios) gid=983(nagios) groups=983(nagios)
    # id apache
    uid=48(apache) gid=48(apache) groups=48(apache),983(nagios)
    主配置文件路径:
    /etc/nagios/nagios.cfg
    子配置文件路径:
    # ls /etc/nagios/objects/
    commands.cfg localhost.cfg switch.cfg timeperiods.cfg
    contacts.cfg printer.cfg templates.cfg windows.cfg
    plugins(监控命令)路径,目录下有很多check开头的命令
    # ls /usr/lib64/nagios/plugins/
    3,设置http访问nagios的验证用户和密码
    # htpasswd /etc/nagios/passwd nagiosadmin
    # nagios -v /etc/nagios/nagios.cfg ----检查配置文件正确性
    # systemctl restart httpd
    # systemctl restart nagios
    # systemctl status httpd
    # systemctl status nagios
    # systemctl enable httpd
    # systemctl enable nagios
    使用firefox访问:
    访问路径http://IP/nagios
    -------------------------------------------------------------------------------------------------------------------------------------
    现在查看web界面,默认只监控了localhost,并监控了其8个服务
    一些小操作:
    1,如果http服务为黄色,是警告,则需要把网站家目录里加一个主页进去(家目录为空,他就会警告)。
    但需要等它下一次check才会OK。如果要手动check,可以点http,再右边点Re-schedule the next check of this service去强制check,就OK了
    2,默认http和ssh是关闭通知的,是因为在localhost.cfg里这两个服务有一句 notifications_enabled 0。
    也可以手动打开,点进去,再右边点Enable notifications for this service.
    3,关闭ssh服务,刷新web界面,还是没有critical.
    点击ssh,可以看到下一次计划的check时间。如果不等的话,在右边点Re-schedule the next check of this service强制check,再刷新就critical
    -------------------------------------------------------------------------------------------------------------------------------------
    关于nagios配置文件之间的联系讲解示例
    # vim /etc/nagios/nagios.cfg
    cfg_file=/etc/nagios/objects/localhost.cfg
    # vim /etc/nagios/objects/localhost.cfg
    define host{
    use linux-server ----模版
    host_name localhost ----主机名
    alias localhost ----主机别名
    address 127.0.0.1 ----被监控机器的IP
    }
    define hostgroup{
    hostgroup_name linux-servers
    alias Linux Servers
    members localhost ----linux Servers组现在只有localhost这一个成员
    }
    --下面是8个默认定义的服务,我以监控磁盘利用率的这一段为例
    define service{
    use local-service ----模版,在templates.cfg 里定义的
    host_name localhost ----主机名,调用的是同配置文件里define host里定义的host_name
    service_description Root Partition ----描述,会在web界面显示的一个标题
    check_command check_local_disk!20%!10%!/ ----检测利用率的命令,free空间小于20%就warning警告,小于10%就critical警告
    }
    # vim /etc/nagios/objects/templates.cfg
    define host{
    name linux-server
    use generic-host ----linux主机模版也使用了一个叫generic-host的模版,也在templates.cfg里
    check_period 24x7 ----在timeperiods.cfg 里定义的时间段
    check_interval 5
    retry_interval 1
    max_check_attempts 10
    check_command check-host-alive ----在commands.cfg 里定义的命令
    notification_period workhours -----通知时间在timeperiods.cfg里定义的
    notification_interval 120 ----通知间隔
    notification_options d,u,r ----通知选项
    contact_groups admins ----通知组,在contacts.cfg 里定义
    register 0 -----不注册,表示这只是一个模版,被调用,不会被nagios进程认为就是一台主机
    }
    # vim /etc/nagios/objects/commands.cfg
    define command{
    command_name check-host-alive
    command_line $USER1$/check_ping -H $HOSTADDRESS$ -w 3000.0,80% -c 5000.0,100% -p 5
    }
    --可用的命令都在/usr/lib64/nagios/plugins/下(源码安装时对应libexec目录),用--help去查
    # /usr/lib64/nagios/plugins/check_ping --help
    -----------------------------------------------------------------------------------------------------------------------------------------------
    问题:
    如何监控本地的/boot分区 使用80%警告,使用90% critical
    define service{
    use local-service
    host_name localhost
    service_description Boot Partition
    check_command check_local_disk!20%!10%!/boot
    }
    问题:
    如何监控本机zombie进程 5个警告 10个 critical
    define service{
    use local-service
    host_name localhost
    service_description Zombie Total Processes
    check_command check_local_procs!5!10!Z
    }
    例:如何增加监控本机的ftp服务
    思路步骤:
    1,看/usr/lib64/nagios/plugins/下是否有检测ftp的命令,如果没有,自己开发
    2,在localhost.cfg里定义这个服务
    3,在commands.cfg里定义命令
    # vim /etc/nagios/objects/localhost.cfg --加上下面一段
    define service{
    use local-service
    host_name localhost
    service_description FTP
    check_command check_ftp!1!3
    }
    # vim /etc/nagios/objects/commands.cfg ----下面一段默认就有,不需要加,直接改一下
    define command{
    command_name check_ftp
    command_line $USER1$/check_ftp -H $HOSTADDRESS$ -w $ARG1$ -c $ARG2$
    }
    # systemctl restart nagios
    练习:
    1,如果本机ftp服务为监听2121端口,应该如何监控
    # vim /etc/vsftpd/vsftpd.conf
    listen_port=2121 --加上这一句
    # systemctl restart vsftpd
    # netstat -ntlup |grep ftp
    # vim /etc/nagios/objects/localhost.cfg
    ----加下面一段
    define service{
    use local-service
    host_name localhost
    service_description FTP
    check_command check_ftp_2121!1!3!2121
    --check_ftp_2121这个命令默认是没有的,commands.cfg里默认只有一个check_ftp,
    --所以要手动去加;!为参数分隔符,1是第一个参数,3是第二个参数,2121是第三个参数;它们分别对应于我下面定义的-w -c -p
    }
    # vim /etc/nagios/objects/commands.cfg
    define command{
    command_name check_ftp_2121
    command_line $USER1$/check_ftp -H $HOSTADDRESS$ -w $ARG1$ -c $ARG2$ -p $ARG3$
    }
    --直接使用监控命令去手工check一下,OK的
    # /usr/lib64/nagios/plugins/check_ftp -w 1 -c 3 -p 2121
    FTP OK - 0.004 second response time on port 2121 [220-#############################
    220-#]|time=0.00389s;1.000000;3.000000;0.000000;10.000000
    # systemctl restart nagios
    如果本机http服务监听端口为8000,应该如何监控
    # vim /etc/nagios/objects/localhost.cfg
    define service{
    use local-service
    host_name localhost
    service_description HTTP
    check_command check_http_port!8000
    }
    # vim /etc/nagios/objects/commands.cfg
    define command{
    command_name check_http_port
    command_line $USER1$/check_http -I $HOSTADDRESS$ -p $ARG1$
    }
    2,监控本机的mysql
    # vim /etc/nagios/objects/localhost.cfg
    define service{
    use local-service
    host_name localhost
    service_description MYSQL
    check_command check_mysql!root!123
    }
    # vim /etc/nagios/objects/commands.cfg
    define command{
    command_name check_mysql
    command_line $USER1$/check_mysql -H $HOSTADDRESS$ -u $ARG1$ -p $ARG2$ ----第一个参数对应上面的root,第二个对应密码123
    }
    --手动check一下mysql,OK
    # /usr/lib64/nagios/plugins/check_mysql -u root -p123
    Uptime: 189 Threads: 1 Questions: 5 Slow queries: 0 Opens: 12 Flush tables: 1 Open tables: 6 Queries per second avg: 0.026
    # systemctl restart nagios
    =======================================================================
    nagios server ----》 nagios client
    172.16.2.10 172.16.2.11
    我们把监控的服务分为公共和私有
    公共:如ssh,http,ftp,mysql等。监控本地或远程的公共服务,都可以直接配置
    私有:如load,users,disk usage等。监控本地私有服务直接配置就好,监控远程私有服务,需要服务端和被监控端都安装nrpe
    例:监控远程服务器的普通服务(公共服务)。如ssh,http,ftp,mysql等
    如:我的被监控端IP为172.16.2.11
    1.在nagios服务器的主配置文件里加上172.16.2.11的主机配置文件
    # vim /etc/nagios/nagios.cfg
    cfg_file=/etc/nagios/objects/172.16.2.11.cfg
    2,创建这个172.16.2.11.cfg
    # vim /etc/nagios/objects/172.16.2.11.cfg
    define host{
    use linux-server
    host_name 172.16.2.11 ----主机名,最好/etc/hosts里对应好IP,我这里没有做,就直接写IP
    alias 172.16.2.11 ----显示到web上的名字
    address 172.16.2.11 ----实际被监控主机IP
    }
    define hostgroup{
    hostgroup_name remote linux-servers ----这里我定义了一个新组,不能和localhost.cfg里的组同名,会冲突
    alias remote Linux Servers
    members 172.16.2.11
    }
    ----下面是公共服务,这里我只写了四个,你可以自行增加
    define service{
    use local-service
    host_name 172.16.2.11
    service_description PING
    check_command check_ping!100.0,20%!500.0,60%
    }
    define service{
    use local-service
    host_name 172.16.2.11
    service_description SSH
    check_command check_ssh
    }
    define service{
    use local-service
    host_name 172.16.2.11
    service_description HTTP
    check_command check_http
    }
    define service{
    use local-service
    host_name 172.16.2.11
    service_description FTP
    check_command check_ftp!1!3
    }
    # nagios -v /etc/nagios/nagios.cfg
    # systemctl restart nagios
    ------------------------------------------------------------------------------------------------------------------------------------
    例:监控远程的私有服务
    172.16.2.10                      172.16.2.11
    nagios监控端                      被监控linux
                                                    check_disk
    check_nrpe    ---------          check_nrpe     check_swap
                   SSL传输                           check_load等
    第一大步:nagios监控端上的操作
    1,确认有如下的命令,如果没有,则yum install nagios-plugins-nrpe
    /usr/lib64/nagios/plugins/check_nrpe
    2,增加check_nrpe命令到commands.cfg文件里
    # vim /etc/nagios/objects/commands.cfg
    define command{
    command_name check_nrpe
    command_line $USER1$/check_nrpe -H $HOSTADDRESS$ -c $ARG1$
    }
    -- -c参数后接command,也就是说check_nrpe可以调用别的check命令
    3,在nagios服务器上对172.16.2.11的配置文件增加远程私有服务
    # vim /etc/nagios/objects/172.16.2.11.cfg
    define service{
    use local-service
    host_name 172.16.2.11
    service_description Current Users
    check_command check_nrpe!check_remote_users
    }
    --check_remote_users就是check_nrpe的-c参数要调用的命令,此命令在nagios服务器上的commands.cfg里是不存在的,它会在后面的步骤中加到被监控端
    # systemctl restart nagios
    4,用下面的命令做测试,但现在是会报对方5666端口拒绝(因为被监控端还没有安装配置)
    # /usr/lib64/nagios/plugins/check_nrpe -H 172.16.2.11 -c check_remote_users
    connect to address 172.16.2.11 port 5666: Connection refused
    connect to host 172.16.2.11 port 5666: Connection refused
    第二大步:nagios被监控端上的操作
    1,安装nrpe和其它监控命令包
    # yum install nrpe nagios-plugins*
    2,修改nrpe主配置文件
    # vim /etc/nagios/nrpe.cfg
    allowed_hosts=172.16.2.10
    command[check_remote_users]=/usr/lib64/nagios/plugins/check_users -w 5 -c 10
    3,启动服务,并检查5666端口是否开启
    # systemctl restart nrpe
    # lsof -i:5666
    第三大步:回到nagios服务器端测试
    再次使用下面的命令,就可以监控到远程的实际登录用户数了
    # /usr/lib64/nagios/plugins/check_nrpe -H 172.16.2.11 -c check_remote_users
    USERS WARNING - 9 users currently logged in |users=9;5;10;0
    最后,清firefox缓存,在firefox查看远程监控也正确了
    ----------------------------------------------------------------------------------------------------------------------------------
    邮件报警验证:
    1,确认你至少有一个service为critical状态
    2,yum install postfix
    # systemctl start postfix
    # systemctl enable postfix
    # systemctl status postfix
    3,保证nagios服务器能上公网,还有确认有mail命令了
    4,# vim /etc/nagios/objects/contacts.cfg
    email litengllll@126.com --改成你的一个公网测试邮箱
    5,# systemctl restart nagios
    如果你想做成免费手机短信通知,可以使用类似139邮箱这种(有邮件到达就短信通知的功能)的邮箱
    现在有智能手机就方便多了,直接报警邮件发给外部一个邮箱,然后在你的手机上下载对应邮箱的app软件就ok了
    现在nagios官方直接都有手机客户端管理软件
    ======================================================================================
    nagios图表
    nagiosgraph-1.4.4.tar.gz
    软件包路径:
    笔记目录/program/nagios_soft/nagiosgraph-1.4.4.tar.gz
    # tar xf nagiosgraph-1.4.4.tar.gz -C /usr/src
    # cd /usr/src/nagiosgraph-1.4.4
    # ./install.pl --check-prereq
    checking required PERL modules
    Carp...1.26
    CGI...3.63 --如果fail,则yum install perl-CGI
    Data::Dumper...2.145
    File::Basename...2.84
    File::Find...1.20
    MIME::Base64...3.13
    POSIX...1.30
    RRDs...1.4008 --如果fail,则yum install rrdtool-perl rrdtool
    Time::HiRes...1.9725
    checking optional PERL modules
    GD...2.49 --如果fail,则yum install perl-GD
    checking nagios installation
    found nagios at /sbin/nagios
    checking web server installation
    found apache at /sbin/httpd
    上面的检测全部ok后,则使用下面的命令安装,一直回车就可以了
    1,
    # ./install.pl --install
    checking required PERL modules
    Carp...1.11
    CGI...3.51
    Data::Dumper...2.124
    File::Basename...2.77
    File::Find...1.14
    MIME::Base64...3.08
    POSIX...1.17
    RRDs...1.4008
    Time::HiRes...1.9721
    checking optional PERL modules
    GD...2.53
    checking nagios installation
    found nagios at /usr/local/nagios/bin/nagios
    checking web server installation
    found apache at /usr/sbin/httpd
    Destination directory (prefix)? [/usr/local/nagiosgraph]
    Location of configuration files (etc-dir)? [/usr/local/nagiosgraph/etc]
    Location of executables? [/usr/local/nagiosgraph/bin]
    Location of CGI scripts? [/usr/local/nagiosgraph/cgi]
    Location of documentation (doc-dir)? [/usr/local/nagiosgraph/doc]
    Location of examples? [/usr/local/nagiosgraph/examples]
    Location of CSS and JavaScript files? [/usr/local/nagiosgraph/share]
    Location of utilities? [/usr/local/nagiosgraph/util]
    Location of state files (var-dir)? [/usr/local/nagiosgraph/var]
    Location of RRD files? [/usr/local/nagiosgraph/var/rrd]
    Location of log files (log-dir)? [/usr/local/nagiosgraph/var]
    Path of log file? [/usr/local/nagiosgraph/var/nagiosgraph.log]
    Path of CGI log file? [/usr/local/nagiosgraph/var/nagiosgraph-cgi.log]
    URL of CGI scripts? [/nagiosgraph/cgi-bin]
    URL of CSS file? [/nagiosgraph/nagiosgraph.css]
    URL of JavaScript file? [/nagiosgraph/nagiosgraph.js]
    Path of Nagios performance data file? [/tmp/perfdata.log]
    URL of Nagios CGI scripts? [/nagios/cgi-bin]
    username or userid of Nagios user? [nagios]
    username or userid of web server user? [apache]
    Modify the Nagios configuration? [n]
    Modify the Apache configuration? [n]
    configuration:
    ng_layout standalone
    ng_prefix /usr/local/nagiosgraph
    ng_etc_dir /usr/local/nagiosgraph/etc
    ng_bin_dir /usr/local/nagiosgraph/bin
    ng_cgi_dir /usr/local/nagiosgraph/cgi
    ng_doc_dir /usr/local/nagiosgraph/doc
    ng_examples_dir /usr/local/nagiosgraph/examples
    ng_www_dir /usr/local/nagiosgraph/share
    ng_util_dir /usr/local/nagiosgraph/util
    ng_var_dir /usr/local/nagiosgraph/var
    ng_rrd_dir /usr/local/nagiosgraph/var/rrd
    ng_log_dir /usr/local/nagiosgraph/var
    ng_log_file /usr/local/nagiosgraph/var/nagiosgraph.log
    ng_cgilog_file /usr/local/nagiosgraph/var/nagiosgraph-cgi.log
    ng_url /nagiosgraph
    ng_cgi_url /nagiosgraph/cgi-bin
    ng_css_url /nagiosgraph/nagiosgraph.css
    ng_js_url /nagiosgraph/nagiosgraph.js
    nagios_cgi_url /nagios/cgi-bin
    nagios_perfdata_file /tmp/perfdata.log
    nagios_user nagios
    www_user apache
    modify_nagios_config n
    nagios_config_file
    nagios_commands_file
    modify_apache_config n
    apache_config_dir
    apache_config_file
    Continue with this configuration? [y]
    .............
    2,vim /etc/nagios/nagios.cfg --最后加上下面一段
    process_performance_data=1
    service_perfdata_file=/tmp/perfdata.log
    service_perfdata_file_template=$LASTSERVICECHECK$||$HOSTNAME$||$SERVICEDESC$||$SERVICEOUTPUT$||$SERVICEPERFDATA$
    service_perfdata_file_mode=a
    service_perfdata_file_processing_interval=30
    service_perfdata_file_processing_command=process-service-perfdata-for-nagiosgraph
    3,# vim /etc/nagios/objects/commands.cfg
    define command {
    command_name process-service-perfdata-for-nagiosgraph
    command_line /usr/local/nagiosgraph/bin/insert.pl
    }
    4,
    # vim /etc/httpd/conf/httpd.conf --在你的apache里include这个文件
    Include /usr/local/nagiosgraph/etc/nagiosgraph-apache.conf
    # systemctl restart httpd
    # systemctl restart nagios
  • 相关阅读:
    2.1求二进制数中1的个数
    SQL学习笔记八:SQL Server陷阱
    SQL学习笔记之三:系统表syscommnets
    SQL学习笔记之二:QUOTENAME函数
    SQL学习笔记五:脚本创建角色、用户以及相关操作
    输入表名生成插入、更新存储过程和调用该存储过程的字符串
    没有注意过的count(0),count(1),count(*),count(列名)
    清除开始文档批处理
    SQL学习笔记七:利用脚本附加数据库
    SQL学习笔记六:关于全备/差异/日志备份的恢复
  • 原文地址:https://www.cnblogs.com/skyzy/p/9201369.html
Copyright © 2011-2022 走看看