Nagios工作原理

Nagios主要配置文件

Nagios软件包
链接:http://pan.baidu.com/s/1skGSBIP 密码:6fwr
Nagios服务端环境及依赖
cat /etc/redhat-release
CentOS release 6.6 (Final)
uname -rm
2.6.32-504.el6.x86_64 x86_64
echo "export LC_ALL=C" >> /etc/profile # 恢复默认语言环境
tail -1 /etc/profile
source /etc/profile
echo $LC_ALL
yum install gcc glibc glibc-common gd-devel mysql-server httpd php php-gd -y
rpm -qa mysql httpd php
useradd nagios
groupadd nagcmd
usermod -a -G nagcmd nagios
usermod -a -G nagcmd apache
groups nagios
groups apache
Nagios服务端主程序安装
mkdir -p /server/tools/nagios
cd /server/tools/nagios
rz -y #上传软件包
tree /server/tools/nagios
/etc/init.d/httpd start
lsof -i :80
tar xf nagios-3.5.1.tar.gz
cd nagios
./configure --with-command-group=nagcmd
make all
make install
make install-init
make install-config
make install-commandmode
make install-webconf
cd ..
htpasswd -bc /usr/local/nagios/etc/htpasswd.users peter 123456
cat /usr/local/nagios/etc/htpasswd.users
/etc/init.d/httpd reload
sed -i 's#nagios@localhost#asdftttt@163.com#g' /usr/local/nagios/etc/objects/contacts.cfg
sed -n '35p' /usr/local/nagios/etc/objects/contacts.cfg
echo -e "set from=asdftttt@163.com\nset smtp=smtp.163.com smtp-auth-user=asdftttt smtp-auth-password=xxxxx smtp-auth=login" >> /etc/mail.rc
echo "#time sync by peter at 2017-9-14" >> /var/spool/cron/root #时间同步
echo "*/5 * * * * /usr/sbin/ntpdate time.nist.gov > /dev/null 2>&1" >> /var/spool/cron/root
crontab -l
/etc/init.d/httpd restart
chkconfig httpd on
netstat -nutlp | grep httpd
#网页
172.16.1.53/nagios
Nagios插件包安装
yum install perl-devel perl-CPAN openssl-devel -y
cd /server/tools/nagios
tar xf nagios-plugins-1.4.16.tar.gz
cd nagios-plugins-1.4.16
./configure --with-nagios-user=nagios --with-nagios-group=nagios --enable-perl-modules --with-mysql
make
make install
cd ..
ls /usr/local/nagios/libexec/ | wc -l
59 #也可能61
Nrpe软件安装
tar xf nrpe-2.12.tar.gz
cd nrpe-2.12
./configure
make all
make install-plugin
make install-daemon
make install-daemon-config
cd ..
ls /usr/local/nagios/libexec/check_nrpe #服务端安装nrpe是为了获得check_nrpe插件
Nagios服务端配置和启动
chkconfig nagios on
chkconfig --list nagios
/etc/init.d/nagios checkconfig
Running configuration check... OK.
/usr/local/nagios/bin/nagios -v /usr/local/nagios/etc/nagios.cfg
vim /etc/init.d/nagios +183
$NagiosBin -v $NagiosCfgFile
/etc/init.d/nagios checkconfig
/etc/init.d/nagios start
Starting nagios: done.
ps -ef | grep nagios
nagios 47748 1 0 15:22 ? 00:00:00 /usr/local/nagios/bin/nagios -d /usr/local/nagios/etc/nagios.cfg
Nagios客户端环境准备(web01 web02)
yum install gcc glibc glibc-common mysql-server perl-devel perl-CPAN openssl-devel -y
mkdir -p /server/tools/nagios
cd /server/tools/nagios
rz -y
useradd nagios -M -s /sbin/nologin
Nagios插件包安装
tar xf nagios-plugins-1.4.16.tar.gz
cd nagios-plugins-1.4.16
./configure --with-nagios-user=nagios --with-nagios-group=nagios --enable-perl-modules --with-mysql
make
make install
cd ..
ls /usr/local/nagios/libexec/ | wc -l
Nrpe服务安装
tar xf nrpe-2.12.tar.gz
cd nrpe-2.12
./configure
make all
make install-plugin
make install-daemon
make install-daemon-config
cd ..
安装check_iostat插件依赖包
# Unpack, build and install each Perl module needed by the check_iostat
# plugin, in the order listed. Run from the directory holding the tarballs.
# Each build happens in a subshell, so the caller's working directory never
# changes and a failed `cd` cannot cause later steps to run in the wrong
# place. The loop stops at the first failure so the broken module is obvious.
for n in Params-Validate-0.91 Class-Accessor-0.31 Config-Tiny-2.12 Math-Calc-Units-1.07 Regexp-Common-2010010201 Nagios-Plugin-0.34
do
  if ! tar xf "${n}.tar.gz"; then
    echo "cannot unpack ${n}.tar.gz" >&2
    break
  fi
  (
    cd "$n" || exit 1
    perl Makefile.PL && make && make install
  ) || {
    echo "build/install of ${n} failed" >&2
    break
  }
done
配置监控内存、磁盘I/O脚本插件
yum install -y sysstat dos2unix
cp /server/tools/nagios/check_memory.pl /usr/local/nagios/libexec
cp /server/tools/nagios/check_iostat /usr/local/nagios/libexec
chmod 755 /usr/local/nagios/libexec/check_memory.pl
chmod 755 /usr/local/nagios/libexec/check_iostat
dos2unix /usr/local/nagios/libexec/check_memory.pl
dos2unix /usr/local/nagios/libexec/check_iostat
客户端Nrpe服务配置
cd /usr/local/nagios/etc
sed -i 's#allowed_hosts=127.0.0.1#allowed_hosts=127.0.0.1,172.16.1.53#g' nrpe.cfg
vim nrpe.cfg +199
#删掉原来的
command[check_load]=/usr/local/nagios/libexec/check_load -w 15,10,5 -c 30,25,20
command[check_mem]=/usr/local/nagios/libexec/check_memory.pl -w 10% -c 3%
command[check_disk]=/usr/local/nagios/libexec/check_disk -w 15% -c 7% -p /
command[check_swap]=/usr/local/nagios/libexec/check_swap -w 20% -c 10%
command[check_iostat]=/usr/local/nagios/libexec/check_iostat -w 6 -c 10
/usr/local/nagios/bin/nrpe -c /usr/local/nagios/etc/nrpe.cfg -d
netstat -nutlp | grep nrpe
tcp 0 0 0.0.0.0:5666 0.0.0.0:* LISTEN 39042/nrpe
#重启nrpe
pkill nrpe
/usr/local/nagios/bin/nrpe -c /usr/local/nagios/etc/nrpe.cfg -d
#加入开机启动
echo "#nagios nrpe process cmd by peter 2017-08-31" >> /etc/rc.local
echo "/usr/local/nagios/bin/nrpe -c /usr/local/nagios/etc/nrpe.cfg -d" >> /etc/rc.local
tail -2 /etc/rc.local
Nagios服务端配置文件设置
vim /usr/local/nagios/etc/nagios.cfg +34
cfg_file=/usr/local/nagios/etc/objects/hosts.cfg
cfg_file=/usr/local/nagios/etc/objects/services.cfg
cfg_dir=/usr/local/nagios/etc/objects/services #该目录下只要以.cfg结尾都会被加载
# Definitions for monitoring the local (Linux) host
#cfg_file=/usr/local/nagios/etc/objects/localhost.cfg
cd /usr/local/nagios/etc/objects/
head -51 localhost.cfg > hosts.cfg
chown nagios.nagios /usr/local/nagios/etc/objects/hosts.cfg
touch services.cfg
chown nagios.nagios services.cfg
mkdir services
chown -R nagios.nagios services
vim hosts.cfg
# Define a host for the local machine
define host{
use linux-server ; Name of host template to use
; This host definition will inherit all variables that are defined
; in (or inherited by) the linux-server host template definition.
host_name 13-web01
alias 13-web01
address 172.16.1.13
}
define host{
use linux-server ; Name of host template to use
; This host definition will inherit all variables that are defined
; in (or inherited by) the linux-server host template definition.
host_name 14-web02
alias 14-web02
address 172.16.1.14
}
# Define an optional hostgroup for Linux machines
define hostgroup{
hostgroup_name linux-servers ; The name of the hostgroup
alias Linux Servers ; Long name of the group
members 13-web01,14-web02 ; Comma separated list of hosts that belong to this group
}
vim services.cfg
define service {
use generic-service
host_name 13-web01,14-web02
service_description Disk Partition
check_command check_nrpe!check_disk
}
define service {
use generic-service
host_name 13-web01,14-web02
service_description Swap Usage
check_command check_nrpe!check_swap
}
define service {
use generic-service
host_name 13-web01,14-web02
service_description Mem Usage
check_command check_nrpe!check_mem
}
define service {
use generic-service
host_name 13-web01,14-web02
service_description Current Load
check_command check_nrpe!check_load
}
define service {
use generic-service
host_name 13-web01,14-web02
service_description Disk IOstat
check_command check_nrpe!check_iostat
}
define service {
use generic-service
host_name 13-web01,14-web02
service_description PING
check_command check_ping!100.0,20%!500.0,60%
}
vim commands.cfg
define command{
command_name check_nrpe
command_line $USER1$/check_nrpe -H $HOSTADDRESS$ -c $ARG1$
}
/etc/init.d/nagios checkconfig
/etc/init.d/nagios start
/etc/init.d/nagios reload
cd /usr/local/nagios/etc
sed -i 's#nagiosadmin#peter#g' cgi.cfg
grep "^authorized_for" cgi.cfg
/etc/init.d/nagios reload
#监控URL
vim objects/services.cfg
define service {
use generic-service
host_name 13-web01
service_description blog_url
check_command check_http!-H 172.16.1.13
}
define service {
use generic-service
host_name 14-web02
service_description blog_url
check_command check_http!-H 172.16.1.14
}
/etc/init.d/nagios checkconfig
/etc/init.d/nagios reload
#监控TCP端口
vim objects/services.cfg
define service {
use generic-service
host_name 13-web01,14-web02
service_description http_80
check_command check_tcp!80
}
define service {
use generic-service
host_name 13-web01,14-web02
service_description ssh_22
check_command check_tcp!22
}
/etc/init.d/nagios checkconfig
/etc/init.d/nagios reload

Nagios配置出图
yum install cairo pango zlib-devel freetype-devel gd-devel libart_lgpl-devel rrdtool-devel perl-Time-HiRes -y
rpm -qa cairo pango zlib zlib-devel freetype freetype-devel gd gd-devel libart_lgpl libart_lgpl-devel rrdtool rrdtool-devel perl-Time-HiRes
cd /server/tools/nagios/
tar xf pnp-0.4.14.tar.gz
cd pnp-0.4.14
./configure --with-rrdtool --with-perfdata-dir=/usr/local/nagios/share/perfdata/
make all
make install
make install-config
make install-init
ll /usr/local/nagios/libexec/ | grep process
vim /usr/local/nagios/etc/nagios.cfg +835
process_performance_data=1
...
host_perfdata_command=process-host-perfdata
service_perfdata_command=process-service-perfdata
vim /usr/local/nagios/etc/objects/commands.cfg +227
# 'process-host-perfdata' command definition
define command{
command_name process-host-perfdata
command_line /usr/local/nagios/libexec/process_perfdata.pl
}
# 'process-service-perfdata' command definition
define command{
command_name process-service-perfdata
command_line /usr/local/nagios/libexec/process_perfdata.pl
}
/etc/init.d/nagios checkconfig
/etc/init.d/nagios reload
#网页
172.16.1.53/nagios/pnp
vim /usr/local/nagios/etc/objects/hosts.cfg
# Define a host for the local machine
define host{
use linux-server ; Name of host template to use
; This host definition will inherit all variables that are defined
; in (or inherited by) the linux-server host template definition.
host_name 13-web01
alias 13-web01
address 172.16.1.13
process_perf_data 1
}
define host{
use linux-server ; Name of host template to use
; This host definition will inherit all variables that are defined
; in (or inherited by) the linux-server host template definition.
host_name 14-web02
alias 14-web02
address 172.16.1.14
process_perf_data 1
}
#services.cfg中每个服务下也要添加,但generic-service模板中已经定义好了
/etc/init.d/nagios checkconfig
/etc/init.d/nagios reload
#网页
172.16.1.53/nagios/pnp

整合PNP URL到Nagios Web界面
vim objects/hosts.cfg
# Define a host for the local machine
define host{
use linux-server ; Name of host template to use
; This host definition will inherit all variables that are defined
; in (or inherited by) the linux-server host template definition.
host_name 13-web01
alias 13-web01
address 172.16.1.13
process_perf_data 1
action_url /nagios/pnp/index.php?host=$HOSTNAME$
}
define host{
use linux-server ; Name of host template to use
; This host definition will inherit all variables that are defined
; in (or inherited by) the linux-server host template definition.
host_name 14-web02
alias 14-web02
address 172.16.1.14
process_perf_data 1
action_url /nagios/pnp/index.php?host=$HOSTNAME$
}
vim objects/templates.cfg
#直接配置services.cfg中使用的模板文件
define service{
name generic-service ; The 'name' of this service template
active_checks_enabled 1 ; Active service checks are enabled
passive_checks_enabled 1 ; Passive service checks are enabled/accepted
parallelize_check 1 ; Active service checks should be parallelized (disabling this can lead to major performance problems)
obsess_over_service 1 ; We should obsess over this service (if necessary)
check_freshness 0 ; Default is to NOT check service 'freshness'
notifications_enabled 1 ; Service notifications are enabled
event_handler_enabled 1 ; Service event handler is enabled
flap_detection_enabled 1 ; Flap detection is enabled
failure_prediction_enabled 1 ; Failure prediction is enabled
process_perf_data 1 ; Process performance data
retain_status_information 1 ; Retain status information across program restarts
retain_nonstatus_information 1 ; Retain non-status information across program restarts
is_volatile 0 ; The service is not volatile
check_period 24x7 ; The service can be checked at any time of the day
max_check_attempts 3 ; Re-check the service up to 3 times in order to determine its final (hard) state
normal_check_interval 10 ; Check the service every 10 minutes under normal conditions
retry_check_interval 2 ; Re-check the service every two minutes until a hard state can be determined
contact_groups admins ; Notifications get sent out to everyone in the 'admins' group
notification_options w,u,c,r ; Send notifications about warning, unknown, critical, and recovery events
notification_interval 60 ; Re-notify about service problems every hour
notification_period 24x7 ; Notifications can be sent out at any time
register 0 ; DONT REGISTER THIS DEFINITION - ITS NOT A REAL SERVICE, JUST A TEMPLATE!
action_url /nagios/pnp/index.php?host=$HOSTNAME$&srv=$SERVICEDESC$
}
/etc/init.d/nagios checkconfig
/etc/init.d/nagios reload
#网页
172.16.1.53/nagios
#服务状态数据存放目录,注意备份
tree /usr/local/nagios/share/perfdata
/usr/local/nagios/share/perfdata
|-- 13-web01
| |-- Current_Load.rrd
| |-- Current_Load.xml
| |-- Disk_IOstat.rrd
| |-- Disk_IOstat.xml
| |-- Disk_Partition.rrd
| |-- Disk_Partition.xml
| |-- Mem_Usage.rrd
| |-- Mem_Usage.xml
| |-- PING.rrd
| |-- PING.xml
| |-- Swap_Usage.rrd
| |-- Swap_Usage.xml
| |-- blog_url.rrd
| |-- blog_url.xml
| |-- http_80.rrd
| |-- http_80.xml
| |-- ssh_22.rrd
| `-- ssh_22.xml
`-- 14-web02
|-- Current_Load.rrd
|-- Current_Load.xml
|-- Disk_IOstat.rrd
|-- Disk_IOstat.xml
|-- Disk_Partition.rrd
|-- Disk_Partition.xml
|-- Mem_Usage.rrd
|-- Mem_Usage.xml
|-- PING.rrd
|-- PING.xml
|-- Swap_Usage.rrd
|-- Swap_Usage.xml
|-- blog_url.rrd
|-- blog_url.xml
|-- http_80.rrd
|-- http_80.xml
|-- ssh_22.rrd
`-- ssh_22.xml

Nagios故障报警(邮件)
vim objects/contacts.cfg
email asdftttt@163.com
vim objects/commands.cfg
#优化一下报警信息
# 'notify-host-by-email' command definition
define command{
command_name notify-host-by-email
command_line /usr/bin/printf "%b" "***** Nagios *****\nNotification Type: $NOTIFICATIONTYPE$\nHost: $HOSTNAME$\nState: $HOSTSTATE$\nAddress: $HOSTADDRESS$\nInfo: $HOSTOUTPUT$\nDate/Time: $LONGDATETIME$\n" | /bin/mail -s "Host $HOSTSTATE$ alert for $HOSTNAME$!" $CONTACTEMAIL$
}
# 'notify-service-by-email' command definition
define command{
command_name notify-service-by-email
command_line /usr/bin/printf "%b" "***** Nagios *****\nNotification Type: $NOTIFICATIONTYPE$\nService: $SERVICEDESC$\nHost: $HOSTALIAS$\nAddress: $HOSTADDRESS$\nState: $SERVICESTATE$\nDate/Time: $LONGDATETIME$\nAdditional Info:\n$SERVICEOUTPUT$\n" | /bin/mail -s " $HOSTALIAS$/$SERVICEDESC$ is $SERVICESTATE$ " $CONTACTEMAIL$
}
Shell开发Nagios插件
#Nagios为插件提供两个接口:退出状态码和控制台打印的第一行信息
#主动模式:编写一个探测url的插件
cd /usr/local/nagios/libexec
vim check_url.sh
#!/bin/bash
# Nagios plugin: probe a URL with `wget --spider` and report the result.
#
# Usage:   check_url.sh url
# Output:  one status line, "url <url> OK" or "url <url> NO".
# Exit:    $STATE_OK when wget succeeds, $STATE_CRITICAL otherwise
#          (both values come from utils.sh in the plugin directory);
#          1 when the argument count is wrong.
PROGNAME=$(basename "$0")
PROGPATH=$(dirname "$0")

# Print the usage line and terminate with status 1.
usage() {
  echo "Usage: /bin/sh $PROGNAME url"
  exit 1
}

# Exactly one argument (the URL) is required.
[ $# -ne 1 ] && usage

# utils.sh defines the STATE_* exit codes used below.
. "$PROGPATH/utils.sh"

# -T 20 sets wget's timeout; --spider checks the URL without downloading it.
if wget -T 20 --spider "$1" > /dev/null 2>&1; then
  # Double quotes are required here so the URL is expanded in the status
  # line; single quotes would print the literal text "$1".
  echo "url $1 OK"
  exit "$STATE_OK"
else
  echo "url $1 NO"
  exit "$STATE_CRITICAL"
fi
#[root@mage-monitor-01 libexec]# cat ./utils.sh
#! /bin/sh
#
#STATE_OK=0
#STATE_WARNING=1
#STATE_CRITICAL=2
#STATE_UNKNOWN=3
#STATE_DEPENDENT=4
sh /usr/local/nagios/libexec/check_url.sh 172.16.1.14
chmod +x check_url.sh
cd /usr/local/nagios/etc/objects/
vim commands.cfg
#'check_url' command defined by peter
define command{
command_name check_url
command_line $USER1$/check_url.sh 172.16.1.14
}
vim services.cfg
define service {
use generic-service
host_name 14-web02
service_description check_url
check_command check_url
}
/etc/init.d/nagios checkconfig
/etc/init.d/nagios reload
#被动模式:监控/etc/passwd文件是否变化
#web02
md5sum /etc/passwd > /opt/ps.md5
cat /opt/ps.md5
70fe6e84988c7298fe6c5f108e02df39 /etc/passwd
cd /usr/local/nagios/libexec/
vim check_passwd.sh
#!/bin/bash
# Nagios plugin: detect changes to /etc/passwd by comparing its current MD5
# digest with a baseline digest recorded in this script.
# Prints a one-line status; exits 0 when the digests match, 2 when they differ.
readonly baseline_sum="70fe6e84988c7298fe6c5f108e02df39"

# md5sum prints "<digest>  <file>"; keep only the 32-character digest.
current_sum=$(md5sum /etc/passwd | cut -c 1-32)

# Guard clause: report the failure first, fall through to the OK case.
if [[ "$baseline_sum" != "$current_sum" ]]; then
  echo "/etc/passwd:failed"
  exit 2
fi

echo "/etc/passwd:ok"
exit 0
sh check_passwd.sh
chmod +x check_passwd.sh
cd /usr/local/nagios/etc/
vim nrpe.cfg
command[check_passwd]=/usr/local/nagios/libexec/check_passwd.sh
netstat -nutlp | grep nrpe
pkill nrpe
/usr/local/nagios/bin/nrpe -c /usr/local/nagios/etc/nrpe.cfg -d
netstat -nutlp | grep nrpe
#nagios服务端
vim services.cfg
define service {
use generic-service
host_name 14-web02
service_description check_passwd
check_command check_nrpe!check_passwd
}
/etc/init.d/nagios checkconfig
/etc/init.d/nagios reload
