#!/bin/bash
# Identifying details placed at the head of every report: a timestamp, this
# machine's hostname and the IPv4 address configured on eth0.
# The date format contains a space, so it has to be quoted to reach `date` as
# a single operand.
Date=$(date '+%Y-%m-%d %H:%M:%S')
HostName=$(hostname)
# Accept both ifconfig layouts: "inet addr:1.2.3.4" and "inet 1.2.3.4".
# The trailing space in /inet / keeps "inet6" lines from matching.
IP=$(ifconfig eth0 | awk '/inet / { sub(/^addr:/, "", $2); print $2; exit }')
# Two-line banner: dashed timestamp line, then the server name and IP.
ServerInfo="--------${Date}--------
服务器:${HostName} IP:${IP}"
# Retention: remove the dated monitoring logs stamped five days ago.
LOGPWD=/var/log/monitor
LOGDATE=$(date -d '-5 day' +%Y%m%d)

#######################################
# Delete the entries of a directory whose names end in <stamp>.log.
# Arguments: $1 - directory holding the logs
#            $2 - date stamp (YYYYMMDD) to purge
# Returns:   0. Does nothing when either argument is empty, so a failed
#            `date` call cannot widen the glob to every *.log entry.
#######################################
purge_dated_logs() {
  local dir=$1 stamp=$2 victim
  [[ -n "$dir" && -n "$stamp" ]] || return 0
  for victim in "$dir"/*"$stamp".log; do
    # An unmatched glob stays literal; skip it rather than hand it to rm.
    [[ -e "$victim" || -L "$victim" ]] || continue
    rm -rf -- "$victim"
  done
  return 0
}

purge_dated_logs "$LOGPWD" "$LOGDATE"
# Start a fresh monitor.log for this run: the server banner followed by the
# output of `uptime` and `free -m`, each under a line naming the command.
# The directory is created first so that a run on a host where it does not
# exist yet does not lose the report to a failed redirection.
mkdir -p -- "$LOGPWD"
{
  printf '%s\n' "$ServerInfo"
  printf '%s\n' 'uptime' "$(uptime)"
  printf '%s\n' 'free -m' "$(free -m)"
} > "$LOGPWD/monitor.log"
# CPU load check: compare the 15-minute load average with the core count.

#######################################
# Count the CPU cores described by a cpuinfo file as
# (distinct "physical id" lines) x (distinct "core id" lines).
# Arguments: $1 - cpuinfo path (defaults to /proc/cpuinfo)
# Outputs:   the core count on stdout, never less than 1
#######################################
count_cpu_cores() {
  local cpuinfo=${1:-/proc/cpuinfo}
  local sockets per_socket total
  sockets=$(grep 'physical id' "$cpuinfo" | sort -u | wc -l)
  per_socket=$(grep 'core id' "$cpuinfo" | sort -u | wc -l)
  total=$(( sockets * per_socket ))
  # Not every cpuinfo carries these lines (ARM kernels, for one, omit them).
  # The product is then 0 and any load of 1 or more would be reported as an
  # overload, so fall back to the number of "processor" entries.
  if (( total < 1 )); then
    total=$(grep -c '^processor' "$cpuinfo")
  fi
  (( total >= 1 )) || total=1
  printf '%s\n' "$total"
}

count_cores=$(count_cpu_cores)

# The first three fields of /proc/loadavg are the 1-, 5- and 15-minute
# averages, always written with a '.' decimal separator. `uptime` is only
# used when that file cannot be read; its last word is the 15-minute figure.
if [[ -r /proc/loadavg ]]; then
  read -r _ _ AverageLoad _ < /proc/loadavg
else
  AverageLoad=$(uptime | awk '{print $NF}')
fi

# Keep the integer part only; the comparison is whole-number arithmetic.
AverageInt=${AverageLoad%%.*}
# Anything that is not a plain number counts as 0 so later tests cannot fail
# with a syntax error.
load_digits='^[0-9]+$'
[[ "$AverageInt" =~ $load_digits ]] || AverageInt=0

if (( AverageInt > count_cores )); then
  load_verdict="15分钟的负载为${AverageLoad},核心数${count_cores},超过cpu核心数,请处理!"
else
  load_verdict="15分钟的负载为${AverageLoad},核心数${count_cores},负载正常。"
fi

# The dated per-day log gets the banner plus the verdict; monitor.log (the
# file mailed on alert) gets the verdict only.
printf '%s\n' "$ServerInfo" "$load_verdict" >> "$LOGPWD/cpu_status_$(date +%Y%m%d).log"
printf '%s\n' "$load_verdict" >> "$LOGPWD/monitor.log"
# Memory check: alert when more than 80% of RAM is not free.

#######################################
# Percentage of memory in use, computed as 100 - free*100/total with
# truncating integer division.
# Arguments: $1 - total memory, $2 - free memory (same unit, whole numbers)
# Outputs:   the percentage on stdout; 0 when either argument is not a
#            number or the total is zero, so callers always get an integer.
#######################################
mem_used_percent() {
  local total=$1 unused=$2
  local digits='^[0-9]+$'
  if [[ ! "$total" =~ $digits || ! "$unused" =~ $digits ]] \
    || (( 10#$total == 0 )); then
    printf '0\n'
    return 0
  fi
  printf '%s\n' "$(( 100 - 10#$unused * 100 / 10#$total ))"
}

# A single `free` call so both figures come from the same snapshot. LC_ALL=C
# pins the "Mem:" row label in case the tool localizes it.
# NOTE(review): column 4 is the "free" column, which does not include
# buffers/cache, so reclaimable cache is counted as used - confirm that this
# is the intended metric.
read -r MemTotal MemFree < <(LC_ALL=C free -m | awk '/^Mem:/ { print $2, $4; exit }')
MemRate=$(mem_used_percent "$MemTotal" "$MemFree")

if (( MemRate > 80 )); then
  mem_verdict="内存使用率${MemRate}%,大于80%,请处理。"
else
  mem_verdict="内存使用率${MemRate}%,小于等于80%,内存负载正常。"
fi

# The dated per-day log gets the banner plus the verdict; monitor.log (the
# file mailed on alert) gets the verdict only.
printf '%s\n' "$ServerInfo" "$mem_verdict" >> "$LOGPWD/mem_status_$(date +%Y%m%d).log"
printf '%s\n' "$mem_verdict" >> "$LOGPWD/monitor.log"
# Mail the monitor.log snapshot when either the load or the memory check
# crossed its threshold.
ALERT_RECIPIENT=xxx@xxx.xxx

#######################################
# Send a report file as the body of the alert mail.
# Globals:   HostName (read), ALERT_RECIPIENT (read)
# Arguments: $1 - file whose contents become the message body
# Returns:   the exit status of `mail`
#######################################
send_load_alert() {
  local report=$1
  # Subject and recipient must be separate words: glued together, the address
  # becomes part of the subject and mail is left without a recipient.
  mail -s "${HostName}服务器负载监控报警" "$ALERT_RECIPIENT" < "$report"
}

# Quoted operands with a 0 default keep the tests well-formed even when one of
# the earlier measurements came back empty.
if [ "${AverageInt:-0}" -gt "${count_cores:-0}" ] || [ "${MemRate:-0}" -gt 80 ]; then
  send_load_alert "$LOGPWD/monitor.log"
fi