1.每10分钟执行一次
# Run /opt/shell/check_disk.sh at every minute divisible by 10 (every 10 minutes).
*/10 * * * * /opt/shell/check_disk.sh
2.脚本内容
#!/bin/bash
#
# check_disk.sh - disk-usage alerting for a list of remote hosts.
#
# For every host named in HOSTS_FILE (entries separated by any whitespace)
# the script runs df over ssh and looks at two groups of file systems:
#   * system disk: df lines ending in "/"
#   * data disks:  df lines matching /mnt or /data
# When the fullest file system of a group is above THRESHOLD percent, a
# markdown message is posted to that group's webhook.
#
# NOTE(review): the webhook keys below are credentials and are kept inline
# exactly as in the original script; consider loading them from a file that
# only the cron user can read.

readonly HOSTS_FILE=/opt/shell/hosts
readonly THRESHOLD=85
readonly SYS_WEBHOOK='https://qyapi.weixin.qq.com/cgi-bin/webhook/send?key=348wo935-shia-4939-a311-dc221d4sb6c4'
readonly DATA_WEBHOOK='https://qyapi.weixin.qq.com/cgi-bin/webhook/send?key=3482ni35-9a1a-4shi9-a311-dc2sbd4766c4'
# -P keeps every file system on a single line, so Use% is always field 5.
readonly SYS_DF_CMD="df -hP | grep '/\$'"
readonly DATA_DF_CMD="df -hP | grep -E '/mnt|/data'"

# Print a diagnostic on stderr.
warn() {
  printf 'check_disk: %s\n' "$*" >&2
}

# Print the highest Use% value (integer, without the % sign) found in the
# df lines read from stdin. Prints nothing when no line carries a numeric
# percentage in field 5.
max_usage() {
  awk '
    $5 ~ /^[0-9]+%$/ {
      pct = $5 + 0
      if (!found || pct > max) { max = pct; found = 1 }
    }
    END { if (found) print max }
  '
}

# Escape backslashes and double quotes so $1 can be embedded in a JSON string.
json_escape() {
  local text=$1
  text=${text//\\/\\\\}
  text=${text//\"/\\\"}
  printf '%s' "$text"
}

# Post one markdown alert.
#   $1 - webhook URL
#   $2 - host the alert is about
#   $3 - disk label inserted into the message text
#   $4 - current usage, integer percent
# Returns curl's exit status.
send_alert() {
  local url=$1 host=$2 label=$3 usage=$4
  local payload
  # The quotes around "warning" have to be backslash-escaped; without that
  # the request body is not valid JSON.
  payload=$(printf '{"msgtype": "markdown", "markdown": {"content": "<font color=\\"warning\\">%s</font>的%s磁盘使用率大于%s,当前使用率为%s%%,请及时处理!!"}}' \
    "$(json_escape "$host")" "$label" "$THRESHOLD" "$usage")
  curl -sS --max-time 10 "$url" -H 'Content-Type: application/json' -d "$payload"
}

# Check one group of file systems on one host and alert when the fullest
# one is above THRESHOLD.
#   $1 - host
#   $2 - remote command that prints the df lines to inspect
#   $3 - disk label for the message
#   $4 - webhook URL
# Returns 1 only when the ssh connection itself failed, 0 otherwise.
check_disks() {
  local host=$1 remote_cmd=$2 label=$3 url=$4
  local df_lines usage rc=0

  # -n stops ssh from consuming the host list that the caller's loop reads
  # on stdin; BatchMode and ConnectTimeout keep an unattended run from
  # blocking on a password prompt or a dead host.
  df_lines=$(ssh -n -o BatchMode=yes -o ConnectTimeout=10 "$host" "$remote_cmd") || rc=$?
  # ssh reports its own failures as 255; any other non-zero status comes
  # from the remote pipeline (e.g. grep found no matching file system).
  if ((rc == 255)); then
    warn "ssh to ${host} failed"
    return 1
  fi

  usage=$(max_usage <<<"$df_lines")
  if [[ -z $usage ]]; then
    return 0
  fi
  if ((usage > THRESHOLD)); then
    send_alert "$url" "$host" "$label" "$usage" \
      || warn "could not deliver alert for ${host}"
  fi
  return 0
}

# Walk the host list and run both checks on every host.
main() {
  local host

  if [[ ! -r $HOSTS_FILE ]]; then
    warn "cannot read host list ${HOSTS_FILE}"
    return 1
  fi

  while read -r host; do
    [[ -n $host ]] || continue
    # An unreachable host is reported once; skip its second check.
    check_disks "$host" "$SYS_DF_CMD" '系统' "$SYS_WEBHOOK" || continue
    check_disks "$host" "$DATA_DF_CMD" '数据' "$DATA_WEBHOOK"
  done < <(tr -s '[:space:]' '\n' <"$HOSTS_FILE")
}

main "$@"
2.http响应时间监控告警
#!/bin/bash
#
# checklong_http_code.sh - endless HTTP health poller.
#
# Every POLL_INTERVAL seconds the script sends a HEAD request to each URL
# listed in SERVICES_LIST (entries separated by any whitespace; the path is
# relative to the current directory). A service whose status code is neither
# 200 nor 302 within the 10 second request limit is logged to stdout and
# reported to the webhook. The script runs until it is killed.
#
# NOTE(review): the webhook key below is a credential and is kept inline
# exactly as in the original script.

readonly SERVICES_LIST=services_list
readonly POLL_INTERVAL=30
readonly WEBHOOK_URL='https://qyapi.weixin.qq.com/cgi-bin/webhook/send?key=348wo935-9sha-4i39-a311-dc2sb21d4766c4'

# Escape backslashes and double quotes so $1 can be embedded in a JSON string.
json_escape() {
  local text=$1
  text=${text//\\/\\\\}
  text=${text//\"/\\\"}
  printf '%s' "$text"
}

# Print the HTTP status code returned for a HEAD request to $1.
# curl prints 000 when no response arrives within the 10 second limit.
probe() {
  curl -I -o /dev/null -m 10 -s -w '%{http_code}' "$1"
}

# Post the "service is failing" markdown alert for the service in $1.
# Returns curl's exit status.
notify_down() {
  local service=$1
  local payload
  # The quotes around "warning" have to be backslash-escaped; without that
  # the request body is not valid JSON.
  payload=$(printf '{"msgtype": "markdown", "markdown": {"content": "<font color=\\"warning\\">%s</font>服务状态超长时间(10s)返回异常,请快速检查!!"}}' \
    "$(json_escape "$service")")
  # Bounded so a slow webhook cannot stall the polling loop.
  curl -sS --max-time 10 "$WEBHOOK_URL" -H 'Content-Type: application/json' -d "$payload"
}

while true; do
  # The list is re-read on every pass, so edits take effect without a restart.
  if [[ -r $SERVICES_LIST ]]; then
    while read -r service; do
      [[ -n $service ]] || continue
      http_code=$(probe "$service")
      if [[ $http_code != 200 && $http_code != 302 ]]; then
        date
        echo "$service is down"
        notify_down "$service" \
          || printf 'checklong_http_code: could not deliver alert for %s\n' "$service" >&2
      fi
    done < <(tr -s '[:space:]' '\n' <"$SERVICES_LIST")
  else
    printf 'checklong_http_code: cannot read %s\n' "$SERVICES_LIST" >&2
  fi
  sleep "$POLL_INTERVAL"
done
3.内存cpu
#!/bin/bash
#
# /opt/shell/check_memcpu.sh - memory and CPU alerting for remote hosts.
#
# For every host named in HOSTS_FILE (entries separated by any whitespace;
# the path is relative to the current directory) the script reads
# `free -m` and `mpstat 1 1` over ssh and posts a webhook alert when the
# memory or CPU usage is above THRESHOLD percent.
#
# NOTE(review): the webhook key below is a credential and is kept inline
# exactly as in the original script.

readonly HOSTS_FILE=hosts
readonly THRESHOLD=85
readonly WEBHOOK_URL='https://qyapi.weixin.qq.com/cgi-bin/webhook/send?key=348wo935-shia-4939-a311-dc221dsb66c4'

# Print a diagnostic on stderr.
warn() {
  printf 'check_memcpu: %s\n' "$*" >&2
}

# Send an alert ("baojing" = alarm) to the webhook.
#   $1 - message text; it is shown inside a warning-coloured <font> tag
# Returns curl's exit status.
baojing() {
  local message=$1
  local payload
  # Escape backslashes and double quotes so the text is a valid JSON string.
  message=${message//\\/\\\\}
  message=${message//\"/\\\"}
  # The quotes around "warning" have to be backslash-escaped as well;
  # without that the request body is not valid JSON.
  payload=$(printf '{"msgtype": "markdown", "markdown": {"content": "<font color=\\"warning\\">%s</font>"}}' "$message")
  curl -sS --max-time 10 "$WEBHOOK_URL" -H 'Content-Type: application/json' -d "$payload"
}

# Run command $2 on host $1 without letting ssh read the caller's stdin or
# block on a password prompt / dead host.
run_remote() {
  ssh -n -o BatchMode=yes -o ConnectTimeout=10 "$1" "$2"
}

# Read the second line of `free -m` output from stdin (field 2 = total,
# field 3 = used) and print used/total as a truncated integer percentage.
# Prints nothing when the total is missing or zero.
mem_usage() {
  # Multiply before dividing so float rounding cannot drop a whole percent.
  awk '$2 > 0 { printf "%d\n", $3 * 100 / $2; exit }'
}

# Read the last line of `mpstat 1 1` output from stdin and print field 3
# plus field 5 as a truncated integer. Prints nothing on short input.
# NOTE(review): assumes fields 3 and 5 are %usr and %sys on that line -
# confirm against the sysstat version installed on the hosts.
cpu_usage() {
  awk 'NF >= 5 { printf "%d\n", $3 + $5; exit }'
}

# Check memory and CPU on one host ($1) and alert when either is too high.
check_host() {
  local host=$1
  local meminfo mem cpuinfo cpu

  ############# memory
  if ! meminfo=$(run_remote "$host" "free -m | sed -n '2p'"); then
    warn "ssh to ${host} failed"
    return 1
  fi
  mem=$(mem_usage <<<"$meminfo")
  if [[ $mem =~ ^[0-9]+$ ]]; then
    if ((mem > THRESHOLD)); then
      baojing "服务器${host}的内存使用率大于${THRESHOLD}%,当前使用率为${mem}%" \
        || warn "could not deliver memory alert for ${host}"
    fi
  else
    warn "no memory figures from ${host}"
  fi

  ############# cpu
  # LC_ALL=C asks for "." as the decimal separator in the mpstat figures.
  if ! cpuinfo=$(run_remote "$host" "LC_ALL=C mpstat 1 1 | tail -n 1"); then
    warn "could not read cpu statistics from ${host}"
    return 1
  fi
  cpu=$(cpu_usage <<<"$cpuinfo")
  if [[ $cpu =~ ^[0-9]+$ ]]; then
    # Kept from the original script: the CPU figure is printed for each host.
    printf '%s\n' "$cpu"
    if ((cpu > THRESHOLD)); then
      baojing "服务器${host}的cpu使用率大于${THRESHOLD}%,当前使用率为${cpu}%" \
        || warn "could not deliver cpu alert for ${host}"
    fi
  else
    warn "no cpu figures from ${host}"
  fi
  return 0
}

# Walk the host list and check every host.
main() {
  local host

  if [[ ! -r $HOSTS_FILE ]]; then
    warn "cannot read host list ${HOSTS_FILE}"
    return 1
  fi

  while read -r host; do
    [[ -n $host ]] || continue
    check_host "$host"
  done < <(tr -s '[:space:]' '\n' <"$HOSTS_FILE")
}

main "$@"
[dev@VM_0_11_centos shell]$ cat hosts 10.0.0.28 10.0.0.24 10.0.0.29 10.0.0.12 10.0.0.30 10.0.0.17