zoukankan      html  css  js  c++  java
  • Linux看门狗脚本 1.4

    近期项目的看门狗脚本经历了四个版本。

    第一个版本:

    用 ps -ef 检查进程,如果程序挂了就重新启动。


    第二个版本:

    由于程序运行时可能出现进程还在、但不再监听 7901 端口的情况,所以不能简单地判断程序是不是挂了,而是要判断该端口是否仍在监听。


    第三个版本:

    当 7901 端口不再监听时,先用 killall 结束原来的进程,再重新启动。每次写入日志文件的内容都加上日期时间,否则根本不知道事情是什么时候发生的。


    第四个版本:

    使用 nohup 让程序和监控脚本的 echo 输出不再写到标准输出设备(终端),而是写入文件。这样就彻底脱离了 shell,即使退出当前 shell,也能真正实现后台运行。


    老版本如下:

    #!/bin/sh
    #
    # Watchdog for scp_platform (old version).
    #
    # Every 180 seconds, check whether a TCP socket is listening on ${port}.
    # If none is, kill scp_platform and start it again. Every event is appended
    # with a timestamp to ${WORK_DIR}/log/${FILE_NAME}.
    #
    # Requires env.sh in the current directory; it must export WORK_DIR and put
    # scp_platform on PATH.
    set +x

    # Use "." rather than "source": this script runs under /bin/sh, and "source"
    # is not available in every sh. The explicit "./" avoids a PATH lookup.
    . ./env.sh

    # Stop here instead of silently logging under "/log" if env.sh did not
    # provide WORK_DIR.
    : "${WORK_DIR:?WORK_DIR is not set; check env.sh}"

    PRMGRAM=scp_platform
    FILE_NAME=scp_monitor.log
    LOG_FILE="${WORK_DIR}/log/${FILE_NAME}"

    port=7905

    # Print the current time with nanoseconds (%N needs GNU date).
    now() {
      date +"%Y-%m-%d %H:%M:%S.%N"
    }

    # Print the number of netstat rows whose protocol column is exactly "tcp",
    # whose state is LISTEN and which contain ":${port} ".
    count_listeners() {
      netstat -an | awk -v needle=":${port} " '
        $1 == "tcp" && $NF == "LISTEN" && index($0, needle) { n++ }
        END { print n + 0 }'
    }

    Current_Time=$(now)
    echo "[${Current_Time}] monitor start...."
    echo "[${Current_Time}] monitor start...." >> "${LOG_FILE}"

    # One-off status report on stdout before entering the loop.
    # ">= 1" rather than "= 1": a service bound to several addresses yields
    # more than one LISTEN row and must still count as listening.
    if [ "$(count_listeners)" -ge 1 ]; then
      echo "[${Current_Time}] The $port is listening"
    else
      echo "[${Current_Time}] The port is not listening"
    fi

    while true; do
      Current_Time=$(now)
      if [ "$(count_listeners)" -ge 1 ]; then
        echo "[${Current_Time}] The ${port} is listening" >> "${LOG_FILE}"
      else
        echo "[${Current_Time}] The  ${port} is not listening" >> "${LOG_FILE}"
        echo "[${Current_Time}] killall  scp_platform now !" >> "${LOG_FILE}"
        # Spelled out instead of the "kscp" alias from env.sh: whether aliases
        # are expanded inside a script depends on the shell.
        killall -9 scp_platform
        echo "[${Current_Time}] check ${PRMGRAM} quit, now restart ${PRMGRAM} ..." >> "${LOG_FILE}"
        scp_platform &
      fi
      # Give the service time to finish loading before probing the port again.
      sleep 180
    done
    

    新版本如下:

    start_monitor.sh #此脚本负责将 monitor.sh 放到后台运行

    #!/bin/bash
    #
    # start_monitor.sh - start monitor.sh in the background, detached from the
    # console, so it keeps running after the launching shell exits.

    # Work from this script's own directory so that "./monitor.sh" resolves no
    # matter where the caller's current directory is.
    cd "$(dirname "$0")" || exit 1

    # nohup makes the monitor ignore SIGHUP; when stdout is a terminal, nohup
    # redirects the output to nohup.out.
    nohup ./monitor.sh &
    

    monitor.sh #实际的monitor监控程序

    #!/bin/bash
    #
    # monitor.sh - watchdog for scp_platform.
    #
    # Every 180 seconds, check whether a TCP socket is listening on ${port}.
    # If none is, run "killall scp_platform" and start it again under nohup.
    # Every event is appended with a timestamp to ${WORK_DIR}/log/${FILE_NAME}.
    #
    # Requires env.sh in the current directory; it must export WORK_DIR and put
    # scp_platform on PATH.
    set -x

    # Source env.sh into this shell. Running it as a background child
    # ("nohup ./env.sh &") cannot work: a child process cannot change its
    # parent's environment, so WORK_DIR and PATH would stay unset here.
    source ./env.sh

    # Stop here instead of silently logging under "/log" if env.sh did not
    # provide WORK_DIR.
    : "${WORK_DIR:?WORK_DIR is not set; check env.sh}"

    PRMGRAM=scp_platform
    FILE_NAME=scp_monitor.log
    LOG_FILE="${WORK_DIR}/log/${FILE_NAME}"

    port=7905

    # Print the current time with nanoseconds (%N needs GNU date).
    now() {
      date +"%Y-%m-%d %H:%M:%S.%N"
    }

    # Print the number of netstat rows whose protocol column is exactly "tcp",
    # whose state is LISTEN and which contain ":${port} ".
    count_listeners() {
      netstat -an | awk -v needle=":${port} " '
        $1 == "tcp" && $NF == "LISTEN" && index($0, needle) { n++ }
        END { print n + 0 }'
    }

    Current_Time=$(now)
    echo "[${Current_Time}] monitor start...."
    echo "[${Current_Time}] monitor start...." >> "${LOG_FILE}"

    # One-off status report on stdout before entering the loop.
    # ">= 1" rather than "= 1": a service bound to several addresses yields
    # more than one LISTEN row and must still count as listening.
    if (( $(count_listeners) >= 1 )); then
      echo "[${Current_Time}] The $port is listening"
    else
      echo "[${Current_Time}] The port is not listening"
    fi

    while true; do
      Current_Time=$(now)
      if (( $(count_listeners) >= 1 )); then
        echo "[${Current_Time}] The ${port} is listening" >> "${LOG_FILE}"
      else
        echo "[${Current_Time}] The  ${port} is not listening" >> "${LOG_FILE}"
        echo "[${Current_Time}] killall  scp_platform now !" >> "${LOG_FILE}"
        killall scp_platform
        echo "[${Current_Time}] check ${PRMGRAM} quit, now restart ${PRMGRAM} ..." >> "${LOG_FILE}"
        # nohup so the restarted service also survives the loss of the terminal.
        nohup scp_platform &
      fi
      # Give the service time to finish loading before probing the port again.
      sleep 180
    done
    

    这里之所以要 sleep 180,是由于程序加载时间稍微有点长,否则在加载还没完成的时候,是无法判断它有没有监听 7905 端口的。


    原来版本的 env.sh #无需修改就可以使用
    env.sh 主要是环境变量设置和自定义的变量(别名)

    #!/bin/bash
    #
    # env.sh - environment for the scp_platform service: exported paths, ODBC
    # configuration, convenience aliases and resource limits.
    #
    # Source this file (". ./env.sh") so the settings land in the calling
    # shell; executing it as a separate process changes nothing for the caller.

    # Installation root and the directories derived from it.
    export ROOT=/root/scp
    export WORK_DIR=${ROOT}
    export INCLUDE=${ROOT}/include
    export OTL=${INCLUDE}/otl_mysql
    # Note: this replaces any LD_LIBRARY_PATH inherited from the caller.
    export LD_LIBRARY_PATH=${ROOT}/lib:/usr/local/lib
    export ACE_ROOT=${INCLUDE}
    export ODBCINI=/usr/local/etc/odbc.ini
    export ODBCSYSINI=/usr/local/etc
    PATH=${PATH}:${ROOT}/bin
    export PATH
    # Print the ODBC configuration that unixODBC picks up.
    odbcinst -j


    # Aliases are single-quoted, so ${ROOT} is expanded when the alias is used,
    # not when it is defined.
    alias wk='cd "${ROOT}"'
    alias bin='cd "${ROOT}"/bin'
    alias cfg='cd "${ROOT}"/conf'
    # ${ROOT:?} aborts the command if ROOT is empty or unset, so the rm can
    # never expand to /bin/log*.* or /log/*.*.
    alias rmlog='rm -rf "${ROOT:?}"/bin/log*.*; rm -rf "${ROOT:?}"/log/*.*'
    alias lis='netstat -an|grep -i 7905'
    # NOTE(review): this alias shadows the system scp (secure copy) command in
    # interactive shells that source this file.
    alias scp='"${ROOT}"/bin/scp_platform &'
    alias moni='"${ROOT}"/bin/monitor.sh &'
    alias myps='ps -fu root|grep -v grep|grep -i scp'
    alias mymoni='ps -fu root|grep -v grep|grep -i moni'
    alias kscp='killall -9 scp_platform'
    alias kmoni='killall -9 monitor.sh'
    # NOTE(review): isql is invoked with no arguments; presumably a leftover or
    # an availability check - confirm whether it is still wanted.
    isql
    alias mynet='netstat -an | grep 7905'


    # Allow core dumps of any size and raise the open-file limit.
    ulimit -c unlimited
    ulimit -n 65530
    





  • 相关阅读:
    elasticsearch + python
    Django中的监控组件Prometheus
    MySQL索引原理以及最左前缀原则
    Insert ignore,还是insert replace还是insert on duplicate key update区别
    PyTorch中Tensor与numpy数据之间时共享的条件:
    在Numpy中numpy.nan显示成-9223372036854775808的缘故
    mac上安装pil报错: ERROR: Could not find a version that satisfies the requirement pil (from versions: none):
    OSError: [Errno 48] Address already in use
    curl: (7) Failed to connect to raw.githubusercontent.com port 443: Connection refused
    在实际项目中使用git推代码踩过的坑
  • 原文地址:https://www.cnblogs.com/bhlsheji/p/5222421.html
Copyright © 2011-2022 走看看