zoukankan      html  css  js  c++  java
  • Paas平台架构

    1. 整体架构:

    ui--> controller -> service --> mapper -->mysql     (后端,主要维护服务列表)

            ⬇

      AgentService --> sqlite        (虚机层面,主要维护当前虚机的运行服务,同时向脚本发布  install,start, stop , uninstall, take-over, monitor_trace, installall, uninstallall 等命令)

        ⬇

           脚本 (包括中间件 mysql, redis, kafka, elk, nacos, sentinel, mongodb, skywalking 等的维护,以及接管的 java 服务的维护)

    2. 整个平台精华全在脚本,后端主要是向脚本下发命令,维护库的信息等。

    3. 脚本简述

     agentctl.sh 举例:

    #!/bin/bash
    # Directory that holds the downloaded packages (JAR, tarballs, helper scripts)
    readonly PACKAGE_FULL_WAY=/opt/download/packages
    # File name of this control script; installAgent copies it into the install directory
    readonly SH_HOME=agentctl.sh
    # Directory the agent files are copied into and the JAR is started from
    readonly AGENT_INSTALL_HOME=/opt/agent
    
    # Package file names, looked up under PACKAGE_FULL_WAY
    readonly JAR_NAME=sitesupport-agent-0.0.1-SNAPSHOT.jar
    readonly NODE_EXPORTER=node_exporter-1.1.2.linux-amd64.tar.gz
    
    # Load common.sh; abort when it cannot be sourced.
    # NOTE(review): helpers used below (logInfo, logError, checkResult, ...) are not
    # defined in this file — presumably they come from common.sh; confirm.
    # shellcheck disable=SC1091
    source "${PACKAGE_FULL_WAY}"/common.sh || exit
    
    #######################################
    # Make sure an RSA key pair exists for password-less ssh.
    # Arguments: $1 - ssh directory (optional, defaults to /root/.ssh)
    # Returns:   0 when the pair already exists or was created, 1 on failure
    #######################################
    function createSshkey() {
      local ssh_dir=${1:-/root/.ssh}
      local key_file=${ssh_dir}/id_rsa
      if [[ -e ${key_file} && -e ${key_file}.pub ]]; then
        logInfo "ssh-key已存在!"
        return 0
      fi
      # Work with absolute paths instead of cd: the caller's working directory is
      # left alone and a failed cd can no longer drop the key somewhere else.
      if ! mkdir -p -m 700 "${ssh_dir}"; then
        logError "ssh-key生成失败: 无法创建 ${ssh_dir}"
        return 1
      fi
      if [[ -e ${key_file} ]]; then
        # Only the public half is missing: re-derive it, otherwise ssh-keygen would
        # stop and ask interactively whether to overwrite the private key.
        if ! ssh-keygen -y -f "${key_file}" >"${key_file}.pub"; then
          logError "ssh-key生成失败"
          return 1
        fi
      elif ! ssh-keygen -f "${key_file}" -N ""; then
        logError "ssh-key生成失败"
        return 1
      fi
      logInfo "ssh-key生成成功"
    }
    
    #######################################
    # Replace any JDK found on the host with the bundled RPM, then create the
    # JMX password file so that JMX can be accessed remotely.
    # Globals:   PACKAGE_FULL_WAY (read), JDK_PKG_NAME (read; not defined in this file)
    # Returns:   0 on success, 1 when the RPM install or the JMX setup fails
    #######################################
    function checkJdk() {
      logInfo "start check jdk...."
      if java -version &>/dev/null; then
        logInfo "start remove old jdk..."
        yum remove jdk -y
        # shellcheck disable=SC1091
        source /etc/profile
      fi
      logInfo "start install new jdk..."
      if ! rpm -ivh "${PACKAGE_FULL_WAY}/${JDK_PKG_NAME}"; then
        logError "jdk1.8.0_291 未安装成功,请重新安装!"
        # Stop here: without the JDK the JMX files below do not exist.
        return 1
      fi
      # Allow remote JMX access
      local jmxremote_conf=/usr/java/jdk1.8.0_291-amd64/jre/lib/management
      if [[ ! -d ${jmxremote_conf} ]]; then
        logError "${jmxremote_conf} 不存在!"
        return 1
      fi
      # Absolute paths instead of cd, so a missing directory can never make the
      # password file land in whatever the current directory happens to be.
      local password_file=${jmxremote_conf}/jmxremote.password
      cp "${jmxremote_conf}/jmxremote.password.template" "${password_file}" || return 1
      chmod +w "${password_file}"
      # NOTE(review): the JMX credentials are hard-coded here — consider reading
      # them from configuration instead.
      printf '%s\n' "monitorRole QED" "controlRole R&D" >>"${password_file}"
      chmod 0400 "${password_file}"
      logInfo "the jdk is installed and the environment variables are configured"
    }
    
    #######################################
    # Make sure the cron daemon is running now and enabled at boot.
    # Returns:   exit status of `systemctl enable`
    #######################################
    function checkCrond() {
      # Ask systemd for the state through its exit status instead of scraping the
      # third line of the human-readable `systemctl status` output.
      if ! systemctl is-active --quiet crond.service; then
        systemctl start crond.service
      fi
    
      # Start cron automatically after a reboot
      systemctl enable crond.service
    }
    
    #######################################
    # Unpack node_exporter into ${AGENT_INSTALL_HOME}/node_exporter, register it
    # as a systemd service and start it.
    # Globals:   PACKAGE_FULL_WAY, NODE_EXPORTER, AGENT_INSTALL_HOME (read);
    #            linux_password, localIp (read, only in the prometheus branch)
    # Returns:   early (without installing) when the package file is missing,
    #            1 when the install directory cannot be entered
    #######################################
    function installNodeExporter() {
      logInfo "start install node exporter..."
      local package=${PACKAGE_FULL_WAY}/${NODE_EXPORTER}
      local install_dir=${AGENT_INSTALL_HOME}/node_exporter
      local unit_file=/usr/lib/systemd/system/node_exporter.service
      if [[ ! -e ${package} ]]; then
        logInfo "node exporter不存在!"
        return
      fi
      # Unpack into the install directory
      mkdir -p "${install_dir}"
      tar -zxvf "${package}" -C "${install_dir}" >/dev/null 2>&1
      checkResult $? "tar node exporter package error"
      # Flatten the versioned top-level directory of the archive. A directory glob
      # replaces `ls | grep`, so the node_exporter binary itself is never matched.
      local extracted=""
      for extracted in "${install_dir}"/*node_exporter*/; do
        [[ -d ${extracted} ]] || continue
        mv "${extracted}"* "${install_dir}"
        rm -rf "${extracted}"
      done
      if ! cd "${install_dir}"; then
        logError "${install_dir} 不存在!"
        return 1
      fi
    
      # Write the unit file from scratch. $MAINPID must reach systemd literally;
      # the previous unquoted here-document let the shell expand it to an empty
      # string, leaving ExecReload/ExecStop without a target PID.
      # shellcheck disable=SC2016
      printf '%s\n' \
        '[Unit]' \
        'Description=node_exporter' \
        'After=network-online.target remote-fs.target nss-lookup.target' \
        'Wants=network-online.target' \
        '' \
        '[Service]' \
        'Type=simple' \
        "ExecStart=${install_dir}/node_exporter" \
        'ExecReload=/bin/kill -s HUP $MAINPID' \
        'ExecStop=/bin/kill -s TERM $MAINPID' \
        '' \
        '[Install]' \
        'WantedBy=multi-user.target' \
        >"${unit_file}"
    
      systemctl daemon-reload
      systemctl enable node_exporter.service
      systemctl start node_exporter.service
    
      # TODO: register this exporter with the prometheus server.
      # The server address lookup is not implemented yet, so prometheus_ip is
      # always empty and the registration branch below never runs.
      # Planned fallback (was sketched as commented-out code): scan
      # ${AGENT_INSTALL_HOME}/nacos_config for the numbered entry whose name
      # starts with "prometheus" and read its .install_ip with jq.
      local prometheus_ip=""
      if [[ ${prometheus_ip} = "" ]] || [[ ${prometheus_ip} = "null" ]]; then
        echo "下次一定!"
      else
        rm -rf temp.json
        # shellcheck disable=SC2154
        sshpass -p "${linux_password}" ssh -n -o StrictHostKeyChecking=no root@"${prometheus_ip}" "cd /opt/sitesupport/prometheus-standalone &>/dev/null || exit;sh prometheusctl.sh add_exporter -j node-${localIp}-exporter -h ${localIp} -p 9100"
        return 0
      fi
    }
    
    #######################################
    # Create the install directory, copy the agent files into it and register the
    # self-healing cron job.
    # Globals:   AGENT_INSTALL_HOME, PACKAGE_FULL_WAY, JAR_NAME, SH_HOME (read)
    # Returns:   1 when the install directory already exists or cannot be created
    #######################################
    function installAgent() {
      # Create the install directory; refuse to install over an existing one
      if [[ -e ${AGENT_INSTALL_HOME} ]]; then
        logError "安装目录[${AGENT_INSTALL_HOME}]已存在,请检查!"
        return 1
      fi
      mkdir -p "${AGENT_INSTALL_HOME}" || return 1
      checkSshpass
      createSshkey
      checkCrond
      local artifact=""
      for artifact in "${JAR_NAME}" "${SH_HOME}" common.sh constant.sh agent.db; do
        cp -f "${PACKAGE_FULL_WAY}/${artifact}" "${AGENT_INSTALL_HOME}"
      done
      chmod 755 "${AGENT_INSTALL_HOME}/${SH_HOME}"
      # Register the cron job. The command is written plainly: wrapping it in
      # backticks made cron run it and then execute its *output* as a second
      # command. The entry is only appended when it is not present yet.
      local cron_entry="*/1 * * * * root cd ${AGENT_INSTALL_HOME} && sh ${SH_HOME} self_healing"
      if ! grep -qF -- "${cron_entry}" /etc/crontab 2>/dev/null; then
        echo "${cron_entry}" >>/etc/crontab
      fi
      logInfo "config jar finish"
    }
    
    # Full installation flow. Runs judgeMem 1024000 first (defined elsewhere;
    # presumably a minimum-memory check — confirm), then each install phase in
    # order. The status of the last phase is the function's status.
    function install() {
      judgeMem 1024000
      local install_phase
      for install_phase in checkDepend installAgent start installNodeExporter; do
        "${install_phase}"
      done
    }
    
    # Print the start-up banner showing the agent URL built from the global localIp.
    function print() {
      local banner
      banner="====================== sitesupport-agent 启动完成 ======================
    
    =                   private: http://${localIp}:8888                  =
    
    ========================================================================"
      echo -e "${banner}"
    }
    
    #######################################
    # Start the agent JAR and wait up to 60 seconds (polling every 5) until
    # serviceIsAlive reports it healthy; also starts node_exporter.
    # Globals:   JAR_NAME, AGENT_INSTALL_HOME (read)
    # Returns:   0 once the agent is serving, 1 when it did not come up in time
    #            (any leftover agent process is killed first)
    #######################################
    function start() {
      local step=5
      local res=1
      local i
      # node_exporter does not depend on the agent, so bring it up first.
      # Previously this call sat after the wait loop and was only reached when the
      # agent had FAILED to start, because the success path returns early.
      systemctl start node_exporter.service &>/dev/null ||
        logInfo "node_exporter.service 未启动(可能尚未安装)"
      for ((i = 0; i < 60; i += step)); do
        serviceIsAlive
        res=$?
        if [ ${res} = 0 ]; then
          logInfo "${JAR_NAME}服务已正常启动!"
          print
          return 0
        fi
        if [ ${res} = 1 ]; then
          # No process yet: launch it in the background.
          # NOTE(review): this opens a JDWP debug port (18888) on every start —
          # confirm that is intended outside development.
          nohup java -agentlib:jdwp=transport=dt_socket,server=y,suspend=n,address=18888 -jar "${AGENT_INSTALL_HOME}/${JAR_NAME}" >"${AGENT_INSTALL_HOME}/nohup.out" 2>&1 &
        fi
        logInfo "${JAR_NAME}服务启动中..."
        sleep $step
      done
    
      # Timed out: remove whatever is left of the half-started agent. The PID list
      # may be empty or hold several entries, so it is passed as an array.
      local pids=()
      mapfile -t pids < <(pgrep -f -- "${JAR_NAME}")
      if ((${#pids[@]} > 0)); then
        kill -9 "${pids[@]}"
      fi
      logError "${JAR_NAME}服务启动失败!i will kill it!!"
      return 1
    }
    
    #######################################
    # Stop the agent JAR and the node_exporter listening on port 9100.
    # Globals:   JAR_NAME (read)
    #######################################
    function stop() {
      local res=0
      serviceIsAlive || res=$?
      # Only status 1 means "no process". Status 2 (process exists but is not
      # serving) must be stopped as well, otherwise a hung or still-starting
      # agent survives a stop/restart.
      if [ "${res}" != 1 ]; then
        local pids=()
        mapfile -t pids < <(pgrep -f -- "${JAR_NAME}")
        if ((${#pids[@]} > 0)); then
          kill -9 "${pids[@]}"
        fi
        logInfo "${JAR_NAME}停止成功"
      else
        logInfo "${JAR_NAME}没有启动"
      fi
    
      # Kill the node_exporter process that owns port 9100, if any
      local node_pid=""
      node_pid=$(netstat -tnlp | grep 9100 | grep node_exporter | awk '{print $7}' | awk 'NR==1' | cut -d '/' -f 1)
      if [[ ${node_pid} != "" ]]; then kill -9 "${node_pid}"; fi
    }
    
    #######################################
    # Probe the agent process.
    # Globals:   JAR_NAME (read), localIp (read; setLocalIp is called first and
    #            presumably sets it — defined elsewhere)
    # Returns:   0 - process exists, owns a listening socket and answers HTTP on 8888
    #            1 - no process found
    #            2 - process exists but is not listening or not answering
    #                (may still be starting, or may have failed to start)
    #######################################
    function serviceIsAlive() {
      setLocalIp
      local pids=()
      mapfile -t pids < <(pgrep -f -- "${JAR_NAME}")
      if ((${#pids[@]} == 0)); then
        return 1
      fi
    
      # Match "<pid>/" in netstat's PID/Program column. A bare grep for the PID
      # also matched port numbers or addresses that merely contain those digits.
      local sockets=""
      local pid=""
      local listening=1
      sockets=$(netstat -tlnp)
      for pid in "${pids[@]}"; do
        if grep -qE "[[:space:]]${pid}/" <<<"${sockets}"; then
          listening=0
          break
        fi
      done
      if ((listening != 0)); then
        return 2
      fi
    
      # Bounded probe so a hung agent cannot block the caller indefinitely
      if ! curl -s --max-time 5 "http://${localIp}:8888" &>/dev/null; then
        return 2
      fi
      return 0
    }
    
    #######################################
    # Self-healing entry point, meant to be run from cron: start the agent when it
    # is down and wait up to 60 seconds (polling every 5) for it to become healthy.
    # Globals:   JAR_NAME, AGENT_INSTALL_HOME (read)
    # Returns:   0 as soon as the agent is healthy, 1 when it is still not
    #            healthy after the wait (leftover processes are killed)
    #######################################
    function self_healing() {
      local step=5
      local res=1
      local i
      for ((i = 0; i < 60; i += step)); do
        serviceIsAlive
        res=$?
        if [ ${res} = 0 ]; then
          logInfo "${JAR_NAME}服务已正常启动!"
          # Healthy: return right away. The loop used to keep polling for the
          # full 60 seconds, which overlaps with the next per-minute cron run.
          return 0
        elif [ ${res} = 1 ]; then
          logInfo "${JAR_NAME}服务开始启动!"
          nohup java -jar "${AGENT_INSTALL_HOME}/${JAR_NAME}" >"${AGENT_INSTALL_HOME}/nohup.out" 2>&1 &
        else
          logInfo "${JAR_NAME}服务启动中..."
        fi
        sleep $step
      done
    
      # Never became healthy: kill whatever is left. The PID list may be empty or
      # hold several entries, so it is passed as an array.
      local pids=()
      mapfile -t pids < <(pgrep -f -- "${JAR_NAME}")
      if ((${#pids[@]} > 0)); then
        kill -9 "${pids[@]}"
      fi
      logInfo "${JAR_NAME}服务启动失败!i will kill it!!"
      return 1
    }
    
    #######################################
    # Stop everything and remove the agent: install directory, cron job and the
    # node_exporter systemd unit.
    # Globals:   AGENT_INSTALL_HOME (read)
    #######################################
    function uninstall() {
      stop
      # :? aborts instead of expanding to an empty path if the variable is unset
      rm -rf "${AGENT_INSTALL_HOME:?}"
      # Remove the cron job. cron notices the modified /etc/crontab on its own;
      # the former `source /etc/crontab` executed the crontab as shell code in
      # this script (overwriting PATH and running its lines as commands).
      sed -i '/agentctl.sh/d' /etc/crontab
      # Remove the exporter unit and let systemd forget it
      systemctl disable node_exporter.service &>/dev/null
      rm -rf /usr/lib/systemd/system/node_exporter.service
      systemctl daemon-reload
    }
    
    # Report the agent state: hands back the status of serviceIsAlive unchanged.
    function check_status() {
      local alive_rc=0
      serviceIsAlive || alive_rc=$?
      return "${alive_rc}"
    }
    
    # Entry point: the first command-line argument selects the action.
    # ${1:-} keeps the dispatch well-defined when the script is run without arguments.
    case "${1:-}" in
    start)
      start
      ;;
    stop)
      stop
      ;;
    restart)
      stop
      start
      ;;
    install)
      install
      ;;
    uninstall)
      uninstall
      ;;
    check_status)
      check_status
      ;;
    self_healing)
      self_healing
      ;;
    *)
      # The usage text lists every supported command, including restart
      logError "Usage: $0 {start|stop|restart|install|uninstall|check_status|self_healing} {..}"
      ;;
    esac
  • 相关阅读:
    23种设计模式-原型模式
    23种设计模式-工厂方法模式
    23种设计模式-代理模式
    23种设计模式-装饰模式
    23种设计模式-策略模式
    CSS高度塌陷问题与解决办法
    Java: 多态
    Java: 接口
    Java: 继承
    Java: 单例设计模式
  • 原文地址:https://www.cnblogs.com/dhName/p/15334438.html
Copyright © 2011-2022 走看看