zoukankan      html  css  js  c++  java
  • Python之行 -- Linux常用服务监管

    Python之行--服务监管

    背景

    随着公司系统平台的服务不断增多、日益复杂,定位问题的效率逐渐下降,对各个服务进行统一监管显得越来越重要。作为运维人员,在管理越来越头疼的时候,我决定开发自己的监控程序!

    动作

    首先,公司系统使用 Java 开发,基于微服务体系快速搭建,服务器使用最常用的 CentOS 系统(CentOS 7)。由于有多个 xxx.jar,启动命令冗长繁琐,在不断和后台人员优化、统一启动命令后,全部改造成了系统服务,例如:systemctl start xxx.service。类似这样的服务有 9 个之多,还不包括所依赖的其他服务,如 mysql、redis、kafka……这些服务也统一做成了系统服务!

    开发(上代码)

    # coding:utf-8
    # author:Liu Xiaofei
    # date:2020-5-4
    # mood:restless
    
    import time
    import logging
    import subprocess
    
    from copy import deepcopy
    from functools import wraps
    
    
    class ParsesTools(object):
        """Helpers for running shell commands and interpreting their results."""

        @staticmethod
        def parse_netstat_response(origin):
            """
            Parse the output of ``netstat -tnlp`` into one dict per socket row.

            The first two lines of the output are treated as headers and skipped.
            Rows with fewer than seven whitespace-separated columns are ignored.

            :param origin: raw netstat output, as text or as the bytes returned
                           by ``execute_result``
            :return: generator of dicts with the keys ``type``, ``process``,
                     ``service`` and ``port`` (all strings)
            """
            # A Popen pipe yields bytes on Python 3. On Python 2 ``bytes`` is
            # ``str``, so this branch is never taken there.
            if isinstance(origin, bytes) and not isinstance(origin, str):
                origin = origin.decode("utf-8", "replace")

            for line in origin.strip().splitlines()[2:]:
                fields = line.split()
                if len(fields) < 7:
                    # Incomplete row: there is no PID/program column to read.
                    continue

                # Column 7 is "PID/Program name"; netstat prints a bare "-"
                # when it cannot identify the owning process, in which case
                # the program part is simply empty.
                pid, _, program = fields[6].partition("/")
                yield {
                    "type": fields[0],
                    "process": pid,
                    "service": program.replace(":", ""),
                    # The port is whatever follows the LAST colon; this holds
                    # for "0.0.0.0:3306", ":::8080" and "::1:25" alike.
                    "port": fields[3].rsplit(":", 1)[-1],
                }

        @staticmethod
        def execute_result(command):
            """
            Run a shell command and return everything it wrote to stdout.

            :param command: shell command line to execute
            :return: the captured stdout of the command
            """
            process = subprocess.Popen(command, stdin=subprocess.PIPE,
                                       stdout=subprocess.PIPE, shell=True)
            # communicate() closes stdin, drains stdout and waits for the child,
            # so neither a zombie process nor an open pipe is left behind.
            response_info, _ = process.communicate()
            return response_info

        @staticmethod
        def service_run(command):
            """
            Decorator factory: run ``command`` before calling the wrapped function.

            The outcome of the command is handed to the wrapped function as a
            keyword argument named after the function, with underscores replaced
            by hyphens (``mysql_service`` receives ``**{"mysql-service": True}``).

            :param command: shell command line to execute
            :return: a decorator
            """
            def execute_status(func):
                status_key = func.__name__.replace("_", "-")

                @wraps(func)
                def inner(*args, **kwargs):
                    try:
                        subprocess.check_call(command, shell=True)
                    except Exception as e:
                        # Best effort: a failing command is logged and reported
                        # as status False instead of aborting the caller.
                        logging.error(e)
                        status = False
                    else:
                        status = True
                    kwargs[status_key] = status
                    return func(*args, **kwargs)

                return inner

            return execute_status
    
    class ExecuteLinuxCommands(object):
        """Shell command lines used to inspect and start the supervised services."""

        # Lists listening TCP sockets together with the owning PID/program.
        CENTOS7_NETSTAT = "netstat -tnlp"

        # Infrastructure services the Java applications depend on.
        CENTOS7_MYSQL = "systemctl start mysqld.service"
        CENTOS7_REDIS = "systemctl start redis-server.service"
        CENTOS7_INFLUXDB = "systemctl start influxdb"
        CENTOS7_GRAFANA = "systemctl start grafana-server"
        CENTOS7_NGINX = "systemctl start nginx.service"
        CENTOS7_KAFKA = "systemctl start kafka.service"
        # Previously "systemctl restart kafka.service": a copy-paste slip that
        # bounced Kafka instead of starting ZooKeeper.
        # NOTE(review): unit name assumed to follow the "<name>.service"
        # pattern used by the other entries - confirm on the host.
        CENTOS7_ZOOKEEPER = "systemctl start zookeeper.service"

        # Java micro-services.
        JAVA_GATEWAYAPI = "systemctl start gateway-api.service"
        JAVA_CONFIGMANGER = "systemctl start config-manger.service"
        JAVA_REALTIME = "systemctl start realtime.service"
        JAVA_REALTIME_DATAVIEW = "systemctl start realtime-dataview.service"
        JAVA_ALERT_STRATEGY = "systemctl start alert-strategy.service"
        JAVA_ALERT_PUSHER = "systemctl start alert-pusher.service"
        JAVA_ALERT_ENGINE = "systemctl start alert-engine.service"
        # Previously a duplicate of JAVA_ALERT_ENGINE, so the login service was
        # never started.
        # NOTE(review): unit name assumed to be "login.service" - confirm on
        # the host.
        JAVA_LOGIN = "systemctl start login.service"
        JAVA_GATEWAY = "systemctl start gateway.service"
    
    class YiLianSystemServe(object):
        """
        One start method per supervised service.

        Every ``<name>_service`` method is wrapped by ``ParsesTools.service_run``,
        which runs the matching ``systemctl start`` command first and passes the
        outcome in as a keyword argument (``{"<name>-service": bool}``). The
        method then stores that outcome in ``serve_recorder`` and, on success,
        increments ``serve_counter``.
        """
        pt = ParsesTools
        els = ExecuteLinuxCommands

        def __init__(self):
            self.serve_counter = 0    # number of services known to be running
            self.serve_recorder = {}  # service key -> True/False start status

        @staticmethod
        def comm(*args, **kwargs):
            """
            Evaluate the start status handed over by the ``service_run`` decorator.

            :param args: unused
            :param kwargs: expected to hold the service status as
                           ``{"<name>-service": bool}``; the first key is taken
                           as the service name
            :return: tuple ``(result, count)`` where ``result`` is a copy of
                     ``kwargs`` and ``count`` is 1 if the service started, else 0
            :raises IndexError: if no keyword argument is given
            """
            result = dict(kwargs)
            # list(...)[0] instead of .keys()[0]: dict views cannot be indexed
            # on Python 3.
            serve_name = list(kwargs)[0].replace("_", "-")
            if result.get(serve_name):
                return result, 1
            print("{}服务启动失败,请检查启动命令是否正确以及跟踪错误日志!!!".format(serve_name))
            return result, 0

        def record(self, *args):
            """
            Merge a status dict into the recorder and add to the success counter.

            :param args: ``(status_dict, count)`` as returned by ``comm``
            :return: None
            """
            self.serve_recorder.update(args[0])
            self.serve_counter += args[1]

        def _check_and_record(self, *args, **kwargs):
            """Shared body of all ``*_service`` methods: evaluate, then record."""
            result, count = self.comm(*args, **kwargs)
            self.record(result, count)

        @pt.service_run(els.CENTOS7_MYSQL)
        def mysql_service(self, *args, **kwargs):
            # The status must be forwarded unpacked; passing ``args, kwargs``
            # positionally leaves comm() without any keyword argument.
            self._check_and_record(*args, **kwargs)

        @pt.service_run(els.CENTOS7_REDIS)
        def redis_service(self, *args, **kwargs):
            self._check_and_record(*args, **kwargs)

        @pt.service_run(els.CENTOS7_INFLUXDB)
        def influx_service(self, *args, **kwargs):
            self._check_and_record(*args, **kwargs)

        @pt.service_run(els.CENTOS7_GRAFANA)
        def grafana_service(self, *args, **kwargs):
            self._check_and_record(*args, **kwargs)

        @pt.service_run(els.CENTOS7_NGINX)
        def nginx_service(self, *args, **kwargs):
            # CENTOS7_NGINX was defined but had no start method, so a lookup of
            # "nginx_service" by name failed with AttributeError.
            self._check_and_record(*args, **kwargs)

        @pt.service_run(els.CENTOS7_KAFKA)
        def kafka_service(self, *args, **kwargs):
            self._check_and_record(*args, **kwargs)

        @pt.service_run(els.CENTOS7_ZOOKEEPER)
        def zookeeper_service(self, *args, **kwargs):
            self._check_and_record(*args, **kwargs)

        @pt.service_run(els.JAVA_GATEWAYAPI)
        def gateway_api_service(self, *args, **kwargs):
            self._check_and_record(*args, **kwargs)

        @pt.service_run(els.JAVA_CONFIGMANGER)
        def config_manger_service(self, *args, **kwargs):
            self._check_and_record(*args, **kwargs)

        @pt.service_run(els.JAVA_ALERT_STRATEGY)
        def alert_strategy_service(self, *args, **kwargs):
            self._check_and_record(*args, **kwargs)

        @pt.service_run(els.JAVA_ALERT_PUSHER)
        def alert_pusher_service(self, *args, **kwargs):
            self._check_and_record(*args, **kwargs)

        @pt.service_run(els.JAVA_ALERT_ENGINE)
        def alert_engine_service(self, *args, **kwargs):
            self._check_and_record(*args, **kwargs)

        @pt.service_run(els.JAVA_REALTIME_DATAVIEW)
        def realtime_dataview_service(self, *args, **kwargs):
            self._check_and_record(*args, **kwargs)

        @pt.service_run(els.JAVA_REALTIME)
        def realtime_service(self, *args, **kwargs):
            self._check_and_record(*args, **kwargs)

        @pt.service_run(els.JAVA_LOGIN)
        def login_service(self, *args, **kwargs):
            self._check_and_record(*args, **kwargs)

        @pt.service_run(els.JAVA_GATEWAY)
        def gateway_service(self, *args, **kwargs):
            self._check_and_record(*args, **kwargs)
    
    class TotalServicesRun(YiLianSystemServe):
        """
        Check which services are listening and start the ones that are not.

        Both tables map a listening port (as a string) to a service name; the
        name, with hyphens turned into underscores, selects the inherited
        ``<name>_service`` start method.
        """
        YiLianJavaServices = {
            "8089": "gateway",
            "8090": "config-manger",
            "8080": "gateway-api",
            "6077": "alert-strategy",
            "6088": "alert-pusher",
            "6099": "alert-engine",
            "8099": "realtime-dataview",
            "8100": "realtime",
            "8101": "login"
        }
        YiLianDependOnServices = {
            "3306": "mysql",
            "6379": "redis",
            "80": "nginx",
            "8088": "influx",
            "3000": "grafana",
            "9092": "kafka",
            "2181": "zookeeper"
        }

        def __init__(self):
            self.elc = ExecuteLinuxCommands
            self.pt = ParsesTools
            self.ports = []             # ports currently listening, as strings
            self.error_info = None      # failed services of the last failed group
            self.total_status = dict()  # merged status of every processed group
            # Name the class explicitly: super(self.__class__, self) recurses
            # forever as soon as this class is itself subclassed.
            super(TotalServicesRun, self).__init__()

        def methods(self, dictionary):
            """
            Record services that are already listening and start the others.

            Resets ``serve_recorder`` and ``serve_counter`` before processing.

            :param dictionary: mapping of port -> service name
            :return: None
            """
            self.serve_recorder = {}
            self.serve_counter = 0

            for k, v in dictionary.items():
                if k in self.ports:
                    print("{}服务启动成功!".format(v))
                    self.serve_recorder[v] = True
                    self.serve_counter += 1
                    continue

                v = v.replace("-", "_")
                print("{}服务未启动!!!".format(v))
                starter = getattr(self, "{}_service".format(v), None)
                if starter is None:
                    # A table entry without a matching start method is reported
                    # as a failed service instead of crashing the whole check.
                    logging.error("no start method defined for service %s", v)
                    self.serve_recorder[v] = False
                    continue
                starter()

        def statistical_services(self,services,before=False):
            """
            Process one group of services and print a summary for it.

            :param services: mapping of port -> service name for the group
            :param before: True when the group is the dependency group
            :return: True if every service of the group is running, else False
            """
            self.methods(services)
            self.total_status.update(self.serve_recorder)
            services_name = "依赖" if before else "Java"
            if self.serve_counter < len(services):
                self.error_info = {k: v for k, v in self.serve_recorder.items() if v is False}

                print("易联系统{}服务启动失败,共{}个服务未启动{}失败服务详情(json格式):{}".format(
                    services_name, len(self.error_info), "\n", self.error_info))
                return False
            print("易联系统{}服务已全部启动正常{}启动详情(字典格式):{}".format(
                services_name, "\n", self.serve_recorder))
            return True

        def run(self):
            """
            Run one supervision pass.

            Reads the listening ports from netstat, handles the dependency group
            and, only if that group is fully up, the Java group.

            :return: dict of service key -> status for everything processed
            """
            origin_ingo = self.pt.execute_result(self.elc.CENTOS7_NETSTAT)

            self.ports = [i.get("port") for i in self.pt.parse_netstat_response(origin_ingo)]

            if self.statistical_services(self.YiLianDependOnServices,before=True):
                self.statistical_services(self.YiLianJavaServices)

            print("易联系统服务启动状态总览(字典格式):{}".format(self.total_status))
            return self.total_status
    
    if __name__ == '__main__':
        # Supervise forever: one full pass, then wait five minutes.
        while True:
            start = time.time()
            ts = TotalServicesRun()
            ts.run()
            diff_time = time.time() - start
            # Single-argument print(...) behaves the same on Python 2 and 3.
            print("程序执行耗时{}s".format(diff_time))
            time.sleep(300)
    ServicesSupervision

    代码开发已完成,只用了些常用模块,逻辑相对简单,功能已满足当前公司需要。各位大佬,欢迎可劲指正!有更好的写法烦请留言告知我,帮助小弟更好的优化,共同进步!O(∩_∩)O谢谢

  • 相关阅读:
    07 总结ProgressDialog 异步任务
    1. vim 的安装及配置
    debian 源设置 ( apt-get 不能安装)
    在Debian中安装VNC Server
    让BB-Black通过usb0上网
    常用的一些 linux 指令
    Linux下同一目录内文件和目录为什么不能同名?
    beaglebone black 与电脑互传文件(夹)
    永久修改 putty字体大小
    Beaglebone Black的引脚分配
  • 原文地址:https://www.cnblogs.com/CrazySheldon1/p/12829035.html
Copyright © 2011-2022 走看看