zoukankan      html  css  js  c++  java
  • Python实现自动化监控远程服务器

    最近发现Python可以做很多事情,在监控服务器方面有其独特的优势:耗费资源少,开发周期短。

    首先我们做一个定时或者实时脚本timedtask.py,让其定时监控目标服务器,两种方式:

    第一种:

    #!/usr/bin/env python
    # -*- coding: utf-8 -*-
    # @Time    : 2017/11/27 15:59
    # @Desc    : 定时任务,以需要的时间间隔执行某个命令
    # @File    : timedtask.py
    # @Software: PyCharm
    
    import time, os
    from monitorserver import alltask
    
    
    def roll_back(cmd, inc=60):
        """Run the monitoring round forever, sleeping ``inc`` seconds in between.

        ``cmd`` is accepted for call compatibility; this variant does not use it.
        """
        pause_seconds = inc
        while True:
            # One monitoring round (imported from monitorserver), then wait.
            alltask()
            time.sleep(pause_seconds)

    roll_back(cmd="echo %time%", inc=5)

    第二种:

    #!/usr/bin/env python
    # -*- coding: utf-8 -*-
    # @Time    : 2017/11/27 15:59
    # @Desc    : 定时任务,以需要的时间间隔执行某个命令
    # @File    : timedtask.py
    # @Software: PyCharm
    
    import time, os
    
    def roll_back(cmd, inc=60):
        """Run the monitoring script forever, sleeping ``inc`` seconds in between.

        ``cmd`` is accepted for call compatibility; this variant does not use it.
        """
        # Location of the monitoring script.
        monitor_command = 'python  /home/../monitorserver.py'
        while True:
            os.system(monitor_command)
            time.sleep(inc)

    roll_back(cmd="echo %time%", inc=5)

    做过监控的应该都知道,我们主要监控服务器的负载、磁盘、内存、CPU、网络接口(流量)和端口。针对这些,我做了以下远程监控。第一种和第二种方式使用的监控代码相同,monitorserver.py 代码如下:

    #!/usr/bin/env python
    # -*- coding: utf-8 -*-
    # @Time    : 2017/11/27 15:59
    # @Desc    : 服务器监控代码
    # @File    : monitorserver.py
    # @Software: PyCharm
    
    import pexpect
    import re
    
    import time
    import threading
    
    """
    主方法
    127.0.0.1#远程服务器ip地址
    """
    def ssh_command(user, host, password, command):
        """Run ``command`` on ``host`` over ssh and answer the password prompt.

        Spawns ``ssh -l user host command`` through pexpect, confirms the
        host-key question if ssh asks it, then sends ``password``.

        Returns the pexpect child so the caller can read the command output
        (the callers in this file wait for ``pexpect.EOF``), or ``None`` when
        no password prompt appeared before pexpect's timeout.
        """
        ssh_new_key = 'Are you sure you want to continue connecting'
        child = pexpect.spawn('ssh -l %s %s %s' % (user, host, command))
        i = child.expect([pexpect.TIMEOUT, ssh_new_key, 'password: '])
        if i == 0:
            print 'ERROR!'
            print 'SSH could not login. Here is what SSH said:'
            print child.before, child.after
            return None
        if i == 1:
            # Unknown host key: accept it, then wait for the password prompt
            # exactly once. Waiting a second time (as the code used to) can
            # only time out, because the prompt has already been consumed.
            child.sendline('yes')
            i = child.expect([pexpect.TIMEOUT, 'password: '])
            if i == 0:
                print 'ERROR!'
                print 'SSH could not login. Here is what SSH said:'
                print child.before, child.after
                return None
        child.sendline(password)
        return child
    
    
    """
    内存监控
    """
    def mem_info():
    
        child = ssh_command("远程服务器用户名", "127.0.0.1", "远程服务器密码", "cat /proc/meminfo")
        child.expect(pexpect.EOF)
        mem = child.before
        mem_values = re.findall("(d+) kB", mem)
        MemTotal = mem_values[0]
        MemFree = mem_values[1]
        Buffers = mem_values[2]
        Cached = mem_values[3]
        SwapCached=mem_values[4]
        SwapTotal = mem_values[13]
        SwapFree = mem_values[14]
        print '******************************内存监控*********************************'
        print "*******************时间:", time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()), "******************"
        print "总内存:",MemTotal
        print "空闲内存:", MemFree
        print "给文件的缓冲大小:",Buffers
        print "高速缓冲存储器使用的大小:", Cached
        print "被高速缓冲存储用的交换空间大小:", SwapCached
        print "给文件的缓冲大小:", Buffers
        if int(SwapTotal) == 0:
            print u"交换内存总共为:0"
        else:
            Rate_Swap = 100 - 100*int(SwapFree)/float(SwapTotal)
            print u"交换内存利用率:", Rate_Swap
        Free_Mem = int(MemFree) + int(Buffers) + int(Cached)
        Used_Mem = int(MemTotal) - Free_Mem
        Rate_Mem = 100*Used_Mem/float(MemTotal)
        print u"内存利用率:", str("%.2f" % Rate_Mem), "%"
    
    
    """
    内核线程、虚拟内存、磁盘、陷阱和 CPU 活动的统计信息
    """
    def vm_stat_info():
        child = ssh_command("远程服务器用户名", "127.0.0.1", "远程服务器密码", "vmstat 1 2 | tail -n 1")
        child.expect(pexpect.EOF)
        vmstat_info = child.before.strip().split()
        processes_waiting = vmstat_info[0]
        processes_sleep = vmstat_info[1]
        swpd = vmstat_info[2]
        free = vmstat_info[3]
        buff = vmstat_info[4]
        cache = vmstat_info[5]
        si = vmstat_info[6]
        so = vmstat_info[7]
        io_bi = vmstat_info[8]
        io_bo = vmstat_info[9]
        system_interrupt = vmstat_info[10]
        system_context_switch = vmstat_info[11]
        cpu_user = vmstat_info[12]
        cpu_sys = vmstat_info[13]
        cpu_idle = vmstat_info[14]
        cpu_wait = vmstat_info[15]
        st=vmstat_info[16]
        print '****************************内核线程、虚拟内存、磁盘、陷阱和 CPU 活动的统计信息监控****************************'
        print "*******************时间:", time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()), "******************"
        print "等待运行进程的数量:", processes_waiting
        print "处于不间断状态的进程:", processes_sleep
        print "使用虚拟内存(swap)的总量:", swpd
        print "空闲的内存总量:", free
        print "用作缓冲的内存总量:", buff
        print "用作缓存的内存总量:", cache
        print "交换出内存总量 :", si
        print "交换入内存总量 :", so
        print "从一个块设备接收:", io_bi
        print "发送到块设备:", io_bo
        print "每秒的中断数:", system_interrupt
        print "每秒的上下文切换数:", system_context_switch
        print "用户空间上进程运行的时间百分比:", cpu_user
        print "内核空间上进程运行的时间百分比:", cpu_sys
        print "闲置时间百分比:", cpu_idle
        print "等待IO的时间百分比:", cpu_wait
        print "从虚拟机偷取的时间百分比:", st
    
    
    '''
    cpu监控
    '''
    def cpu_info():
        child = ssh_command("远程服务器用户名", "127.0.0.1", "远程服务器密码", "cat /proc/cpuinfo")
        child.expect(pexpect.EOF)
        cpuinfo = child.before
        cpu_num = re.findall('processor.*?(d+)', cpuinfo)[-1]
        cpu_num = str(int(cpu_num) + 1)
        print '***************************************cpu监控***************************************'
        print "*******************时间:", time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()), "******************"
        print u"CPU数目:", cpu_num
        li = cpuinfo.replace('	', '').split('
    ')
        CPUinfo = {}
        procinfo = {}
        nprocs = 0
        for line in li:
            if line.find("processor") > -1:
                CPUinfo['CPU%s' % nprocs] = procinfo
                nprocs = nprocs + 1
            else:
                if len(line.split(':')) == 2:
                    procinfo[line.split(':')[0].strip()] = line.split(':')[1].strip()
                else:
                    procinfo[line.split(':')[0].strip()] = ''
        for processor in CPUinfo.keys():
            print "CPU属于的名字及其编号、标称主频:",CPUinfo[processor]['model name']
            print "CPU属于其系列中的哪一代的代号:", CPUinfo[processor]['model']
            print "CPU制造商:", CPUinfo[processor]['vendor_id']
            print "CPU产品系列代号:", CPUinfo[processor]['cpu family']
            print "CPU的实际使用主频:", CPUinfo[processor]['cpu MHz']
    
    
    """
    负载均衡
    """
    def load_stat():
        child = ssh_command("远程服务器用户名", "127.0.0.1", "远程服务器密码", "cat /proc/loadavg")
        child.expect(pexpect.EOF)
        loadavgs = child.before.strip().split()
        print '************************负载均衡监控****************************'
        print "*******************时间:",time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()),"******************"
        print "系统5分钟前的平均负载:", loadavgs[0]
        print "系统10分钟前的平均负载:", loadavgs[1]
        print "系统15分钟前的平均负载:", loadavgs[2]
        print "分子是正在运行的进程数,分母为总进程数:",loadavgs[3]
        print "最近运行的进程id:", loadavgs[4]
    
    
    
    """
    获取网络接口的输入和输出
    """
    def  ionetwork():
        child = ssh_command("远程服务器用户名", "127.0.0.1", "远程服务器密码", "cat /proc/net/dev")
        child.expect(pexpect.EOF)
        netdata = child.before
        li = netdata.strip().split('
    ')
        print '************************获取网络接口的输入和输出监控****************************'
        print "*******************时间:",time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()),"******************"
        net = {}
        for line in li[2:]:
            line = line.split(":")
            eth_name = line[0].strip()
            # if eth_name != 'lo':
            net_io = {}
            net_io['Receive'] = round(float(line[1].split()[0]) / (1024.0 * 1024.0), 2)
            net_io['Transmit'] = round(float(line[1].split()[8]) / (1024.0 * 1024.0), 2)
            net[eth_name] = net_io
        print net
    
    
    """ 
    磁盘空间监控
    """
    def disk_stat():
        child = ssh_command("远程服务器用户名", "127.0.0.1", "远程服务器密码", "df -h")
        child.expect(pexpect.EOF)
        disk = child.before
        disklist = disk.strip().split('
    ')
        disklists=[]
        for  disk  in disklist:
            disklists.append(disk.strip().split())
        print '************************磁盘空间监控****************************'
        print "*******************时间:", time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()), "******************"
        for  i in disklists[1:]:
            print "	文件系统:", i[0],
            print "	容量:", i[1],
            print "	已用:", i[2],
            print "	可用:", i[3],
            print "	已用%挂载点:", i[4]
    
    
    """ 
    端口监控
    一般是远程服务器用户名用户
    """
    def getComStr():
        child = ssh_command("远程服务器用户名", "127.0.0.1", "远程服务器密码", "netstat -tpln")
        child.expect(pexpect.EOF)
        Com = child.before
        print '******************************端口监控*********************************'
        print "*******************时间:", time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()), "******************"
        print Com
    
    
    
    """
    CPU usage monitoring (two successive samples of /proc/stat)
    """
    def  cpu():
        child = ssh_command("远程服务器用户名", "127.0.0.1", "远程服务器密码", 'cat /proc/stat | grep "cpu "')
        child.expect(pexpect.EOF)
        child1 = ssh_command("远程服务器用户名", "127.0.0.1", "远程服务器密码", 'cat /proc/stat | grep "cpu "')
        child1.expect(pexpect.EOF)
        cpus = child.before.strip().split()
        cpus1 = child1.before.strip().split()
        print '************************cpu使用情况****************************'
        print "*******************时间:",time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()),"******************"
        T1=int(cpus[1])+int(cpus[2])+int(cpus[3])+int(cpus[4])+int(cpus[5])+int(cpus[6])+int(cpus[8])+int(cpus[9])
        T2=int(cpus1[1]) + int(cpus1[2]) + int(cpus1[3]) + int(cpus1[4] )+ int(cpus1[5] )+int( cpus1[6] )+ int(cpus1[8] )+ int(cpus1[9])
        Tol=T2-T1
        Idle=int(cpus1[4]) - int(cpus[4])
        print '总的cpu时间1:',T1
        print '总的cpu时间2:', T2
        print '时间间隔内的所有时间片:', Tol
        print '计算空闲时间idle:', Idle
        print "计算cpu使用率:",100*(Tol-Idle)/Tol,"%"
    
    """
    第一种执行
    """
    def alltask():
        try:
            threads = []
            t1 = threading.Thread(target=mem_info)
            threads.append(t1)
            t2 = threading.Thread(target=vm_stat_info)
            threads.append(t2)
            t3 = threading.Thread(target=cpu_info)
            threads.append(t3)
            t4 = threading.Thread(target=load_stat)
            threads.append(t4)
            t5 = threading.Thread(target=ionetwork)
            threads.append(t5)
            t6 = threading.Thread(target=disk_stat)
            threads.append(t6)
            t7 = threading.Thread(target=getComStr)
            threads.append(t7)
            t8 = threading.Thread(target=cpu)
            threads.append(t8)
            for n in range(len(threads)):
                threads[n].start()
        except Exception, e:
            print str(e)
    
    """
    第二种执行
    """
    if __name__ == '__main__':
        # Script entry point: run one monitoring round. alltask() starts the
        # same eight monitoring threads and prints any exception, which is
        # what the copy of its body that used to live here did.
        alltask()

    监控结果如下:

    接下来要做的就是把监控结果可视化,可惜没时间做,就交给各位了!!!

    花了两天时间整理的,分享给大家,希望对各位有帮助!!!

  • 相关阅读:
    mysql数据库常用指令
    解决windows的mysql无法启动 服务没有报告任何错误的经验。
    “Can't open file for writing”或“operation not permitted”的解决办法
    启动Apache出现错误Port 80 in use by "Unable to open process" with PID 4!
    如何打开windows的服务services.msc
    常见的HTTP状态码 404 500 301 200
    linux系统常用的重启、关机指令
    (wifi)wifi移植之命令行调试driver和supplicant
    linux(debian)安装USB无线网卡(tp-link TL-WN725N rtl8188eu )
    alloc_chrdev_region申请一个动态主设备号,并申请一系列次设备号
  • 原文地址:https://www.cnblogs.com/IT-LearnHall/p/9426235.html
Copyright © 2011-2022 走看看