zoukankan      html  css  js  c++  java
  • python收集jvm数据

    之前前辈用 java 写的收集 jvm 脚本, 不太方便组内小伙伴维护, 遂用 python 重写了

    #!/usr/bin/env python
    # -*- coding: utf-8 -*-
    # Filename:    jvm_monitor
    # Description: collect jvm info
    # Author:      quke
    # Date:        2018/8/22
    
    
    import base64
    import datetime
    import json
    import logging.handlers
    import os
    import random
    import re
    import socket
    import time
    from subprocess import Popen, PIPE
    
    import MySQLdb
    import requests
    from requests.adapters import HTTPAdapter
    from requests.packages.urllib3.util.retry import Retry
    
    # Configure the root logger: INFO level, with timestamp, level, source
    # location and function name in every record.
    logging.basicConfig(level=logging.INFO,
                        format='%(asctime)s [%(levelname)s] [%(filename)s:%(lineno)s - %(funcName)20s() ] %(message)s',
                        datefmt='%Y-%m-%d %H:%M:%S',
                        )
    # NOTE(review): console_handler is created but never attached to a logger
    # in the visible code.
    console_handler = logging.StreamHandler()
    # Rotating log file in the current working directory: rolls over at
    # 10485760 bytes (10 MiB) and keeps 5 backups. No formatter is set on it here.
    file_handler = logging.handlers.RotatingFileHandler('jvm_monitor.log', maxBytes=10485760, backupCount=5)
    
    # Module logger; its records additionally go to the rotating file.
    logger = logging.getLogger(__name__)
    logger.addHandler(file_handler)
    
    # Local host name, used by the functions below to resolve this machine's alias.
    hostname = socket.gethostname()
    
    
    def run_command(cmd):
        """Run ``cmd`` through the shell and return its stdout as a list of lines.

        :param cmd: shell command line to execute (passed to ``Popen`` with
            ``shell=True``, so it must come from trusted input only).
        :return: stdout with leading/trailing newlines removed, split on
            ``'\\n'``; empty output therefore yields ``['']``.
        :raises SystemExit: if the command wrote anything to stderr (the
            message is logged first).
        """
        # universal_newlines=True opens the pipes in text mode, so the result is
        # ``str`` on both Python 2 and Python 3 and strip/split below work on it.
        process = Popen(cmd, shell=True, stdout=PIPE, stderr=PIPE, universal_newlines=True)
        stdout, stderr = process.communicate()
        if stderr:
            logger.error('Excepiton with run %s:%s' % (cmd, stderr))
            raise SystemExit
        return stdout.strip('\n').split('\n')
    
    
    def requests_retry(
            retries=3,
            backoff_factor=0.3,
            status_forcelist=(500, 502, 504),
            session=None,
    ):
        """Return a ``requests`` session whose HTTP(S) adapters retry requests.

        :param retries: value used for the total, read and connect retry limits.
        :param backoff_factor: back-off factor handed to ``Retry``.
        :param status_forcelist: HTTP status codes handed to ``Retry``.
        :param session: existing session to configure; a new
            ``requests.Session`` is created when this is falsy.
        :return: the configured session.
        """
        if not session:
            session = requests.Session()

        retry_policy = Retry(
            total=retries,
            read=retries,
            connect=retries,
            backoff_factor=backoff_factor,
            status_forcelist=status_forcelist,
        )
        retrying_adapter = HTTPAdapter(max_retries=retry_policy)

        # The same adapter instance serves both URL schemes.
        for scheme in ('http://', 'https://'):
            session.mount(scheme, retrying_adapter)
        return session
    
    
    def execute_sql(sql, host='192.168.1.1', user='user', password='password', db='db'):
        """Execute one SQL statement on a fresh MySQL connection.

        :param sql: complete SQL statement text.
        :param host: MySQL server host.
        :param user: MySQL user name.
        :param password: MySQL password.
        :param db: database (schema) name.
        :return: whatever ``cursor.fetchall()`` yields for the statement.

        The transaction is committed when the statement text contains
        ``insert`` or ``update`` (case-insensitive). The cursor and connection
        are always closed, even when the statement raises.
        """
        connection = MySQLdb.connect(host, user, password, db)
        try:
            cursor = connection.cursor()
            try:
                cursor.execute(sql)
                # Compare in lower case so 'INSERT ...' / 'UPDATE ...' commit too.
                statement = sql.lower()
                if 'insert' in statement or 'update' in statement:
                    connection.commit()
                return cursor.fetchall()
            finally:
                cursor.close()
        finally:
            connection.close()
    
    
    def get_all_mixed_info():
        """Load the ``cmdb_mixed_relation`` table as a nested dictionary.

        :return: ``{mixhost: {module: alias}}`` built from every row of the
            table; a later row for the same (mixhost, module) overwrites an
            earlier one.
        """
        rows = execute_sql(
            'select mixhost,module,alias from cmdb_mixed_relation',
            host='192.168.1.1', user='user', password='password', db='db',
        )
        relation = {}
        for mixhost, module, alias in rows:
            relation.setdefault(mixhost, {})[module] = alias
        return relation
    
    
    def get_java_module(args, cur_dir='/apps'):
        """Find which module directory under ``cur_dir`` a JVM command line refers to.

        :param args: text to search (the callers pass one ``jps -mlv`` line).
        :param cur_dir: directory whose sub-directories are module candidates;
            defaults to ``/apps``.
        :return: name of the first sub-directory (in ``os.listdir`` order) whose
            name contains ``java``, ``boot``, ``tomcat`` or ``mycat`` and also
            occurs in ``args``; ``None`` when nothing matches or ``cur_dir``
            is not a directory.
        """
        if not os.path.isdir(cur_dir):
            return None

        keywords = ('java', 'boot', 'tomcat', 'mycat')
        for d in os.listdir(cur_dir):
            # Only directories count as modules; plain files are ignored.
            if not os.path.isdir(os.path.join(cur_dir, d)):
                continue
            if any(keyword in d for keyword in keywords) and d in args:
                return d
        return None
    
    
    def get_alias(module_name):
        """Look up the alias registered for ``module_name`` on this host.

        :param module_name: module name to look up.
        :return: the alias stored for (this host, ``module_name``), or the
            string ``'null'`` when there is no entry or the stored alias is falsy.
        """
        modules_on_this_host = get_all_mixed_info().get(hostname, {})
        return modules_on_this_host.get(module_name) or 'null'
    
    
    def get_gc_collector_name(line):
        """Pick the young- and old-generation collector names from a JVM command line.

        :param line: text to scan for ``Use...GC`` flag names.
        :return: ``(ygc, ogc)``. Each is the first flag name from its candidate
            list that occurs in ``line``; when none occurs, the fallbacks are
            ``'ParNew'`` and ``'CMS'`` respectively.
        """
        young_candidates = ('UseParNewGC', 'UseG1GC', 'UseSerialGC', 'UseParallelGC')
        old_candidates = ('UseConcMarkSweepGC', 'UseG1GC', 'UseParallelOldGC', 'UseSerialGC')

        # Candidate order matters: the first flag found wins.
        ygc = next((flag for flag in young_candidates if flag in line), 'ParNew')
        ogc = next((flag for flag in old_candidates if flag in line), 'CMS')
        return ygc, ogc
    
    
    def get_start_time(pid):
        """Return the start time of process ``pid`` as ``'YYYY-MM-DD HH:MM:SS'``.

        :param pid: process id, interpolated into ``ps -o lstart -p <pid>``.
        :return: the second output line of that command, re-formatted.
        """
        ps_lines = run_command('ps -o lstart -p %s' % pid)
        # Index 1 is used as the value line (presumably index 0 is the ps column header).
        started = time.strptime(ps_lines[1], '%a %b %d %H:%M:%S %Y')
        return time.strftime('%Y-%m-%d %H:%M:%S', started)
    
    
    def get_jstat_info(pid):
        """Parse the value line of ``jstat -gc <pid>`` into a dictionary.

        :param pid: JVM process id.
        :return: dict mapping ``s0c, s1c, s0u, s1u, ec, eu, oc, ou, pc, pu,
            jvmYgc, jvmYgct, jvmFgc, jvmFgct, jvmGct`` to the first fifteen
            numeric columns of the second output line, as strings.
        :raises ValueError: if the output has no second line or that line does
            not start with fifteen whitespace-separated numbers.

        NOTE(review): columns are mapped purely by position; on a JDK whose
        ``jstat -gc`` prints a different column set the names would be off --
        confirm against the target JDK.
        """
        ret = run_command('jstat -gc %s' % pid)
        fields = ('s0c', 's1c', 's0u', 's1u', 'ec', 'eu', 'oc', 'ou', 'pc', 'pu',
                  'jvmYgc', 'jvmYgct', 'jvmFgc', 'jvmFgct', 'jvmGct')
        # Columns are separated by whitespace (\s+); leading padding on the
        # value line is tolerated.
        pattern = r'\s*' + r'\s+'.join('(?P<%s>[0-9.]+)' % field for field in fields)
        matched = re.match(pattern, ret[1]) if len(ret) > 1 else None
        if matched is None:
            raise ValueError('unexpected jstat -gc output for pid %s: %r' % (pid, ret))
        return matched.groupdict()
    
    
    def get_thread_count(pid):
        """Read two thread counters from ``jstat -snap <pid>``.

        :param pid: JVM process id.
        :return: ``(active_thread_count, total_thread_count)`` as strings: the
            text after the first ``=`` on the third-from-last and on the last
            output line, respectively.
        """
        snapshot = run_command('jstat -snap %s' % pid)
        active_entry, total_entry = snapshot[-3], snapshot[-1]
        return active_entry.split('=')[1], total_entry.split('=')[1]
    
    
    def get_jvm_info():
        """Collect one info dictionary per monitored JVM listed by ``jps -mlv``.

        Lines for jps itself and for ``com.lagou.jmonitor.AgentWatcher`` are
        ignored. A JVM is skipped (with an error log) when no module directory
        matches its command line or when its alias resolves to ``'null'``.

        :return: list of dicts with keys ``pid, module, alias, start_time,
            gc_statistics, active_thread_count, total_thread_count, ygc, ogc,
            main_function, main_args``.
        """
        instances = []
        for line in run_command('jps -mlv'):
            if not line or 'sun.tools.jps.Jps' in line or 'com.lagou.jmonitor.AgentWatcher' in line:
                continue

            instances_list = line.split(' ')
            pid = instances_list[0]

            module = get_java_module(line)
            if module is None:
                # get_java_module returns None when nothing matches; without
                # this guard ``module in hostname`` below raises TypeError.
                logger.error('pid [%s] does not match any module directory , continue' % pid)
                continue

            # Use the host name itself as alias when it contains the module name.
            alias = hostname if module in hostname else get_alias(module)
            if 'null' == alias:
                logger.error('[%s] can not get mixed module alias name , continue' % module)
                continue

            ygc, ogc = get_gc_collector_name(line)
            start_time = get_start_time(pid)
            gc_statistics = get_jstat_info(pid)
            active_thread_count, total_thread_count = get_thread_count(pid)
            instances.append(
                dict(
                    pid=pid,
                    module=module,
                    alias=alias,
                    start_time=start_time,
                    gc_statistics=gc_statistics,
                    active_thread_count=active_thread_count,
                    total_thread_count=total_thread_count,
                    ygc=ygc,
                    ogc=ogc,
                    main_function=instances_list[1],
                    main_args=' '.join(instances_list[2:]),
                )
            )
        return instances
    
    
    def push_to_oss(jvm):
        """Insert or update this JVM's row in the ``jvmmonitordata`` table.

        A row is treated as existing when the alias appears in any row selected
        by module name, or in any row selected by host name; then ``pid``,
        ``gclist`` and ``fgcygc`` are updated, otherwise a new row is inserted.

        :param jvm: one info dictionary as produced by ``get_jvm_info``.

        NOTE(review): every statement is built by string interpolation, so a
        value containing a single quote breaks the SQL; switching to
        parameterized queries needs ``execute_sql`` to accept parameters.
        """
        modulename = jvm.get('module')
        hostname = jvm.get('alias')
        pid = jvm.get('pid')
        mainclassname = jvm.get('main_function')
        vmparam = jvm.get('main_args')
        updated = jvm.get('start_time')

        gc_stats = jvm['gc_statistics']
        young_entry = dict(useTime=gc_stats['jvmYgct'], name=jvm['ygc'], times=gc_stats['jvmYgc'])
        old_entry = dict(useTime=gc_stats['jvmFgct'], name=jvm['ogc'], times=gc_stats['jvmFgc'])
        gclist = json.dumps([young_entry, old_entry])

        fgcygc = json.dumps(dict(
            jvmFgc=gc_stats['jvmFgc'],
            jvmYgc=gc_stats['jvmYgc'],
            jvmFgct=gc_stats['jvmFgct'],
            jvmYgct=gc_stats['jvmYgct'],
        ))

        get_hostnames_sql = 'select hostname,modulename from jvmmonitordata where modulename="%s"' % modulename
        ignore_hostname_ne_modulename = 'select hostname from jvmmonitordata where hostname="%s"' % hostname
        logger.info('execute sql :%s' % get_hostnames_sql)

        # Both lookups always run; a hit in either one means the row exists.
        rows_for_module = execute_sql(get_hostnames_sql)
        rows_for_host = execute_sql(ignore_hostname_ne_modulename)
        found_by_module = any(hostname in row for row in rows_for_module)
        found_by_host = any(hostname in row for row in rows_for_host)

        if found_by_module or found_by_host:
            update_values = (int(pid), gclist, fgcygc, hostname)
            update_jvmmonitordata_sql = "update jvmmonitordata set pid=%d,gclist='%s',fgcygc='%s' where hostname='%s'" % update_values
            logger.info('execute sql :%s' % update_jvmmonitordata_sql)
            execute_sql(update_jvmmonitordata_sql)
            return

        insert_values = (hostname, modulename, mainclassname, int(pid), vmparam, gclist, updated, fgcygc)
        insert_jvmmonitordata_sql = "insert into jvmmonitordata(hostname,modulename,mainclassname,pid,vmparam,gclist,updated,fgcygc) values ('%s','%s','%s',%d,'%s','%s','%s','%s')" % insert_values
        logger.info('execute sql :%s' % insert_jvmmonitordata_sql)
        execute_sql(insert_jvmmonitordata_sql)
    
    
    def get_hbase_svr():
        """Pick an HBase REST endpoint that answers a HEAD request with 200.

        Up to 10 randomly chosen endpoints are probed with a 2 second timeout.

        :return: the first endpoint that answered 200; if none did, the last
            endpoint that was tried (an error is logged in that case).
        """
        hbase_list = ["http://192.168.100.1:8080", "http://192.168.100.2:8080", "http://192.168.100.3:8080"]
        hbase_url = None
        for _ in range(10):
            hbase_url = random.choice(hbase_list)
            try:
                r = requests.head(hbase_url, timeout=2)
            except requests.RequestException as e:
                # Only request failures are retried; KeyboardInterrupt and
                # programming errors are no longer swallowed.
                logger.info("connect %s error (%s), try another" % (hbase_url, e))
            else:
                if r.status_code == 200:
                    break
        else:
            # Reached only when no probe succeeded.
            logger.error("connect hbase failed with 10 times")
        return hbase_url
    
    
    def build_hbase_data(jvm):
        """Flatten one JVM info dictionary into the metrics stored in HBase.

        :param jvm: dict with ``alias``, ``gc_statistics`` (numeric strings
            keyed ``s0c, s1c, s0u, s1u, ec, eu, oc, ou, pc, pu``),
            ``total_thread_count`` and ``active_thread_count``.
        :return: dict with ``hostName``, the integer metrics ``jvmEc, jvmEu,
            jvmOc, jvmOu, jvmPc, jvmPu, jvmSc, jvmSu`` (source value times
            1000, truncated; the two survivor columns are summed first) and
            the integer ``totalThreadCount`` / ``activeThreadCount``.
        """
        host = jvm['alias']
        stats = jvm['gc_statistics']

        def scaled(*columns):
            # Sum the named columns as floats, then multiply by 1000.
            return sum(float(stats[column]) for column in columns) * 1000

        metric_columns = (
            ('jvmEc', ('ec',)),
            ('jvmEu', ('eu',)),
            ('jvmOc', ('oc',)),
            ('jvmOu', ('ou',)),
            ('jvmPc', ('pc',)),
            ('jvmPu', ('pu',)),
            ('jvmSc', ('s0c', 's1c')),
            ('jvmSu', ('s0u', 's1u')),
        )
        scaled_metrics = [(metric, scaled(*columns)) for metric, columns in metric_columns]

        total_threads = int(jvm['total_thread_count'])
        active_threads = int(jvm['active_thread_count'])

        result = dict(hostName=host)
        for metric, value in scaled_metrics:
            result[metric] = int(value)
        result['totalThreadCount'] = total_threads
        result['activeThreadCount'] = active_threads
        return result
    
    
    def jvm_hbase_constructor(jvm):
        """JVM HBase data constructor.

        Builds the JSON-serializable payload for one JVM sample.

        :param jvm: one info dictionary, passed through ``build_hbase_data``.
        :return: ``(row_key, json_rows)``. ``row_key`` is the base64 text of
            ``"<hostName>:<YYYYmmddHHMM>"`` (current local time);
            ``json_rows`` is ``{"Row": [{"key": row_key, "Cell": [...]}]}``
            with one cell per metric, whose ``column`` is base64 of
            ``"jvm:<metric>"`` and whose ``$`` is base64 of the metric value.
        """

        def _b64(text):
            # base64.b64encode needs bytes and returns bytes on Python 3; encode
            # first and decode afterwards so the payload is plain text that
            # json.dumps accepts on both Python 2 and Python 3.
            if not isinstance(text, bytes):
                text = text.encode('utf-8')
            return base64.b64encode(text).decode('ascii')

        data = build_hbase_data(jvm)
        row_key = _b64(data['hostName'] + ":" + datetime.datetime.now().strftime('%Y%m%d%H%M'))
        columns = ['jvmEc', 'jvmEu', 'jvmOc', 'jvmOu', 'jvmPc', 'jvmPu', 'jvmSc', 'jvmSu',
                   'totalThreadCount', 'activeThreadCount']
        cell = [{"column": _b64('jvm' + ":" + column), "$": _b64(str(data[column]))}
                for column in columns]
        json_rows = {"Row": [{'key': row_key, 'Cell': cell}]}
        return row_key, json_rows
    
    
    def push_to_hbase(jvm):
        """POST one JVM sample to the ``jvm`` table through the HBase REST endpoint.

        Best effort: the payload is posted up to 10 times, each time to the
        endpoint returned by ``get_hbase_svr``. Nothing is raised to the
        caller; construction failures, failed attempts and final failure are
        logged.

        :param jvm: one info dictionary as produced by ``get_jvm_info``.
        """
        table_name = 'jvm'
        try:
            row_key, json_rows = jvm_hbase_constructor(jvm)
        except Exception as e:
            logger.error("construct hbase data error %s" % str(e))
            return

        max_attempts = 10
        for attempt in range(1, max_attempts + 1):
            hbase_url = get_hbase_svr()
            try:
                response = requests.post(hbase_url + '/' + table_name + '/' + row_key, data=json.dumps(json_rows),
                                         headers={"Content-Type": "application/json", "Accept": "application/json"},
                                         timeout=60)
            except Exception as e:
                # Still best effort, but the cause is logged instead of being
                # dropped silently.
                logger.warning("save to hbase %s failed (attempt %d/%d): %s" % (hbase_url, attempt, max_attempts, e))
                continue
            if response.status_code == 200:
                break
            logger.warning("save to hbase %s got status %s (attempt %d/%d)" % (
                hbase_url, response.status_code, attempt, max_attempts))
        else:
            # Reached only when no attempt returned 200.
            logger.error("try to save hbase failed with 10 times,exit")
    
    
    def push_data(jvm_infos):
        """Publish every collected JVM record, first to MySQL and then to HBase.

        :param jvm_infos: iterable of info dictionaries (see ``get_jvm_info``).
        """
        for jvm_record in jvm_infos:
            push_to_oss(jvm_record)
            push_to_hbase(jvm_record)
    
    
    if __name__ == '__main__':
        # Script entry point: collect a snapshot of the local JVMs and publish it.
        push_data(get_jvm_info())

  • 相关阅读:
    加快火狐启动速度的几种方法 Leone
    我国的社保到底是多交好,还是少交好? Leone
    “情商”和“智商”究竟有哪些区别? Leone
    Atitti 知识图谱构建方法attilax 总结
    Atitit 知识图谱的数据来源
    Atitit java 二维码识别 图片识别
    Atitit 跨平台异常处理(2)异常转换 java c# js异常对象结构比较and转换
    Atitit 异常机制与异常处理的原理与概论
    Atitti knn实现的具体四个距离算法 欧氏距离、余弦距离、汉明距离、曼哈顿距离
    屏幕取词技术实现原理
  • 原文地址:https://www.cnblogs.com/txwsqk/p/9633471.html
Copyright © 2011-2022 走看看