zoukankan      html  css  js  c++  java
  • 用python编写exporter监控接口访问数量+错误率+响应时间

    1.编写exporter

    import prometheus_client
    from prometheus_client import Gauge,start_http_server,Counter
    import pycurl
    import time
    import threading
    from io import BytesIO
    
    # Prometheus Counter metrics (client_python Counter type) exposed by this exporter.
    # Probe results per status class; labelled by 'code' and 'url'.
    url_http_code = Counter(
        name="url_http_code",
        documentation="request http_code of the host",
        labelnames=['code', 'url'],
    )
    # Probe results per response-time bucket; labelled by 'le' and 'url'.
    url_http_request_time = Counter(
        name="url_http_request_time",
        documentation="request http_request_time of the host",
        labelnames=['le', 'url'],
    )
    # Total number of probes; labelled by 'url'.
    http_request_total = Counter(
        name="http_request_total",
        documentation="request request total of the host",
        labelnames=['url'],
    )
    
    # Probe a URL with curl and report its HTTP status code and total request time.
    def test_website(url):
        """Fetch ``url`` once with pycurl and return ``(http_code, total_time)``.

        The request uses CONNECTTIMEOUT=3 and TIMEOUT=3. If ``perform()``
        raises ``pycurl.error`` the probe is reported as ``(500, 999)`` so that
        callers count it as a server-side failure in the slowest bucket.

        Args:
            url: Address passed to libcurl as ``pycurl.URL``.

        Returns:
            A ``(http_code, http_total_time)`` tuple taken from
            ``pycurl.HTTP_CODE`` and ``pycurl.TOTAL_TIME``, or ``(500, 999)``
            when the transfer fails.
        """
        buffer_curl = BytesIO()
        c = pycurl.Curl()
        try:
            c.setopt(pycurl.URL, url)
            # Sink the response body into memory; with no write target
            # libcurl prints every fetched page to stdout.
            c.setopt(pycurl.WRITEFUNCTION, buffer_curl.write)
            c.setopt(pycurl.CONNECTTIMEOUT, 3)
            c.setopt(pycurl.TIMEOUT, 3)
            try:
                c.perform()
            except pycurl.error:
                http_code = 500
                http_total_time = 999
            else:
                http_code = c.getinfo(pycurl.HTTP_CODE)
                http_total_time = c.getinfo(pycurl.TOTAL_TIME)
        finally:
            # This function runs every few seconds per host; release the
            # handle explicitly instead of leaking one per probe.
            c.close()
        return http_code, http_total_time
        
    # Run one probe and fold its result into the exporter's counters.
    def count_metric(url):
        """Probe ``url`` once and increment the matching counters.

        One increment goes to ``url_http_code`` (status class '1xx'..'5xx';
        anything outside 100-499 is counted as '5xx'), one to
        ``url_http_request_time`` (bucket '1', '2', '3' or '+Inf' by total
        time) and one to ``http_request_total``.
        """
        status, elapsed = test_website(url)

        # Status class: first upper bound the code falls under, else '5xx'.
        code_class = '5xx'
        if status >= 100:
            for upper_bound, label in ((200, '1xx'), (300, '2xx'), (400, '3xx'), (500, '4xx')):
                if status < upper_bound:
                    code_class = label
                    break
        url_http_code.labels(code_class, url).inc()

        # Response-time bucket: first limit the elapsed time is below, else '+Inf'.
        time_bucket = next(
            (label for limit, label in ((1, '1'), (2, '2'), (3, '3')) if elapsed < limit),
            '+Inf',
        )
        url_http_request_time.labels(time_bucket, url).inc()

        http_request_total.labels(url).inc()
        
    # Scheduling loop: start a fresh probe of `url` every 5 seconds.
    def count_threads(url):
        """Start a daemon thread running ``count_metric(url)`` every 5 seconds, forever.

        Each probe runs in its own thread, so the 5-second sleep between
        launches does not wait for the previous probe to finish. This function
        never returns.

        Args:
            url: Address handed to ``count_metric`` for every probe.
        """
        while True:
            t = threading.Thread(target=count_metric, args=(url,))
            # Use the `daemon` attribute; Thread.setDaemon() is deprecated
            # since Python 3.10.
            t.daemon = True
            t.start()
            time.sleep(5)
    # Start the metrics endpoint, then one scheduling thread per monitored host.
    if __name__ == '__main__':
        # Expose /metrics on port 9091.
        start_http_server(9091)
        server_list = [
                'www.baidu.com',
                'www.qq.com',
                'blog.csdn.net',
                'github.com',
                'google.com'
                ]
        threads = []
        for url in server_list:
            t = threading.Thread(target=count_threads, args=(url,))
            # Use the `daemon` attribute; Thread.setDaemon() is deprecated
            # since Python 3.10.
            t.daemon = True
            threads.append(t)
        for thread in threads:
            thread.start()
        # Keep the main thread alive by waiting on every scheduler thread.
        # Joining inside a loop avoids relying on the leaked loop variable,
        # which only joined the last thread and raised NameError when
        # server_list was empty.
        for thread in threads:
            thread.join()

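     坑:Prometheus 的查询界面不会自动提示自定义 metrics 的名字,需要先用 curl 查看 exporter 实际暴露的指标,再把名字手动复制进去:curl http://10.0.0.111:19091/metrics(注意:端口要与 exporter 实际监听或映射出来的端口一致,上面脚本里 start_http_server 使用的是 9091)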

    复制:https://blog.csdn.net/specter11235/article/details/87927202

    --------------------------------------------------------------------------------------

    计算exporter的metrics的比率

    #Custom exporter counters
    ##Number of probes classified as 5xx (the exporter also records failed requests as 500)
    url_http_code_total{code="5xx",url="10.0.0.111:55555/a.txt"} 
    #Total number of probes of the endpoint
    http_request_total{url="10.0.0.111:55555/a.txt"}
    ------------------------------------------------------------------
    #Error ratio over the last minute; increase() is used because both series are counters (delta() is meant for gauges)
    increase(url_http_code_total{code="5xx",url="10.0.0.111:55555/a.txt"}[1m]) / on(url) group_left increase(http_request_total{url="10.0.0.111:55555/a.txt"}[1m])
    
    #Per-second request rate, computed from the last two samples in the 1m window
    irate(http_request_total[1m])
    #Per-second rate per status class; a non-zero 5xx series means something is wrong
    irate(url_http_code_total[1m])
    
    #Share of probes within an expected time, e.g. the fraction that finished in under 1 second
    increase(url_http_request_time_total{le='1'}[1m]) / on(url) group_left increase(http_request_total[1m])
    #复制
    https://blog.csdn.net/specter11235/article/details/87927202
  • 相关阅读:
    如何确定系统上的CPU插槽数量
    如何查找物理cpu,cpu核心和逻辑cpu的数量
    libssl.so.10: cannot open shared object file: No such file or directory
    yum安装的时候报错,关于python的函数库
    LVM分区无损增减
    LVM管理之减少LV的大小
    Windows 系统判断MD5 值的办法
    CentOS6系列系统启动常见故障排查与解决方法
    解决centos中vsftpd中文乱码
    我的博客即将入驻“云栖社区”,诚邀技术同仁一同入驻。
  • 原文地址:https://www.cnblogs.com/hixiaowei/p/13697750.html
Copyright © 2011-2022 走看看