zoukankan      html  css  js  c++  java
  • ConfigMap

    创建 ConfigMap(保存 Prometheus 告警规则)

    cat prometheus-rules.yaml
    
    # ConfigMap manifest: each key under `data` holds the text of one
    # Prometheus rule file, written as a literal block scalar.
    apiVersion: v1
    data:
      # CPU usage alert (severity: page). Fires when 100 minus the per-instance
      # average idle-CPU irate over 5m (scaled by 100) stays above 80 for 5 minutes.
      # The current value is exposed in the description, summary and value annotations.
      cpu-usage.rules: |
        groups:
        - name: cpu-usage.rules
          rules:
          - alert: CPU使用率过高(大于80%)
            expr: (100 - (avg by(instance) (irate(node_cpu{mode="idle",name="node-exporter"}[5m]))* 100)) > 80
            for: 5m
            labels:
              severity: page
            annotations:
              description: 'CPU使用率过高: {{ $value }}'
              summary: 'CPU使用率过高: {{ $value }}'
              value: '{{ $value }}'
      # MySQL availability alert (severity: warning). Fires when mysql_up is
      # anything other than 1 for 2 minutes.
      mysql-usage.rules: |
        groups:
        - name: mysql-usage.rules
          rules:
          - alert: mysql状态没有正常up
            expr: mysql_up != 1
            for: 2m
            labels:
              severity: warning
            annotations:
              description: 'mysql状态没有正常up: {{ $value }}'
              summary: 'mysql状态没有正常up: {{ $value }}'
              value: '{{ $value }}'
      # OVSDB vs. OpenFlow connection-count drift alert (severity: warning).
      # Fires when the absolute difference between ODL_ESTABLISHED_NUMBER_6640
      # and ODL_ESTABLISHED_NUMBER_6633 stays above 200 for 5 minutes.
      odl.rules: |
        groups:
        - name: odl.rules
          rules:
          - alert: OVSDB跟OPENFLOW数量差异过大
            expr: abs(ODL_ESTABLISHED_NUMBER_6640 - ODL_ESTABLISHED_NUMBER_6633) > 200
            for: 5m
            labels:
              severity: warning
            annotations:
              description: 'OVSDB跟OPENFLOW数量差异过大: {{ $value }}'
              summary: 'OVSDB跟OPENFLOW数量差异过大: {{ $value }}'
              value: '{{ $value }}'
      # Memory usage alert (severity: warning). Fires when
      # (MemTotal - MemFree - Cached - Buffers) / MemTotal * 100 stays above 75
      # for 5 minutes. The annotations name memory (not CPU) so the notification
      # text matches the alert that actually fired.
      memory.rules: |
        groups:
        - name: memory.rules
          rules:
          - alert: 内存使用率过高(>75%)
            expr: (node_memory_MemTotal - node_memory_MemFree - node_memory_Cached - node_memory_Buffers) / node_memory_MemTotal* 100 > 75
            for: 5m
            labels:
              severity: warning
            annotations:
              description: '内存使用率过高: {{ $value }}'
              summary: '内存使用率过高: {{ $value }}'
              value: '{{ $value }}'
      # MySQL connection-growth alert (severity: warning). Fires when the 5m
      # rate() of mysql_global_status_max_used_connections stays above 40 for
      # 2 minutes.
      # NOTE(review): rate() is intended for counters; confirm this metric is one,
      # otherwise delta() over the window may be what was meant.
      mysql-used-connections.rules: |
        groups:
        - name: mysql-used-connections.rules
          rules:
          - alert: mysql的used_connections变化过大
            expr: rate(mysql_global_status_max_used_connections[5m])> 40
            for: 2m
            labels:
              severity: warning
            annotations:
              description: 'mysql的used_connections变化过大: {{ $value }}'
              summary: 'mysql的used_connections变化过大: {{ $value }}'
              value: '{{ $value }}'
      # Root filesystem usage alert (severity: warning). Fires when
      # (size - avail) / size * 100 for mountpoint "/" stays above 75 for 5 minutes.
      diskhighuse.rules: |
        groups:
        - name: diskhighuse.rules
          rules:
          - alert: 磁盘使用率过高(>75%)
            expr: (node_filesystem_size{mountpoint="/"} - node_filesystem_avail{mountpoint="/"}) / node_filesystem_size{mountpoint="/"}* 100 > 75
            for: 5m
            labels:
              severity: warning
            annotations:
              description: '磁盘使用率过高: {{ $value }}'
              summary: '磁盘使用率过高: {{ $value }}'
              value: '{{ $value }}'
      # Disk I/O alert for device sda (severity: warning). Fires when
      # node_disk_io_now stays above 50 for 1 minute. The expression is kept on
      # a single line so it is not silently folded from a continuation line.
      # NOTE(review): the alert text says "iowait > 50%", but the expression
      # compares node_disk_io_now to 50 — confirm this metric is the intended one.
      diskiohighuse.rules: |
        groups:
        - name: diskiohighuse.rules
          rules:
          - alert: iowait过高(>50%)
            expr: node_disk_io_now{device="sda"} > 50
            for: 1m
            labels:
              severity: warning
            annotations:
              description: '磁盘iowait过高: {{ $value }}'
              summary: '磁盘iowait过高: {{ $value }}'
              value: '{{ $value }}'
      # OVSDB CLOSE_WAIT alert (severity: warning). Fires when
      # ODL_CLOSEWAIT_NUMBER_6640 exceeds 200. No `for:` clause is set, so no
      # hold period is configured for this rule.
      ODL_OVSDB_closewait.rules: |
        groups:
        - name: ODL_OVSDB_closewait.rules
          rules:
          - alert: ODL_OVSDB_closewait数量过多
            expr: ODL_CLOSEWAIT_NUMBER_6640 > 200
            labels:
              severity: warning
            annotations:
              description: 'ODL_OVSDB_closewait数量过多: {{ $value }}'
              summary: 'ODL_OVSDB_closewait数量过多: {{ $value }}'
              value: '{{ $value }}'
      # OpenFlow connection-count deviation alert (severity: warning). Fires when
      # ODL_ESTABLISHED_NUMBER_6633 differs from its own average over the
      # previous 12 hours by more than 2000 for 5 minutes.
      # NOTE(review): the averaging window is 12h to match the alert name and
      # annotations ("前12小时"); the expression previously used 1h — confirm
      # that 12h is the intended window.
      Openflow.rules: |
        groups:
        - name: openflow.rules
          rules:
          - alert: OPENFLOW数量跟前12小时的平均数量差距过大
            expr: abs(ODL_ESTABLISHED_NUMBER_6633-avg_over_time(ODL_ESTABLISHED_NUMBER_6633[12h])) > 2000
            for: 5m
            labels:
              severity: warning
            annotations:
              description: 'OPENFLOW数量跟前12小时的平均数量差距过大: {{ $value }}'
              summary: 'OPENFLOW数量跟前12小时的平均数量差距过大: {{ $value }}'
              value: '{{ $value }}'
      # OpenFlow CLOSE_WAIT alert (severity: warning). Fires when
      # ODL_CLOSEWAIT_NUMBER_6633 exceeds 200. No `for:` clause is set, so no
      # hold period is configured for this rule.
      ODL_OPENFLOW_closewait.rules: |
        groups:
        - name: ODL_OPENFLOW_closewait.rules
          rules:
          - alert: ODL_OPENFLOW_closewait数量过多
            expr: ODL_CLOSEWAIT_NUMBER_6633 > 200
            labels:
              severity: warning
            annotations:
              description: 'ODL_OPENFLOW_closewait数量过多: {{ $value }}'
              summary: 'ODL_OPENFLOW_closewait数量过多: {{ $value }}'
              value: '{{ $value }}'
      # Node reboot alert (severity: warning). Fires when node_boot_time changed
      # at least once within the last 5 minutes. No `for:` clause is set.
      node_reboot.rules: |
        groups:
        - name: node_reboot.rules
          rules:
          - alert: node被重启
            expr: changes(node_boot_time[5m]) > 0
            labels:
              severity: warning
            annotations:
              description: 'node被重启: {{ $value }}'
              summary: 'node被重启: {{ $value }}'
              value: '{{ $value }}'
      # Redis cluster missing-data alert (severity: warning). Fires when the
      # redis_cluster_nodes_fail series has been absent for 1 minute.
      redis-cluster.rules: |
        groups:
        - name: redis-cluster.rules
          rules:
          - alert: redis_cluster一分钟内没有收到数据
            expr: absent(redis_cluster_nodes_fail)
            for: 1m
            labels:
              severity: warning
            annotations:
              description: 'redis_cluster一分钟内没有收到数据: {{ $value }}'
              summary: 'redis_cluster一分钟内没有收到数据: {{ $value }}'
              value: '{{ $value }}'
      # Redis cluster membership-change alert (severity: warning). Fires when
      # redis_cluster_nodes_connected changed value at least once within the last
      # 5 minutes, sustained for 1 minute. changes() is used instead of rate()
      # because the alert is about the value changing in either direction,
      # while rate() is meant for monotonically increasing counters.
      # NOTE(review): assumes redis_cluster_nodes_connected is a gauge — confirm.
      redis_cluster_nodes.rules: |
        groups:
        - name: redis_cluster_nodes.rules
          rules:
          - alert: redis_cluster_nodes数量发生变化
            expr: changes(redis_cluster_nodes_connected[5m]) > 0
            for: 1m
            labels:
              severity: warning
            annotations:
              description: 'redis_cluster_nodes数量发生变化: {{ $value }}'
              summary: 'redis_cluster_nodes数量发生变化: {{ $value }}'
              value: '{{ $value }}'
      # Redis failed-node alert (severity: warning). Fires when
      # redis_cluster_nodes_fail stays above 0 for 1 minute.
      redis_fail.rules: |
        groups:
        - name: redis_fail.rules
          rules:
          - alert: redis_cluster_nodes存在fail的数量
            expr: redis_cluster_nodes_fail > 0
            for: 1m
            labels:
              severity: warning
            annotations:
              description: 'redis_cluster_nodes存在fail的数量: {{ $value }}'
              summary: 'redis_cluster_nodes存在fail的数量: {{ $value }}'
              value: '{{ $value }}'
    # Object identity for the rule-file ConfigMap defined by the `data` keys above.
    kind: ConfigMap
    metadata:
      # NOTE(review): a null creationTimestamp looks like leftover generated
      # output — confirm whether it can be dropped.
      creationTimestamp: null
      name: prometheus-rules
      namespace: monitoring
    View Code

    挂载 ConfigMap(将 prometheus-rules 挂载到容器的 /etc/prometheus-rules 目录)

    cat prometheus.yaml
    
    # Deployment running a single Prometheus pod in the monitoring namespace.
    # apps/v1 is used because Deployments are no longer served from
    # extensions/v1beta1 on current clusters; apps/v1 requires spec.selector.
    apiVersion: apps/v1
    kind: Deployment
    metadata:
      name: prometheus-core
      namespace: monitoring
      labels:
        app: prometheus
        component: core
    spec:
      replicas: 1
      # Must match the pod template labels below.
      selector:
        matchLabels:
          app: prometheus
          component: core
      template:
        metadata:
          name: prometheus-main
          labels:
            app: prometheus
            component: core
        spec:
          serviceAccountName: prometheus-k8s
          containers:
          - name: prometheus
            image: 10.50.51.95:5000/prometheus:v2.4.3
            args:
            - --storage.tsdb.retention=15d
            - --config.file=/etc/prometheus/prometheus.yml
            # Older single-dash flags, kept disabled for reference:
            # - '-storage.local.retention=168h'
            # - '-storage.local.memory-chunks=1536000000'
            # - '-config.file=/etc/prometheus/prometheus.yml'
            # - '-alertmanager.url=http://alertmanager:9093/'
            ports:
            - name: webui
              containerPort: 9090
            # No requests or limits are set. Uncomment and tune the values
            # below to constrain the container.
            resources: {}
            #   requests:
            #     cpu: 500m
            #     memory: 200M
            #   limits:
            #     cpu: 500m
            #     memory: 200M
            volumeMounts:
            # Main Prometheus configuration (read via --config.file above).
            - name: config-volume
              mountPath: /etc/prometheus
            # Host-backed data directory.
            - name: pdata
              mountPath: /prometheus
            # Rule files from the prometheus-rules ConfigMap, one file per key.
            - name: rules-volume
              mountPath: /etc/prometheus-rules
          volumes:
          - name: config-volume
            configMap:
              name: prometheus-core
          - name: pdata
            hostPath:
              path: /opt/prometheusdata
          - name: rules-volume
            configMap:
              name: prometheus-rules
          # Schedule only onto nodes labelled environment=monitor.
          nodeSelector:
            environment: monitor
    View Code

    https://blog.51cto.com/wzlinux/2331050

  • 相关阅读:
    小学生入门激光SLAM<二>、2.1-前端点云帧间匹配
    小学生入门激光SLAM<一>、激光雷达去除畸变
    imgui开发笔记<1>、ubuntu环境下快速应用
    ROS问题记录
    LibTorch实战六:U2-Net理论详解<一>
    git-clone小技巧
    深度学习模型部署概述
    debian10 docker 安装 jenkins
    FastDFS集群部署
    python中的metaclass
  • 原文地址:https://www.cnblogs.com/hanwei666/p/13228825.html
Copyright © 2011-2022 走看看