zoukankan      html  css  js  c++  java
  • helm安装prometheus-operator

    一、环境介绍

    [root@master test]# uname -r
    4.4.223-1.el7.elrepo.x86_64
    [root@master test]# kubectl  get node 
    NAME     STATUS   ROLES    AGE   VERSION
    master   Ready    master   27d   v1.18.0
    node     Ready    <none>   27d   v1.18.0
    [root@master test]# helm  version
    version.BuildInfo{Version:"v3.2.0", GitCommit:"e11b7ce3b12db2941e90399e874513fbd24bcb71", GitTreeState:"clean", GoVersion:"go1.13.10"}
    

      

    二、helm添加仓库

    #阿里云
    helm  repo add aliyuncs https://apphub.aliyuncs.com
    #官方(注:该旧地址已于2020年11月停用,现在应使用 https://charts.helm.sh/stable)
    helm  repo add stable  https://kubernetes-charts.storage.googleapis.com

    三、helm search prometheus-operator

    [root@master test]# helm search repo  prometheus-operator
    NAME                            CHART VERSION    APP VERSION    DESCRIPTION                                       
    aliyuncs/prometheus-operator    8.7.0            0.35.0         Provides easy monitoring definitions for Kubern...
    stable/prometheus-operator      8.13.7           0.38.1         Provides easy monitoring definitions for Kubern...

    四、安装

    helm  install mypro aliyuncs/prometheus-operator 

    五、查看

    [root@master test]# helm list
    NAME     NAMESPACE    REVISION    UPDATED                                    STATUS      CHART                        APP VERSION
    mypro    default      1           2020-06-09 09:32:37.091220013 +0800 CST    deployed    prometheus-operator-8.7.0    0.35.0     
    [root@master test]# helm  status mypro
    NAME: mypro
    LAST DEPLOYED: Tue Jun  9 09:32:37 2020
    NAMESPACE: default
    STATUS: deployed
    REVISION: 1
    NOTES:
    The Prometheus Operator has been installed. Check its status by running:
      kubectl --namespace default get pods -l "release=mypro"
    
    Visit https://github.com/coreos/prometheus-operator for instructions on how
    to create & configure Alertmanager and Prometheus instances using the Operator.
    [root@master test]# kubectl --namespace default get pods -l "release=mypro"
    NAME                                                  READY   STATUS    RESTARTS   AGE
    mypro-grafana-f5b868868-8ckgs                         2/2     Running   0          55m
    mypro-prometheus-node-exporter-dg6w4                  1/1     Running   0          55m
    mypro-prometheus-node-exporter-x9l4b                  1/1     Running   0          55m
    mypro-prometheus-operator-operator-5b458d4659-p7t4l   2/2     Running   0          55m

    六、配置ingress浏览器访问

    [root@master test]# cat grafana-ingress.yaml 
    apiVersion: extensions/v1beta1
    kind: Ingress
    metadata:
      name: ingress-grafana
    spec:
      rules:
      - host: grafana.zhang.com
        http:
          paths:
          - backend:
              serviceName: mypro-grafana
              servicePort: 80
    [root@master test]# cat prometheus-ingress.yaml 
    apiVersion: extensions/v1beta1
    kind: Ingress
    metadata:
      name: ingress-prometheus
    spec:
      rules:
      - host: prometheus.zhang.com
        http:
          paths:
          - backend:
              serviceName: mypro-prometheus-operator-prometheus
              servicePort: 9090
    [root@master test]# cat alertmanager-ingress.yaml 
    apiVersion: extensions/v1beta1
    kind: Ingress
    metadata:
      name: ingress-alertmanager
    spec:
      rules:
      - host: alertmanager.zhang.com
        http:
          paths:
          - backend:
              serviceName: mypro-prometheus-operator-alertmanager
              servicePort: 9093

    七、查看

    [root@master test]# kubectl get ingress
    NAME                   CLASS    HOSTS                    ADDRESS         PORTS   AGE
    ingress-alertmanager   <none>   alertmanager.zhang.com   10.111.12.239   80      29m
    ingress-grafana        <none>   grafana.zhang.com        10.111.12.239   80      32m
    ingress-prometheus     <none>   prometheus.zhang.com     10.111.12.239   80      30m

    八、浏览器访问

     

     

    九、修改alertmanager的报警配置

    1、创建alertmanager的配置文件

    [root@master test]# cat  alertmanger_config.yaml 
    global: 
      resolve_timeout: 5m #处理超时时间,默认为5min
      smtp_smarthost: 'smtp.163.com:465' # 邮箱smtp服务器代理
      smtp_from: 'xxxx@163.com' # 发送邮箱名称
      smtp_auth_username: 'xxxx@163.com' # 邮箱名称
      smtp_auth_password: 'xxxxxxxxx' #邮箱密码
      smtp_require_tls: false 
    route:
      group_by: ['alertname'] # 报警分组名称
      group_wait: 10s # 首次发送一组警报的通知前需要等待的时间
      group_interval: 10s # 在发送新警报前的等待时间
      repeat_interval: 1m # 发送重复警报的周期
      receiver: 'email' # 接收警报的接收者名称,对应下面receivers中的name
    
    receivers:
      - name: 'email' # 警报
        email_configs: # 邮箱配置
        - to: 'xxxxxx@163.com'  # 接收警报的email配置
    
    inhibit_rules:
      - source_match:
          severity: 'critical'
        target_match:
          severity: 'warning'
        equal: ['alertname', 'dev', 'instance']

    2、base64编码(编码结果必须是不含换行和空格的单行;可以直接使用 base64 -w 0,或者用xargs合并为一行后再用sed删掉空格)

    cat  alertmanger_config.yaml |base64 

    3、替换secret中的alertmanager.yaml配置(用上面base64编码得到的结果替换原有的值)

     kubectl edit secret  alertmanager-mypro-prometheus-operator-alertmanager 

    4、查看邮箱

    十、自定义监控报警项

     1、介绍

    prometheus-operator可以使用PrometheusRule来动态地添加自定义监控项

    2、查看prometheus-operator项目中Prometheus的标签选择器

    kubectl get  prometheus mypro-prometheus-operator-prometheus  -o jsonpath={".spec.ruleSelector"};echo 

    3、创建自定义的PrometheusRule监控

    [root@master ~]# cat  test-PrometheusRule.yaml 
    apiVersion: monitoring.coreos.com/v1
    kind: PrometheusRule
    metadata:
      labels:
        app: prometheus-operator   #必须与Prometheus的标签选择器(ruleSelector)中的标签一致;如果自己创建Prometheus,则需要让它的ruleSelector匹配PrometheusRule的labels
        release: mypro             #必须与Prometheus的标签选择器(ruleSelector)中的标签一致;如果自己创建Prometheus,则需要让它的ruleSelector匹配PrometheusRule的labels
        prometheus: test-example
      name: test-load1-prometheusrule  
    spec:
      groups:
      - name: test-load-1
        rules:
        - alert: test-load-1
          expr: node_load1 > 1
          for: 2m
          labels:
            team: node
          annotations:
            summary: "{{$labels.instance}}: load 1 >1"
            description: "{{$labels.instance}}: job {{$labels.job}} 测试测试 负载大于1"
    
    
    #导入
    kubectl  apply -f test-PrometheusRule.yaml 
    

      

    4、登录pod查看

    [root@master ~]# kubectl exec -it   prometheus-mypro-prometheus-operator-prometheus-0  sh 
    kubectl exec [POD] [COMMAND] is DEPRECATED and will be removed in a future version. Use kubectl kubectl exec [POD] -- [COMMAND] instead.
    Defaulting container name to prometheus.
    Use 'kubectl describe pod/prometheus-mypro-prometheus-operator-prometheus-0 -n default' to see all of the containers in this pod.
    /prometheus $ ls /etc/prometheus/rules/prometheus-mypro-prometheus-operator-prometheus-rulefiles-0/default-test-load1-prometheusrule.yaml 
    /etc/prometheus/rules/prometheus-mypro-prometheus-operator-prometheus-rulefiles-0/default-test-load1-prometheusrule.yaml
    /prometheus $ cat  /etc/prometheus/rules/prometheus-mypro-prometheus-operator-prometheus-rulefiles-0/default-test-load1-prometheusrule.yaml 
    groups:
    - name: test-load-1
      rules:
      - alert: test-load-1
        annotations:
          description: '{{$labels.instance}}: job {{$labels.job}} 测试测试 负载大于1'
          summary: '{{$labels.instance}}: load 1 >1'
        expr: node_load1 > 1
        for: 2m
        labels:
          team: node

    5、浏览器查看prometheus

  • 相关阅读:
    leetcode108 Convert Sorted Array to Binary Search Tree
    leetcode98 Validate Binary Search Tree
    leetcode103 Binary Tree Zigzag Level Order Traversal
    leetcode116 Populating Next Right Pointers in Each Node
    Python全栈之路Day15
    Python全栈之路Day11
    集群监控
    Python全栈之路Day10
    自动部署反向代理、web、nfs
    5.Scss的插值
  • 原文地址:https://www.cnblogs.com/zhangb8042/p/13071006.html
Copyright © 2011-2022 走看看