一、环境介绍
[root@master test]# uname -r 4.4.223-1.el7.elrepo.x86_64 [root@master test]# kubectl get node NAME STATUS ROLES AGE VERSION master Ready master 27d v1.18.0 node Ready <none> 27d v1.18.0 [root@master test]# helm version version.BuildInfo{Version:"v3.2.0", GitCommit:"e11b7ce3b12db2941e90399e874513fbd24bcb71", GitTreeState:"clean", GoVersion:"go1.13.10"}
二、helm添加仓库
#阿里云
helm repo add aliyuncs https://apphub.aliyuncs.com
#官方(注:该旧地址已停用,stable仓库现已迁移至 https://charts.helm.sh/stable)
helm repo add stable https://kubernetes-charts.storage.googleapis.com
三、helm search prometheus-operator
[root@master test]# helm search repo prometheus-operator NAME CHART VERSION APP VERSION DESCRIPTION aliyuncs/prometheus-operator 8.7.0 0.35.0 Provides easy monitoring definitions for Kubern... stable/prometheus-operator 8.13.7 0.38.1 Provides easy monitoring definitions for Kubern...
四、安装
helm install mypro aliyuncs/prometheus-operator
五、查看
[root@master test]# helm list NAME NAMESPACE REVISION UPDATED STATUS CHART APP VERSION mypro default 1 2020-06-09 09:32:37.091220013 +0800 CST deployed prometheus-operator-8.7.0 0.35.0 [root@master test]# helm status mypro NAME: mypro LAST DEPLOYED: Tue Jun 9 09:32:37 2020 NAMESPACE: default STATUS: deployed REVISION: 1 NOTES: The Prometheus Operator has been installed. Check its status by running: kubectl --namespace default get pods -l "release=mypro" Visit https://github.com/coreos/prometheus-operator for instructions on how to create & configure Alertmanager and Prometheus instances using the Operator. [root@master test]# kubectl --namespace default get pods -l "release=mypro" NAME READY STATUS RESTARTS AGE mypro-grafana-f5b868868-8ckgs 2/2 Running 0 55m mypro-prometheus-node-exporter-dg6w4 1/1 Running 0 55m mypro-prometheus-node-exporter-x9l4b 1/1 Running 0 55m mypro-prometheus-operator-operator-5b458d4659-p7t4l 2/2 Running 0 55m
六、配置ingress浏览器访问
[root@master test]# cat grafana-ingress.yaml
apiVersion: extensions/v1beta1
kind: Ingress
metadata:
  name: ingress-grafana
spec:
  rules:
  - host: grafana.zhang.com
    http:
      paths:
      - backend:
          serviceName: mypro-grafana
          servicePort: 80
[root@master test]# cat prometheus-ingress.yaml
apiVersion: extensions/v1beta1
kind: Ingress
metadata:
  name: ingress-prometheus
spec:
  rules:
  - host: prometheus.zhang.com
    http:
      paths:
      - backend:
          serviceName: mypro-prometheus-operator-prometheus
          servicePort: 9090
[root@master test]# cat alertmanager-ingress.yaml
apiVersion: extensions/v1beta1
kind: Ingress
metadata:
  name: ingress-alertmanager
spec:
  rules:
  - host: alertmanager.zhang.com
    http:
      paths:
      - backend:
          serviceName: mypro-prometheus-operator-alertmanager
          servicePort: 9093
七、查看
[root@master test]# kubectl get ingress NAME CLASS HOSTS ADDRESS PORTS AGE ingress-alertmanager <none> alertmanager.zhang.com 10.111.12.239 80 29m ingress-grafana <none> grafana.zhang.com 10.111.12.239 80 32m ingress-prometheus <none> prometheus.zhang.com 10.111.12.239 80 30m
八、浏览器访问
九、修改alertmanager的报警配置
1、创建alertmanager的配置文件
[root@master test]# cat alertmanger_config.yaml
global:
  resolve_timeout: 5m #处理超时时间,默认为5min
  smtp_smarthost: 'smtp.163.com:465' # 邮箱smtp服务器代理
  smtp_from: 'xxxx@163.com' # 发送邮箱名称
  smtp_auth_username: 'xxxx@163.com' # 邮箱名称
  smtp_auth_password: 'xxxxxxxxx' #邮箱密码
  smtp_require_tls: false
route:
  group_by: ['alertname'] # 报警分组名称
  group_wait: 10s # 最初即第一次等待多久时间发送一组警报的通知
  group_interval: 10s # 在发送新警报前的等待时间
  repeat_interval: 1m # 发送重复警报的周期
  receiver: 'email' # 发送警报的接收者的名称,以下receivers name的名称
receivers:
- name: 'email' # 警报
  email_configs: # 邮箱配置
  - to: 'xxxxxx@163.com' # 接收警报的email配置
inhibit_rules:
- source_match:
    severity: 'critical'
  target_match:
    severity: 'warning'
  equal: ['alertname', 'dev', 'instance']
2、base64编码(写入secret的内容必须是不含换行和空格的单行;可直接使用 base64 -w 0 alertmanger_config.yaml 输出单行,或者对下面命令的输出用xargs合并为一行,再用sed删掉空格)
cat alertmanger_config.yaml |base64
3、替换secret中的alertmanager.yaml配置(用上面base64编码的替换)
kubectl edit secret alertmanager-mypro-prometheus-operator-alertmanager
4、查看邮箱
十、自定义监控报警项
1、介绍
prometheus-operator可以使用PrometheusRule来动态地添加自定义监控项
2、查看prometheus-operator项目中Prometheus的标签选择器
kubectl get prometheus mypro-prometheus-operator-prometheus -o jsonpath={".spec.ruleSelector"};echo
3、创建自定义的PrometheusRule监控
[root@master ~]# cat test-PrometheusRule.yaml
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
  labels:
    app: prometheus-operator #需与Prometheus的标签选择器(ruleSelector)中的标签一致;如果自己创建Prometheus,则要让它的标签选择器匹配PrometheusRule的这些labels
    release: mypro #需与Prometheus的标签选择器(ruleSelector)中的标签一致;如果自己创建Prometheus,则要让它的标签选择器匹配PrometheusRule的这些labels
    prometheus: test-example
  name: test-load1-prometheusrule
spec:
  groups:
  - name: test-load-1
    rules:
    - alert: test-load-1
      expr: node_load1 > 1
      for: 2m
      labels:
        team: node
      annotations:
        summary: "{{$labels.instance}}: load 1 >1"
        description: "{{$labels.instance}}: job {{$labels.job}} 测试测试 负载大于1"
#导入
kubectl apply -f test-PrometheusRule.yaml
4、登录pod查看
[root@master ~]# kubectl exec -it prometheus-mypro-prometheus-operator-prometheus-0 sh kubectl exec [POD] [COMMAND] is DEPRECATED and will be removed in a future version. Use kubectl kubectl exec [POD] -- [COMMAND] instead. Defaulting container name to prometheus. Use 'kubectl describe pod/prometheus-mypro-prometheus-operator-prometheus-0 -n default' to see all of the containers in this pod. /prometheus $ ls /etc/prometheus/rules/prometheus-mypro-prometheus-operator-prometheus-rulefiles-0/default-test-load1-prometheusrule.yaml /etc/prometheus/rules/prometheus-mypro-prometheus-operator-prometheus-rulefiles-0/default-test-load1-prometheusrule.yaml /prometheus $ cat /etc/prometheus/rules/prometheus-mypro-prometheus-operator-prometheus-rulefiles-0/default-test-load1-prometheusrule.yaml groups: - name: test-load-1 rules: - alert: test-load-1 annotations: description: '{{$labels.instance}}: job {{$labels.job}} 测试测试 负载大于1' summary: '{{$labels.instance}}: load 1 >1' expr: node_load1 > 1 for: 2m labels: team: node