1. 添加service
# ClusterIP Service exposing the etcd client/metrics port 2379 so that a
# ServiceMonitor can select it via the app/release labels below.
# The spec has no selector, so Kubernetes itself will not populate endpoints.
# NOTE(review): the backing addresses presumably come from the Rancher
# field.cattle.io/ipAddresses annotation - confirm, or create a matching
# Endpoints object by hand on non-Rancher clusters.
apiVersion: v1
kind: Service
metadata:
  annotations:
    field.cattle.io/creatorId: user-9r9cp
    # JSON list of etcd member IPs, kept as a quoted string.
    field.cattle.io/ipAddresses: '["10.211.41.10","10.211.41.15","10.211.41.20"]'
    # The literal string "null" (quoted), not a YAML null.
    field.cattle.io/targetDnsRecordIds: "null"
    field.cattle.io/targetWorkloadIds: "null"
  labels:
    app: etcd
    release: cluster-monitoring
  name: etcd-svc
  namespace: cattle-prometheus
spec:
  ports:
    # The port name is what the ServiceMonitor endpoint refers to.
    - name: https-metrics
      port: 2379
      protocol: TCP
      targetPort: 2379
  sessionAffinity: None
  type: ClusterIP
2. prometheus挂载secret
# Create the generic secret "etcd-certs" in the cattle-prometheus namespace
# from the etcd CA certificate, server certificate and server key on this host.
kubectl create secret generic etcd-certs \
  --from-file=/etc/kubernetes/pki/etcd/ca.crt \
  --from-file=/etc/kubernetes/pki/etcd/server.crt \
  --from-file=/etc/kubernetes/pki/etcd/server.key \
  -n cattle-prometheus
3. 编辑 Prometheus 资源挂载该 secret，之后在 pod 里就能看到挂载的证书
# Open the Prometheus custom resource in cattle-prometheus for editing;
# the fields listed below this command are the ones to set.
kubectl edit prometheus -n cattle-prometheus
# Fragment of the Prometheus resource (these keys live under .spec).
replicas: 1
# Each secret listed here is mounted into the Prometheus pod at
# /etc/prometheus/secrets/<secret-name>/.
secrets:
  - etcd-certs
4. 增加etcd ServiceMonitor
# ServiceMonitor that scrapes etcd over HTTPS through the Service labelled
# app=etcd / release=cluster-monitoring in the cattle-prometheus namespace,
# using the client certificate files from the mounted etcd-certs secret.
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
  labels:
    app: prometheus-operator-etcd
    chart: prometheus-operator-7.4.0
    heritage: Tiller
    release: pharos-prometheus-operator
  name: pharos-prometheus-operator-etcd
  namespace: cattle-prometheus
spec:
  endpoints:
    - bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token
      # Must match the port *name* on the target Service.
      port: https-metrics
      scheme: https
      tlsConfig:
        # Paths inside the Prometheus pod where the etcd-certs secret is mounted.
        caFile: /etc/prometheus/secrets/etcd-certs/ca.crt
        certFile: /etc/prometheus/secrets/etcd-certs/server.crt
        keyFile: /etc/prometheus/secrets/etcd-certs/server.key
        # NOTE(review): skips server certificate verification even though a
        # caFile is provided; consider false once the etcd cert SANs are
        # confirmed to cover the scraped addresses.
        insecureSkipVerify: true
  namespaceSelector:
    matchNames:
      - cattle-prometheus
  selector:
    matchLabels:
      app: etcd
      release: cluster-monitoring
5. 添加规则
---
# Alert rule: fires (severity warning) when a scrape target of job "etcd-svc"
# has reported up != 1 for 5 minutes.
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
  annotations:
    prometheus-operator-validated: "true"
  # NOTE(review): generation is normally populated by the API server; it looks
  # like it was carried over from an exported object.
  generation: 1
  labels:
    app: prometheus-operator
    release: pharos-prometheus-operator
  name: etcd-has-down
  namespace: cattle-prometheus
spec:
  groups:
    - name: etcd-has-down
      rules:
        - alert: etcd-has-down
          annotations:
            # Quoted so the {{ ... }} template is always read as a plain string.
            description: 'Etcd has down ,check instance {{ $labels.instance }}'
          expr: 'up{job="etcd-svc"} != 1'
          for: 5m
          labels:
            alertname: etcd-has-down
            severity: warning
---
# Alert rule: fires (severity warning) when max(etcd_server_has_leader) has
# been different from 1 for 5 minutes.
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
  annotations:
    prometheus-operator-validated: "true"
  # NOTE(review): generation is normally populated by the API server; it looks
  # like it was carried over from an exported object.
  generation: 1
  labels:
    app: prometheus-operator
    release: pharos-prometheus-operator
  name: etcd-has-leader
  namespace: cattle-prometheus
spec:
  groups:
    - name: etcd-has-leader
      rules:
        - alert: etcd-has-leader
          annotations:
            description: etcd leader error, please check !
          # max() collapses all series into one value, so (assuming the gauge
          # is 0/1) this only fires when no scraped member reports a leader.
          expr: max(etcd_server_has_leader) != 1
          for: 5m
          labels:
            alertname: etcd-has-leader
            severity: warning
---
# Alert rule: fires (severity warning) when committed-minus-applied etcd
# proposals has stayed at 1000 or more for 15 minutes.
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
  annotations:
    prometheus-operator-validated: "true"
  # NOTE(review): generation is normally populated by the API server; it looks
  # like it was carried over from an exported object.
  generation: 1
  labels:
    app: prometheus-operator
    release: pharos-prometheus-operator
  name: etcd-proposals-load
  namespace: cattle-prometheus
spec:
  groups:
    - name: etcd-proposals-load
      rules:
        - alert: etcd-proposals-load
          annotations:
            # Folded scalar: the line break becomes a single space.
            description: >-
              Etcd proposals applied is slowly ,maybe etcd server is overloaded, please
              check !
          # Folded scalar: evaluates as one single-line PromQL expression.
          expr: >-
            (etcd_server_proposals_committed_total - etcd_server_proposals_applied_total)
            >= 1000
          for: 15m
          labels:
            alertname: etcd-proposals-load
            severity: warning