1. 添加service
# Service that gives Prometheus a scrape target for the etcd members.
# It declares no pod selector; the member addresses are carried in the
# field.cattle.io/ipAddresses annotation.
# NOTE(review): presumably Rancher turns that annotation into the Endpoints
# for this Service - confirm on the target cluster.
apiVersion: v1
kind: Service
metadata:
  annotations:
    field.cattle.io/creatorId: user-9r9cp
    # JSON list kept as a single-quoted string so YAML does not parse it as a
    # flow sequence.
    field.cattle.io/ipAddresses: '["10.211.41.10","10.211.41.15","10.211.41.20"]'
    # Quoted on purpose: these are the literal string "null", not a YAML null.
    field.cattle.io/targetDnsRecordIds: "null"
    field.cattle.io/targetWorkloadIds: "null"
  labels:
    # These labels are what the etcd ServiceMonitor's selector matches on.
    app: etcd
    release: cluster-monitoring
  name: etcd-svc
  namespace: cattle-prometheus
spec:
  ports:
    # The port name is referenced by the ServiceMonitor endpoint (port: https-metrics).
    - name: https-metrics
      port: 2379
      protocol: TCP
      targetPort: 2379
  sessionAffinity: None
  type: ClusterIP
2. prometheus挂载secret
# Bundle the etcd CA certificate, server certificate and server key into one
# generic secret named etcd-certs in the cattle-prometheus namespace. Each
# --from-file entry becomes a key named after the file's basename
# (ca.crt, server.crt, server.key).
kubectl create secret generic etcd-certs \
  --namespace cattle-prometheus \
  --from-file=/etc/kubernetes/pki/etcd/ca.crt \
  --from-file=/etc/kubernetes/pki/etcd/server.crt \
  --from-file=/etc/kubernetes/pki/etcd/server.key
3. 编辑prometheus资源添加secret，这时候在pod里就能看到挂载的证书
# Open the Prometheus custom resource(s) in the cattle-prometheus namespace
# in an editor.
kubectl edit prometheus -n cattle-prometheus

# In the editor, add the etcd-certs secret to the list under spec.secrets
# (shown here next to the existing replicas field). The ServiceMonitor's
# tlsConfig reads the files from /etc/prometheus/secrets/etcd-certs/.
spec:
  replicas: 1
  secrets:
    - etcd-certs
4. 增加etcd ServiceMonitor
# ServiceMonitor that scrapes etcd over HTTPS through the Service labelled
# app=etcd / release=cluster-monitoring in the cattle-prometheus namespace.
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
  labels:
    app: prometheus-operator-etcd
    chart: prometheus-operator-7.4.0
    heritage: Tiller
    # NOTE(review): presumably this must match the Prometheus resource's
    # serviceMonitorSelector for the monitor to be picked up - confirm.
    release: pharos-prometheus-operator
  name: pharos-prometheus-operator-etcd
  namespace: cattle-prometheus
spec:
  endpoints:
    - bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token
      # Named port on the target Service.
      port: https-metrics
      scheme: https
      tlsConfig:
        # Files from the etcd-certs secret as mounted into the Prometheus pod.
        caFile: /etc/prometheus/secrets/etcd-certs/ca.crt
        certFile: /etc/prometheus/secrets/etcd-certs/server.crt
        keyFile: /etc/prometheus/secrets/etcd-certs/server.key
        # NOTE(review): server certificate verification is disabled here even
        # though a CA file is supplied; consider setting this to false once
        # the etcd server certificate is known to cover the scraped addresses.
        insecureSkipVerify: true
  namespaceSelector:
    matchNames:
      - cattle-prometheus
  selector:
    matchLabels:
      app: etcd
      release: cluster-monitoring
5. 添加规则
---
# Alert: an etcd scrape target has been down for 5 minutes.
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
  annotations:
    prometheus-operator-validated: "true"
  # NOTE(review): generation is normally maintained by the API server; it
  # looks like a leftover from an exported object - confirm it is needed.
  generation: 1
  labels:
    app: prometheus-operator
    release: pharos-prometheus-operator
  name: etcd-has-down
  namespace: cattle-prometheus
spec:
  groups:
    - name: etcd-has-down
      rules:
        - alert: etcd-has-down
          annotations:
            # Quoted so the {{ ... }} template is always read as plain text.
            description: 'Etcd has down ,check instance {{ $labels.instance }}'
          # Selects the scrape job named etcd-svc.
          expr: 'up{job="etcd-svc"} != 1'
          for: 5m
          labels:
            alertname: etcd-has-down
            severity: warning
---
# Alert: etcd reports no leader for 5 minutes.
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
  annotations:
    prometheus-operator-validated: "true"
  generation: 1
  labels:
    app: prometheus-operator
    release: pharos-prometheus-operator
  name: etcd-has-leader
  namespace: cattle-prometheus
spec:
  groups:
    - name: etcd-has-leader
      rules:
        - alert: etcd-has-leader
          annotations:
            description: 'etcd leader error, please check !'
          # NOTE(review): max() aggregates over every series, so this fires
          # only when no member at all reports a leader; a single member
          # without a leader does not trigger it. Confirm that is intended.
          expr: 'max(etcd_server_has_leader) != 1'
          for: 5m
          labels:
            alertname: etcd-has-leader
            severity: warning
---
# Alert: committed proposals are ahead of applied proposals by 1000 or more
# for 15 minutes.
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
  annotations:
    prometheus-operator-validated: "true"
  generation: 1
  labels:
    app: prometheus-operator
    release: pharos-prometheus-operator
  name: etcd-proposals-load
  namespace: cattle-prometheus
spec:
  groups:
    - name: etcd-proposals-load
      rules:
        - alert: etcd-proposals-load
          annotations:
            description: 'Etcd proposals applied is slowly ,maybe etcd server is overloaded, please check !'
          expr: '(etcd_server_proposals_committed_total - etcd_server_proposals_applied_total) >= 1000'
          for: 15m
          labels:
            alertname: etcd-proposals-load
            severity: warning