一、基本概念
- requests:资源最低需求
- limits:资源最高限制
- cpu:1c = 1000m
- QoS Class:
- Guaranteed:每个容器都同时设置了cpu和内存的requests和limits属性,且requests等于limits
- Burstable:不满足Guaranteed条件,且至少有一个容器设置了cpu或内存资源的requests属性
- BestEffort:没有任何一个容器设置requests或limits属性
二、定义资源配额
apiVersion: v1
kind: Pod
metadata:
name: pod-demo
namespace: default
labels:
app: myapp
tier: frontend
annotations:
dongfei.tech/created-by: "cluster admin"
spec:
containers:
- name: myapp
image: dongfeimg/stress-ng
imagePullPolicy: IfNotPresent
command: ["/usr/bin/stress-ng", "-c 1", "--metrics-brief"]
resources:
requests:
cpu: "200m"
memory: "128Mi"
limits:
cpu: "500m"
memory: "512Mi"
三、Heapster
- cAdvisor:单节点上的采集组件,负责向heapster报告
- heapster:k8s集群各资源指标的收集组件,使用缓存系统存储,可以持久存储至InfluxDB
- InfluxDB:时序数据库,可被Grafana展示数据
参考:https://github.com/kubernetes-retired/heapster/tree/master/deploy/kube-config/influxdb
1、部署InfluxDB
# cat influxdb.yaml
apiVersion: apps/v1
kind: Deployment
metadata:
name: monitoring-influxdb
namespace: kube-system
spec:
replicas: 1
selector:
matchLabels:
task: monitoring
k8s-app: influxdb
template:
metadata:
labels:
task: monitoring
k8s-app: influxdb
spec:
containers:
- name: influxdb
image: k8s.gcr.io/heapster-influxdb-amd64:v1.5.2
imagePullPolicy: IfNotPresent
volumeMounts:
- mountPath: /data
name: influxdb-storage
volumes:
- name: influxdb-storage
emptyDir: {}
---
apiVersion: v1
kind: Service
metadata:
labels:
task: monitoring
# For use as a Cluster add-on (https://github.com/kubernetes/kubernetes/tree/master/cluster/addons)
# If you are NOT using this as an addon, you should comment out this line.
kubernetes.io/cluster-service: 'true'
kubernetes.io/name: monitoring-influxdb
name: monitoring-influxdb
namespace: kube-system
spec:
ports:
- port: 8086
targetPort: 8086
selector:
k8s-app: influxdb
2、部署Heapster
# kubectl edit clusterrole system:heapster
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
annotations:
rbac.authorization.kubernetes.io/autoupdate: "true"
creationTimestamp: "2019-06-23T03:17:57Z"
labels:
kubernetes.io/bootstrapping: rbac-defaults
name: system:heapster
resourceVersion: "2596459"
selfLink: /apis/rbac.authorization.k8s.io/v1/clusterroles/system%3Aheapster
uid: d62a4e66-358d-4835-b416-99793a2f4088
rules:
- apiGroups:
- ""
resources:
- events
- namespaces
- nodes
- nodes/stats #增加nodes/stats资源
- pods
verbs:
- get
- list
- watch
- create #加入create权限
- apiGroups:
- extensions
resources:
- deployments
verbs:
- get
- list
- watch
# cat heapster-rbac.yaml
kind: ClusterRoleBinding
apiVersion: rbac.authorization.k8s.io/v1beta1
metadata:
name: heapster
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: ClusterRole
name: system:heapster
subjects:
- kind: ServiceAccount
name: heapster
namespace: kube-system
[root@master heapster]# cat heapster.yaml
apiVersion: v1
kind: ServiceAccount
metadata:
name: heapster
namespace: kube-system
---
apiVersion: extensions/v1beta1
kind: Deployment
metadata:
name: heapster
namespace: kube-system
spec:
replicas: 1
template:
metadata:
labels:
task: monitoring
k8s-app: heapster
spec:
serviceAccountName: heapster
containers:
- name: heapster
image: k8s.gcr.io/heapster-amd64:v1.5.4
imagePullPolicy: IfNotPresent
command:
- /heapster
- --source=kubernetes:https://kubernetes.default?useServiceAccount=true&kubeletHttps=true&kubeletPort=10250&insecure=true
- --sink=influxdb:http://monitoring-influxdb.kube-system.svc:8086
---
apiVersion: v1
kind: Service
metadata:
labels:
task: monitoring
# For use as a Cluster add-on (https://github.com/kubernetes/kubernetes/tree/master/cluster/addons)
# If you are NOT using this as an addon, you should comment out this line.
kubernetes.io/cluster-service: 'true'
kubernetes.io/name: Heapster
name: heapster
namespace: kube-system
spec:
ports:
- port: 80
targetPort: 8082
selector:
k8s-app: heapster
3、部署Grafana
# cat grafana.yaml
apiVersion: extensions/v1beta1
kind: Deployment
metadata:
name: monitoring-grafana
namespace: kube-system
spec:
replicas: 1
template:
metadata:
labels:
task: monitoring
k8s-app: grafana
spec:
containers:
- name: grafana
image: k8s.gcr.io/heapster-grafana-amd64:v5.0.4
ports:
- containerPort: 3000
protocol: TCP
volumeMounts:
- mountPath: /etc/ssl/certs
name: ca-certificates
readOnly: true
- mountPath: /var
name: grafana-storage
env:
- name: INFLUXDB_HOST
value: monitoring-influxdb
- name: GF_SERVER_HTTP_PORT
value: "3000"
# The following env variables are required to make Grafana accessible via
# the kubernetes api-server proxy. On production clusters, we recommend
# removing these env variables, setup auth for grafana, and expose the grafana
# service using a LoadBalancer or a public IP.
- name: GF_AUTH_BASIC_ENABLED
value: "false"
- name: GF_AUTH_ANONYMOUS_ENABLED
value: "true"
- name: GF_AUTH_ANONYMOUS_ORG_ROLE
value: Admin
- name: GF_SERVER_ROOT_URL
# If you're only using the API Server proxy, set this value instead:
# value: /api/v1/namespaces/kube-system/services/monitoring-grafana/proxy
value: /
volumes:
- name: ca-certificates
hostPath:
path: /etc/ssl/certs
- name: grafana-storage
emptyDir: {}
---
apiVersion: v1
kind: Service
metadata:
labels:
# For use as a Cluster add-on (https://github.com/kubernetes/kubernetes/tree/master/cluster/addons)
# If you are NOT using this as an addon, you should comment out this line.
kubernetes.io/cluster-service: 'true'
kubernetes.io/name: monitoring-grafana
name: monitoring-grafana
namespace: kube-system
spec:
# In a production setup, we recommend accessing Grafana through an external Loadbalancer
# or through a public IP.
# type: LoadBalancer
# You could also use NodePort to expose the service at a randomly-generated port
# type: NodePort
ports:
- port: 80
targetPort: 3000
selector:
k8s-app: grafana
type: NodePort
四、metrics-server
- 核心指标流水线:由kubelet、metrics-server和API server提供的api组成,核心指标包括cpu累积使用率、内存实时使用率、Pod的资源占用率和容器的磁盘占用率
- 监控流水线:用于从系统收集各种指标数据并提供给存储系统和HPA。
参考:https://github.com/kubernetes/kubernetes/tree/master/cluster/addons/metrics-server
- 部署metrics-server
# git clone https://github.com/kubernetes/kubernetes.git
# cd kubernetes/cluster/addons/metrics-server/
metrics-server]# ll
total 24
-rw-r--r-- 1 root root 398 Feb 12 04:59 auth-delegator.yaml
-rw-r--r-- 1 root root 419 Feb 12 04:59 auth-reader.yaml
-rw-r--r-- 1 root root 393 Feb 12 04:59 metrics-apiservice.yaml
-rw-r--r-- 1 root root 3373 Feb 20 10:53 metrics-server-deployment.yaml
-rw-r--r-- 1 root root 336 Feb 12 04:59 metrics-server-service.yaml
-rw-r--r-- 1 root root 817 Feb 20 10:53 resource-reader.yaml
metrics-server]# cat resource-reader.yaml
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
name: system:metrics-server
labels:
kubernetes.io/cluster-service: "true"
addonmanager.kubernetes.io/mode: Reconcile
rules:
- apiGroups:
- ""
resources:
- pods
- nodes
- nodes/stats #此处添加
- namespaces
verbs:
- get
- list
- watch
- apiGroups:
- "extensions"
resources:
- deployments
verbs:
- get
- list
- update
- watch
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
name: system:metrics-server
labels:
kubernetes.io/cluster-service: "true"
addonmanager.kubernetes.io/mode: Reconcile
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: ClusterRole
name: system:metrics-server
subjects:
- kind: ServiceAccount
name: metrics-server
namespace: kube-system
metrics-server]# cat metrics-server-deployment.yaml
apiVersion: v1
kind: ServiceAccount
metadata:
name: metrics-server
namespace: kube-system
labels:
kubernetes.io/cluster-service: "true"
addonmanager.kubernetes.io/mode: Reconcile
---
apiVersion: v1
kind: ConfigMap
metadata:
name: metrics-server-config
namespace: kube-system
labels:
kubernetes.io/cluster-service: "true"
addonmanager.kubernetes.io/mode: EnsureExists
data:
NannyConfiguration: |-
apiVersion: nannyconfig/v1alpha1
kind: NannyConfiguration
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: metrics-server-v0.3.3
namespace: kube-system
labels:
k8s-app: metrics-server
kubernetes.io/cluster-service: "true"
addonmanager.kubernetes.io/mode: Reconcile
version: v0.3.3
spec:
selector:
matchLabels:
k8s-app: metrics-server
version: v0.3.3
template:
metadata:
name: metrics-server
labels:
k8s-app: metrics-server
version: v0.3.3
annotations:
scheduler.alpha.kubernetes.io/critical-pod: ''
seccomp.security.alpha.kubernetes.io/pod: 'docker/default'
spec:
priorityClassName: system-cluster-critical
serviceAccountName: metrics-server
containers:
- name: metrics-server
image: k8s.gcr.io/metrics-server-amd64:v0.3.3
command:
- /metrics-server
- --kubelet-insecure-tls #此处增加
- --metric-resolution=30s
# These are needed for GKE, which doesn't support secure communication yet.
# Remove these lines for non-GKE clusters, and when GKE supports token-based auth.
#- --kubelet-port=10255 #此处注释
#- --deprecated-kubelet-completely-insecure=true #此处注释
- --kubelet-preferred-address-types=InternalIP,Hostname,InternalDNS,ExternalDNS,ExternalIP
ports:
- containerPort: 443
name: https
protocol: TCP
- name: metrics-server-nanny
image: k8s.gcr.io/addon-resizer:1.8.5
resources:
limits:
cpu: 100m
memory: 300Mi
requests:
cpu: 5m
memory: 50Mi
env:
- name: MY_POD_NAME
valueFrom:
fieldRef:
fieldPath: metadata.name
- name: MY_POD_NAMESPACE
valueFrom:
fieldRef:
fieldPath: metadata.namespace
volumeMounts:
- name: metrics-server-config-volume
mountPath: /etc/config
command:
- /pod_nanny
- --config-dir=/etc/config
- --cpu=80m #此处增加
#- --cpu={{ base_metrics_server_cpu }} #此处注释
- --extra-cpu=0.5m #此处增加
- --memory=80Mi #此处增加
#- --memory={{ base_metrics_server_memory }} #此处注释
- --extra-memory=8Mi #此处增加
#- --extra-memory={{ metrics_server_memory_per_node }}Mi #此处注释
- --threshold=5
- --deployment=metrics-server-v0.3.3
- --container=metrics-server
- --poll-period=300000
- --estimator=exponential
# Specifies the smallest cluster (defined in number of nodes)
# resources will be scaled to.
#- --minClusterSize={{ metrics_server_min_cluster_size }} #此处注释
volumes:
- name: metrics-server-config-volume
configMap:
name: metrics-server-config
tolerations:
- key: "CriticalAddonsOnly"
operator: "Exists"
- 验证metrics-server
# kubectl get po -owide -nkube-system |grep metrics-server
# kubectl logs -f -c metrics-server metrics-server-v0.3.3-79887c66f9-gfkrn -n kube-system
# kubectl logs -f --all-containers=true metrics-server-v0.3.3-79887c66f9-gfkrn -n kube-system
# kubectl proxy --port=8080
# curl http://localhost:8080/apis/metrics.k8s.io/v1beta1/nodes
# kubectl top nodes
# kubectl top po -n kube-system
五、Prometheus
- Custom Metrics API:自定义指标API
- k8s-prometheus-adapter:负责查询PromQL获取Prometheus中采集的指标数据
# cd /etc/kubernetes/pki/
pki]# (umask 077; openssl genrsa -out serving.key 2048)
pki]# openssl req -new -key serving.key -out serving.csr -subj "/CN=serving"
# openssl x509 -req -in serving.csr -CA ./ca.crt -CAkey ./ca.key -CAcreateserial -out serving.crt -days 3650
pki]# kubectl create secret generic cm-adapter-serving-certs --from-file=serving.crt=./serving.crt --from-file=serving.key=./serving.key -n prom
# git clone https://github.com/L-dongf/k8s-prom.git
manifests]# cd k8s-prom/
k8s-prom]# kubectl apply -f namespace.yaml
k8s-prom]# cd node_exporter/
node_exporter]# kubectl apply -f .
node_exporter]# cd ../prometheus/
prometheus]# kubectl apply -f .
prometheus]# cd ../kube-state-metrics/
kube-state-metrics]# kubectl apply -f .
kube-state-metrics]# cd ../k8s-prometheus-adapter/
k8s-prometheus-adapter]# git clone https://github.com/L-dongf/k8s-prometheus-adapter.git
[root@master k8s-prometheus-adapter]# mv custom-metrics-apiserver-deployment.yaml custom-metrics-apiserver-deployment.yaml.bak
[root@master k8s-prometheus-adapter]# cp k8s-prometheus-adapter/deploy/manifests/custom-metrics-apiserver-deployment.yaml .
[root@master k8s-prometheus-adapter]# cp k8s-prometheus-adapter/deploy/manifests/custom-metrics-config-map.yaml .
k8s-prometheus-adapter]# kubectl apply -f .
# kubectl api-versions |grep custom
六、HPA:自动弹性伸缩
- V1版本:只支持根据CPU使用来自动伸缩
# kubectl run myapp --image=dongfeimg/myapp:v1 --replicas=1 --requests='cpu=50m,memory=256Mi' --limits='cpu=50m,memory=256Mi' --labels='app=myapp' --expose --port=80
# kubectl patch svc myapp -p '{"spec":{"type":"NodePort"}}'
# kubectl autoscale deployment myapp --min=1 --max=8 --cpu-percent=60
# ab -c 100 -n 50000 http://192.168.100.51:30556/index.html
# kubectl get hpa
# kubectl get po #观察po是否增加
- V2版本:加入支持内存指标
apiVersion: autoscaling/v2beta1
kind: HorizontalPodAutoscaler
metadata:
name: myapp-hpa-v2
spec:
scaleTargetRef:
apiVersion: apps/v1
kind: Deployment
name: myapp
minReplicas: 1
maxReplicas: 8
metrics:
- type: Resource
resource:
name: cpu
targetAverageUtilization: 60
- type: Resource
resource:
name: memory
targetAverageValue: 50Mi
- 自定义指标
apiVersion: autoscaling/v2beta1
kind: HorizontalPodAutoscaler
metadata:
name: myapp-hpa-v2
spec:
scaleTargetRef:
apiVersion: apps/v1
kind: Deployment
name: myapp
minReplicas: 1
maxReplicas: 8
metrics:
- type: Pods
pods:
metricName: http_requests
targetAverageValue: 1000m #1000m即1,每个Pod的http_requests指标平均值超过1时自动扩展Pod