zoukankan      html  css  js  c++  java
  • kubernetes全栈监控部署

    一、软件功能说明

    1.MetricServer:是kubernetes集群资源使用情况的聚合器,收集到的数据供kubernetes集群内的组件使用,如kubectl、hpa、scheduler等。

    2.PrometheusOperator:通过CRD(自定义资源)在kubernetes集群中部署和管理Prometheus(系统监测和警报工具箱,用来存储监控数据)、Alertmanager及其监控配置。

    3.NodeExporter:用于采集各node的关键度量指标状态数据。

    4.KubeStateMetrics:收集kubernetes集群内资源对象数据,制定告警规则。

    5.Prometheus:采用pull方式收集apiserver,scheduler,controller-manager,kubelet组件数据,通过http协议传输。

    6.Grafana:是可视化数据统计和监控平台。

    7.Alertmanager:实现短信或邮件报警。

    二、程序安装部署

    下载各程序部署文件

    # mkdir /opt/kubernetes/monitor && cd /opt/kubernetes/monitor
    # git clone https://github.com/kubernetes-incubator/metrics-server.git
    # git clone https://github.com/mgxian/k8s-monitor.git

    1.部署MetricServer

    1)修改metrics-server-deployment.yaml文件为如下内容

    # echo '' > metrics-server/deploy/1.8+/metrics-server-deployment.yaml
    
    # vi metrics-server/deploy/1.8+/metrics-server-deployment.yaml
    apiVersion: v1
    kind: ServiceAccount
    metadata:
      name: metrics-server
      namespace: kube-system
    ---
    apiVersion: extensions/v1beta1
    kind: Deployment
    metadata:
      name: metrics-server
      namespace: kube-system
      labels:
        k8s-app: metrics-server
    spec:
      selector:
        matchLabels:
          k8s-app: metrics-server
      template:
        metadata:
          name: metrics-server
          labels:
            k8s-app: metrics-server
        spec:
          serviceAccountName: metrics-server
          containers:
          - name: metrics-server
            image: mirrorgooglecontainers/metrics-server-amd64:v0.2.1
            imagePullPolicy: Always
            volumeMounts:
            - mountPath: /opt/kubernetes/ssl
              name: ca-ssl
            command:
            - /metrics-server
            - --source=kubernetes.summary_api:''
            - --requestheader-client-ca-file=/opt/kubernetes/ssl/ca.pem
          volumes:
           - name: ca-ssl
             hostPath:
              path: /opt/kubernetes/ssl

    2)部署MetricServer

    # kubectl create -f metrics-server/deploy/1.8+/
    clusterrolebinding.rbac.authorization.k8s.io "metrics-server:system:auth-delegator" created
    rolebinding.rbac.authorization.k8s.io "metrics-server-auth-reader" created
    apiservice.apiregistration.k8s.io "v1beta1.metrics.k8s.io" created
    serviceaccount "metrics-server" created
    deployment.extensions "metrics-server" created
    service "metrics-server" created
    clusterrole.rbac.authorization.k8s.io "system:metrics-server" created
    clusterrolebinding.rbac.authorization.k8s.io "system:metrics-server" created

    3)状态查看

    # kubectl get svc -o wide --all-namespaces
    NAMESPACE     NAME                   TYPE        CLUSTER-IP   EXTERNAL-IP   PORT(S)         AGE       SELECTOR
    default       kubernetes             ClusterIP   10.1.0.1     <none>        443/TCP         18d       <none>
    kube-system   coredns                ClusterIP   10.1.0.2     <none>        53/UDP,53/TCP   10d       k8s-app=coredns
    kube-system   kubernetes-dashboard   NodePort    10.1.56.6    <none>        443:31944/TCP   14d       k8s-app=kubernetes-dashboard
    kube-system   metrics-server         ClusterIP   10.1.79.15   <none>        443/TCP         15m       k8s-app=metrics-server
    
    # kubectl get pods -n kube-system
    NAME                                     READY     STATUS    RESTARTS   AGE
    calico-kube-controllers-98989846-6th9k   1/1       Running   21         18d
    calico-node-bdhj4                        2/2       Running   42         18d
    calico-node-wv9nb                        2/2       Running   38         18d
    coredns-77c989547b-9p9fs                 1/1       Running   5          10d
    coredns-77c989547b-k6g2c                 1/1       Running   8          10d
    kubernetes-dashboard-66c9d98865-kdhpg    1/1       Running   12         14d
    metrics-server-6d6df698b9-7zscb          1/1       Running   0          16m

    2.创建namespace并部署PrometheusOperator

    # kubectl apply -f k8s-monitor/monitoring-namespace.yaml
    namespace "monitoring" created
    
    # kubectl apply -f k8s-monitor/prometheus-operator.yaml
    serviceaccount "prometheus-operator" created
    clusterrole.rbac.authorization.k8s.io "prometheus-operator" created
    clusterrolebinding.rbac.authorization.k8s.io "prometheus-operator" created
    deployment.apps "prometheus-operator" created
    service "prometheus-operator" created

    状态查看

    # kubectl get pod -n monitoring
    NAME                                   READY     STATUS    RESTARTS   AGE
    prometheus-operator-7d9fd546c4-bmjc4   1/1       Running   0          1h
    
    # kubectl get svc -n monitoring
    NAME                  TYPE        CLUSTER-IP   EXTERNAL-IP   PORT(S)    AGE
    prometheus-operator   ClusterIP   None         <none>        8080/TCP   1h
    
    # kubectl get crd
    NAME                                    AGE
    alertmanagers.monitoring.coreos.com     3m
    prometheuses.monitoring.coreos.com      3m
    prometheusrules.monitoring.coreos.com   3m
    servicemonitors.monitoring.coreos.com   3m

    3.部署kubernetes组件服务

    # kubectl apply -f k8s-monitor/kube-k8s-service.yaml 
    service "kube-scheduler-prometheus-discovery" created
    service "kube-controller-manager-prometheus-discovery" created
    service "coredns-prometheus-discovery" created

    状态查看

    # kubectl get svc -n kube-system
    NAME                                           TYPE        CLUSTER-IP   EXTERNAL-IP   PORT(S)         AGE
    coredns                                        ClusterIP   10.1.0.2     <none>        53/UDP,53/TCP   10d
    coredns-prometheus-discovery                   ClusterIP   None         <none>        9153/TCP        13s
    kube-controller-manager-prometheus-discovery   ClusterIP   None         <none>        10252/TCP       13s
    kube-scheduler-prometheus-discovery            ClusterIP   None         <none>        10251/TCP       13s
    kubelet                                        ClusterIP   None         <none>        10250/TCP       1m
    kubernetes-dashboard                           NodePort    10.1.56.6    <none>        443:31944/TCP   14d
    metrics-server                                 ClusterIP   10.1.48.143   <none>        443/TCP         13m

    4.部署NodeExporter

    # kubectl apply -f k8s-monitor/node_exporter.yaml
    serviceaccount "node-exporter" created
    clusterrole.rbac.authorization.k8s.io "node-exporter" created
    clusterrolebinding.rbac.authorization.k8s.io "node-exporter" created
    daemonset.apps "node-exporter" created
    service "node-exporter" created

    状态查看

    # kubectl get pods -n monitoring
    NAME                                   READY     STATUS    RESTARTS   AGE
    node-exporter-767lz                    2/2       Running   0          4s
    node-exporter-8t8wh                    2/2       Running   0          4s
    prometheus-operator-7d9fd546c4-bmjc4   1/1       Running   0          2h
    
    # kubectl get svc -n monitoring
    NAME                  TYPE        CLUSTER-IP   EXTERNAL-IP   PORT(S)    AGE
    node-exporter         ClusterIP   None         <none>        9100/TCP   20s
    prometheus-operator   ClusterIP   None         <none>        8080/TCP   2h

    5.部署KubeStateMetrics

    # kubectl apply -f k8s-monitor/kube-state-metrics.yaml
    serviceaccount "kube-state-metrics" created
    role.rbac.authorization.k8s.io "kube-state-metrics" created
    rolebinding.rbac.authorization.k8s.io "kube-state-metrics" created
    clusterrole.rbac.authorization.k8s.io "kube-state-metrics" created
    clusterrolebinding.rbac.authorization.k8s.io "kube-state-metrics" created
    deployment.apps "kube-state-metrics" created
    service "kube-state-metrics" created

    状态查看

    # kubectl get pods -n monitoring
    NAME                                   READY     STATUS    RESTARTS   AGE
    kube-state-metrics-8545d67875-lwwd9    4/4       Running   0          1m
    node-exporter-767lz                    2/2       Running   0          37m
    node-exporter-8t8wh                    2/2       Running   0          37m
    prometheus-operator-7d9fd546c4-bmjc4   1/1       Running   0          2h
    
    # kubectl get svc -n monitoring
    NAME                  TYPE        CLUSTER-IP   EXTERNAL-IP   PORT(S)             AGE
    kube-state-metrics    ClusterIP   None         <none>        8443/TCP,9443/TCP   2m
    node-exporter         ClusterIP   None         <none>        9100/TCP            38m
    prometheus-operator   ClusterIP   None         <none>        8080/TCP            2h

    6.部署Prometheus

    # kubectl apply -f k8s-monitor/prometheus.yaml
    serviceaccount "prometheus-k8s" created
    clusterrole.rbac.authorization.k8s.io "prometheus-k8s" created
    clusterrolebinding.rbac.authorization.k8s.io "prometheus-k8s" created
    prometheus.monitoring.coreos.com "k8s" created
    service "prometheus-k8s" created
    prometheusrule.monitoring.coreos.com "prometheus-k8s-rules" created

    状态查看

    # kubectl get pods -n monitoring
    NAME                                   READY     STATUS    RESTARTS   AGE
    kube-state-metrics-8545d67875-lwwd9    4/4       Running   0          59m
    node-exporter-767lz                    2/2       Running   0          1h
    node-exporter-8t8wh                    2/2       Running   0          1h
    prometheus-k8s-0                       3/3       Running   1          1m
    prometheus-k8s-1                       3/3       Running   0          1m
    prometheus-operator-7d9fd546c4-bmjc4   1/1       Running   0          3h
    
    # kubectl get svc -n monitoring
    NAME                  TYPE        CLUSTER-IP   EXTERNAL-IP   PORT(S)             AGE
    kube-state-metrics    ClusterIP   None         <none>        8443/TCP,9443/TCP   59m
    node-exporter         ClusterIP   None         <none>        9100/TCP            1h
    prometheus-k8s        NodePort    10.1.31.15   <none>        9090:30172/TCP      1m
    prometheus-operated   ClusterIP   None         <none>        9090/TCP            54m
    prometheus-operator   ClusterIP   None         <none>        8080/TCP            3h 

    1)配置数据收集

    # kubectl apply -f k8s-monitor/kube-servicemonitor.yaml
    servicemonitor.monitoring.coreos.com "kube-apiserver" created
    servicemonitor.monitoring.coreos.com "kubelet" created
    servicemonitor.monitoring.coreos.com "kube-controller-manager" created
    servicemonitor.monitoring.coreos.com "kube-scheduler" created
    servicemonitor.monitoring.coreos.com "coredns" created
    servicemonitor.monitoring.coreos.com "kube-state-metrics" created
    servicemonitor.monitoring.coreos.com "node-exporter" created
    servicemonitor.monitoring.coreos.com "prometheus-operator" created
    servicemonitor.monitoring.coreos.com "prometheus" created

    状态查看

    # kubectl get servicemonitors -n monitoring
    NAME                      AGE
    coredns                   8s
    kube-apiserver            8s
    kube-controller-manager   8s
    kube-scheduler            8s
    kube-state-metrics        8s
    kubelet                   8s
    node-exporter             8s
    prometheus                7s
    prometheus-operator       8s

    2)查看Prometheus中的数据

    查看Prometheus页面访问端口

    # echo $(kubectl get svc -n monitoring | grep prometheus-k8s | awk '{print $(NF-1)}' | cut -d ':' -f 2 | cut -d '/' -f 1)
    30172

    注:可使用任一node主机IP加此端口号以HTTP方式访问

    3)Prometheus主页

    7.部署Grafana

    # kubectl apply -f k8s-monitor/grafana.yaml
    secret "grafana-datasources" created
    serviceaccount "grafana" created
    configmap "grafana-dashboards" created
    configmap "grafana-dashboard-k8s-cluster-rsrc-use" created
    configmap "grafana-dashboard-k8s-node-rsrc-use" created
    configmap "grafana-dashboard-k8s-resources-cluster" created
    configmap "grafana-dashboard-k8s-resources-namespace" created
    configmap "grafana-dashboard-k8s-resources-pod" created
    configmap "grafana-dashboard-nodes" created
    configmap "grafana-dashboard-pods" created
    configmap "grafana-dashboard-statefulset" created
    deployment.apps "grafana" created
    service "grafana" created

    1)状态查看

    # kubectl get pods -n monitoring
    NAME                                   READY     STATUS    RESTARTS   AGE
    grafana-5b68464b84-b9qtz               1/1       Running   0          4m
    
    # kubectl get svc -n monitoring
    NAME                    TYPE        CLUSTER-IP     EXTERNAL-IP   PORT(S)             AGE
    alertmanager-main       NodePort    10.1.139.227   <none>        9093:31953/TCP      19h
    alertmanager-operated   ClusterIP   None           <none>        9093/TCP,6783/TCP   19h
    grafana                 NodePort    10.1.199.80    <none>        3000:30809/TCP      20h
    kube-state-metrics      ClusterIP   None           <none>        8443/TCP,9443/TCP   21h
    node-exporter           ClusterIP   None           <none>        9100/TCP            22h
    prometheus-k8s          NodePort    10.1.31.15     <none>        9090:30172/TCP      20h
    prometheus-operated     ClusterIP   None           <none>        9090/TCP            21h
    prometheus-operator     ClusterIP   None           <none>        8080/TCP            1d
    
    # kubectl get svc -n monitoring | grep grafana
    grafana               NodePort    10.1.199.80   <none>        3000:30809/TCP      5m

    2)查看Grafana页面访问端口

    # echo $(kubectl get svc -n monitoring | grep grafana | awk '{print $(NF-1)}' | cut -d ':' -f 2 | cut -d '/' -f 1)
    30809

    注:可使用任一node主机IP加此端口号以HTTP方式访问

    3)Grafana主页面,默认用户名/密码为admin/admin

    4)集群状态页面

    5)以命名空间视角查看集群状态的页面

    6)POD信息页面

    8.部署Alertmanager

    # kubectl apply -f k8s-monitor/alertmanager.yaml
    serviceaccount "alertmanager-main" created
    secret "alertmanager-main" created
    alertmanager.monitoring.coreos.com "main" created
    service "alertmanager-main" created
    servicemonitor.monitoring.coreos.com "alertmanager" created

    1)状态查看

    # kubectl get pods -n monitoring
    NAME                                   READY     STATUS    RESTARTS   AGE
    alertmanager-main-0                    2/2       Running   0          17s
    alertmanager-main-1                    2/2       Running   0          11s
    
    # kubectl get svc -n monitoring
    NAME                    TYPE        CLUSTER-IP     EXTERNAL-IP   PORT(S)             AGE
    alertmanager-main       NodePort    10.1.139.227   <none>        9093:31953/TCP      1m
    alertmanager-operated   ClusterIP   None           <none>        9093/TCP,6783/TCP   1m
    
    # kubectl get svc -n monitoring | grep alertmanager-main
    alertmanager-main       NodePort    10.1.139.227   <none>        9093:31953/TCP      20h

    2)查看Alertmanager页面访问端口

    # echo $(kubectl get svc -n monitoring | grep alertmanager-main | awk '{print $(NF-1)}' | cut -d ':' -f 2 | cut -d '/' -f 1)
    31953

    注:可使用任一node主机IP加此端口号以HTTP方式访问

    3)Alertmanager主页面

    一入运维深似海,从此不见彼岸花
  • 相关阅读:
    【leetcode】Reverse Words in a String
    使用windows的远程桌面连接连接Ubuntu
    Ubuntu下快速安装php环境
    面试题之【打印1到最大的N位数】
    gnuplot安装问题(set terminal "unknown")
    java获取文件的md5值
    jQuery全选/反选checkbox
    PowerDesigner反向工程,根据Oracle数据库结构生成ER图(2014-3-25记)
    SVN服务端启动解决方案(2013-12-10 记)
    Oracle数据库DOC命令导入导出(2014-3-10记)
  • 原文地址:https://www.cnblogs.com/cn-jasonho/p/13285592.html
Copyright © 2011-2022 走看看