zoukankan      html  css  js  c++  java
  • Kubernetes-3:k8s集群环境安装部署及常见报错解决方法

    k8s集群安装

    环境说明:

    k8s-Master-Centos8 ip:192.168.152.53

    k8s-Node1-Centos7 ip:192.168.152.253

    k8s-Node2-Centos8 ip:192.168.152.252

    注意:

    Master与Node节点操作步骤基本一致

    Node节点只需配置到本文的第6步即可

    1、前期准备

    # Stop firewalld now and keep it disabled, so it does not come back after
    # the reboot performed in step 2.
    [root@Centos8 ~]# systemctl disable --now firewalld.service
    
    # Switch SELinux to permissive for the running system, and persist the
    # setting in /etc/selinux/config so it also survives the reboot.
    [root@Centos8 ~]# setenforce 0
    [root@Centos8 ~]# sed -i 's/^SELINUX=enforcing$/SELINUX=permissive/' /etc/selinux/config
    
    # Turn swap off (kubelet refuses to start with swap enabled by default) and
    # comment out the swap entries in /etc/fstab. Lines that are already
    # commented are left alone, so the command is safe to run more than once.
    [root@Centos8 ~]# swapoff -a
    [root@Centos8 ~]# sed -i '/^[^#].*[[:space:]]swap[[:space:]]/ s/^/#/' /etc/fstab
    
    #安装iptables并设置为空规则及开机自启
    yum -y install iptables-services iptables
    systemctl enable --now iptables.service
    iptables -F  #清空规则
    service iptables save  #保存
    iptables -L  #检查是否为空规则

    2、升级内核,建议 >= 4.4

    #—————————————— Centos8 ————————————————#
    #——————Centos8最好不要做升级,如果非要升级,建议也只升级到4.4 ————————#
    #——————以下只是演示升级过程,真实不要升级到5.6,貌似本身不支持nf_conntrack_ipv4 ————————#
    #安装ELrepo仓库
    rpm --import https://www.elrepo.org/RPM-GPG-KEY-elrepo.org
    yum install https://www.elrepo.org/elrepo-release-8.0-2.el8.elrepo.noarch.rpm
    
    #查看可用的系统内核包
    yum --disablerepo="*" --enablerepo="elrepo-kernel" list available
    
    #安装内核
    yum --enablerepo=elrepo-kernel install kernel-ml
    
    # 安装完毕后,查看现有所有内核
    grubby --info=ALL
    
    #设置启动内核
    grubby --set-default /boot/vmlinuz-5.6.2-1.el8.elrepo.x86_64
    
    #———————————————— Centos7 ————————————————#
    #下载内核rpm包
    rpm -Uvh http://www.elrepo.org/elrepo-release-7.0-3.el7.elrepo.noarch.rpm
    
    #指定包名安装新版kernel
    yum --enablerepo=elrepo-kernel install -y kernel-lt
    
    #设置默认启动的内核
    grub2-set-default 'CentOS Linux (4.4.218-1.el7.elrepo.x86_64) 7 (Core)'
    
    #———————————————— 以下相同 ————————————————#
    #重启生效
    systemctl reboot
    
    #重新查看内核版本
    [root@Centos8 ~]# uname -r
    5.6.2-1.el8.elrepo.x86_64

    3、设置kube-proxy开启ipvs的前置条件

    modprobe br_netfilter
    
    cat > /etc/sysconfig/modules/ipvs.modules <<EOF
    #!/bin/bash
    modprobe -- ip_vs
    modprobe -- ip_vs_rr
    modprobe -- ip_vs_wrr
    modprobe -- ip_vs_sh
    modprobe -- nf_conntrack_ipv4
    EOF
    
    chmod 755 /etc/sysconfig/modules/ipvs.modules && bash /etc/sysconfig/modules/ipvs.modules
    
    # 查看是否加载成功ipvs模块
    lsmod | grep -e ip_vs -e nf_conntrack_ipv4

      nf_conntrack_ipv4 20480 4
      nf_defrag_ipv4 16384 1 nf_conntrack_ipv4
      ip_vs_sh 16384 0
      ip_vs_wrr 16384 0
      ip_vs_rr 16384 0
      ip_vs 147456 6 ip_vs_rr,ip_vs_sh,ip_vs_wrr
      nf_conntrack 114688 9 ip_vs,nf_nat,nf_nat_ipv4,nf_nat_ipv6,xt_conntrack,nf_nat_masquerade_ipv4,nf_conntrack_netlink,nf_conntrack_ipv4,nf_conntrack_ipv6
      libcrc32c 16384 2 xfs,ip_vs

    4、安装Docker

    #安装环境
    yum install -y yum-utils device-mapper-persistent-data lvm2
    
    #添加docker仓库
    yum-config-manager --add-repo http://mirrors.aliyun.com/docker-ce/linux/centos/docker-ce.repo
    
    #安装docker
    yum -y install docker-ce
    报错:
       package docker-ce-3:19.03.8-3.el7.x86_64 requires containerd.io >= 1.2.2-3, but none of the providers can be installed
    
    先安装containerd.io >= 1.2.2-3:
    dnf install https://download.docker.com/linux/centos/7/x86_64/stable/Packages/containerd.io-1.2.6-3.3.el7.x86_64.rpm
    
    再次安装docker:
    yum -y install docker-ce
    
    #启动docker,并设置开机自启
    systemctl enable --now docker.service
    
    #配置daemon.设置默认的cgroup组为systemd,并使docker的日志以json形式输出
    cat > /etc/docker/daemon.json <<EOF
    {
        "exec-opts": ["native.cgroupdriver=systemd"],
        "log-driver": "json-file",
        "log-opts": {
            "max-size": "100m"
        }
    }
    EOF
    
    #创建配置文件目录
    mkdir -p /etc/systemd/system/docker.service.d
    
    #重新加载daemon 及 重启docker
    systemctl daemon-reload && systemctl restart docker.service

    5、安装Kubeadm(主从配置)

    #配置yum仓库
    cat <<EOF > /etc/yum.repos.d/kubernetes.repo
    [kubernetes]
    name=Kubernetes
    baseurl=http://mirrors.aliyun.com/kubernetes/yum/repos/kubernetes-el7-x86_64
    enabled=1
    gpgcheck=0
    repo_gpgcheck=0
    gpgkey=http://mirrors.aliyun.com/kubernetes/yum/doc/yum-key.gpg
    EOF
    
    #安装kubeadm、kubectl、kubelet
    yum -y install kubeadm-1.15.1 kubectl-1.15.1 kubelet-1.15.1
    
    #设置开机自启
    systemctl enable kubelet.service

    6、拉取初始化镜像

    vim initimage.sh
    ...
    #!/usr/bin/env bash
    #
    # Pull the control-plane images from Docker Hub mirrors, retag them under
    # k8s.gcr.io, and remove the mirror-named tags afterwards.
    #
    # The script stops at the first failing docker command, so a failed pull is
    # never followed by a tag/rmi of an image that does not exist locally.
    set -euo pipefail
    
    readonly K8S_VERSION=v1.15.1
    readonly ETCD_VERSION=3.3.10
    readonly DNS_VERSION=1.3.1
    readonly PAUSE_VERSION=3.1
    
    # Each entry is "<mirror image> <k8s.gcr.io image>".
    image_map=(
      "mirrorgooglecontainers/kube-apiserver-amd64:${K8S_VERSION} k8s.gcr.io/kube-apiserver:${K8S_VERSION}"
      "mirrorgooglecontainers/kube-controller-manager-amd64:${K8S_VERSION} k8s.gcr.io/kube-controller-manager:${K8S_VERSION}"
      "mirrorgooglecontainers/kube-scheduler-amd64:${K8S_VERSION} k8s.gcr.io/kube-scheduler:${K8S_VERSION}"
      "mirrorgooglecontainers/kube-proxy-amd64:${K8S_VERSION} k8s.gcr.io/kube-proxy:${K8S_VERSION}"
      "mirrorgooglecontainers/etcd-amd64:${ETCD_VERSION} k8s.gcr.io/etcd:${ETCD_VERSION}"
      "mirrorgooglecontainers/pause:${PAUSE_VERSION} k8s.gcr.io/pause:${PAUSE_VERSION}"
      "coredns/coredns:${DNS_VERSION} k8s.gcr.io/coredns:${DNS_VERSION}"
    )
    
    for entry in "${image_map[@]}"; do
      # Split the entry on its single space into source and target names.
      read -r mirror_image target_image <<<"${entry}"
      docker pull "${mirror_image}"
      docker tag "${mirror_image}" "${target_image}"
      # Only the mirror-named tag is removed; the image stays under its new name.
      docker rmi "${mirror_image}"
    done
    ...
    chmod +x initimage.sh
    
    #执行此脚本,开始导入镜像...
    ./initimage.sh
    
    [root@Centos8 ]# docker images 
    REPOSITORY                           TAG                 IMAGE ID            CREATED             SIZE
    quay.io/coreos/flannel               v0.12.0-amd64       4e9f801d2217        4 weeks ago         52.8MB
    k8s.gcr.io/kube-scheduler            v1.15.1             b0b3c4c404da        8 months ago        81.1MB
    k8s.gcr.io/kube-controller-manager   v1.15.1             d75082f1d121        8 months ago        159MB
    k8s.gcr.io/kube-proxy                v1.15.1             89a062da739d        8 months ago        82.4MB
    k8s.gcr.io/kube-apiserver            v1.15.1             68c3eb07bfc3        8 months ago        207MB
    k8s.gcr.io/coredns                   1.3.1               eb516548c180        15 months ago       40.3MB
    k8s.gcr.io/etcd                      3.3.10              2c4adeb21b4f        16 months ago       258MB
    k8s.gcr.io/pause                     3.1                 da86e6ba6ca1        2 years ago         742kB

    7、初始化主节点

    # Dump kubeadm's default init configuration into kubeadm-config.yaml
    # (the same file name that is passed to `kubeadm init --config` below).
    kubeadm config print init-defaults > kubeadm-config.yaml
    
    # Edit kubeadm-config.yaml
    vim kubeadm-config.yaml
    ...
    # kubeadm init configuration made of three YAML documents:
    # InitConfiguration, ClusterConfiguration and KubeProxyConfiguration.
    apiVersion: kubeadm.k8s.io/v1beta2
    bootstrapTokens:
    - groups:
      - system:bootstrappers:kubeadm:default-node-token
      token: abcdef.0123456789abcdef
      ttl: 24h0m0s
      usages:
      - signing
      - authentication
    kind: InitConfiguration
    localAPIEndpoint:
      advertiseAddress: 192.168.152.53    # change this to the real IP of your master node
      bindPort: 6443
    nodeRegistration:
      criSocket: /var/run/dockershim.sock
      name: centos8
      taints:
      - effect: NoSchedule
        key: node-role.kubernetes.io/master
    ---
    apiServer:
      timeoutForControlPlane: 4m0s
    apiVersion: kubeadm.k8s.io/v1beta2
    certificatesDir: /etc/kubernetes/pki
    clusterName: kubernetes
    controllerManager: {}
    dns:
      type: CoreDNS
    etcd:
      local:
        dataDir: /var/lib/etcd
    imageRepository: k8s.gcr.io
    kind: ClusterConfiguration
    kubernetesVersion: v1.15.1   # keep this consistent with the Kubernetes version being installed
    networking:
      dnsDomain: cluster.local
      podSubnet: "10.244.0.0/16"  # added: the pod network CIDR that flannel uses by default
      serviceSubnet: 10.96.0.0/12
    scheduler: {}
    ---    # added document below: switches kube-proxy from its default mode to ipvs
    apiVersion: kubeproxy.config.k8s.io/v1alpha1
    kind: KubeProxyConfiguration
    featureGates:
      SupportIPVSProxyMode: true
    mode: ipvs
    ...
    
    #开始初始化
    kubeadm init --config=kubeadm-config.yaml --experimental-upload-certs | tee kubeadm-init.log
    报错:
        [ERROR NumCPU]: the number of available CPUs 1 is less than the required 2
        很明显,cpu数目小于所需的2,重新设置一个虚拟机CPU个数就ok
    
    初始化成功回显:
    To start using your cluster, you need to run the following as a regular user:
      mkdir -p $HOME/.kube
      sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
      sudo chown $(id -u):$(id -g) $HOME/.kube/config
    
    kubeadm join 192.168.152.53:6443 --token abcdef.0123456789abcdef \
        --discovery-token-ca-cert-hash sha256:50ca5375950abfa05cd4bd37dfb60e9ccd078083aeca49fa8bb6275c13d2a2cd 
    
    #根据回显创建文件及目录
    #目的为保存 kubectl 与 api server 交互时的缓存,交互过程为https协议
    mkdir -p $HOME/.kube
    cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
    chown $(id -u):$(id -g) $HOME/.kube/config
    
    #查看当前拥有节点,可见状态为NotReady,是因为没有添加网络
    [root@Centos8 .kube]# kubectl get node 
    NAME      STATUS     ROLES    AGE    VERSION
    centos8   NotReady   master   4m1s   v1.15.1
    
    #添加flannel网络
    mkdir -p install-k8s/plugin/flannel
    mkdir -p install-k8s/core
    cd install-k8s/core
    mv /etc/kubernetes/kubeadm-init.log /etc/kubernetes/kubeadm-config.yaml ./
    cd ../plugin/flannel
    #下载flannel.yml
    wget https://raw.githubusercontent.com/coreos/flannel/master/Documentation/kube-flannel.yml
    #执行yaml文件
    kubectl create -f kube-flannel.yml
    
    #构建完成,查看当前名称空间为kube-system的pod状态,-n 指定名称空间
    [root@Centos8 core]# kubectl get pod -n kube-system
    NAME                              READY   STATUS             RESTARTS   AGE
    coredns-5c98db65d4-5gwmj          0/1     CrashLoopBackOff   22         91m
    coredns-5c98db65d4-c277w          0/1     CrashLoopBackOff   22         91m
    etcd-centos8                      1/1     Running            0          90m
    kube-apiserver-centos8            1/1     Running            0          90m
    kube-controller-manager-centos8   1/1     Running            0          90m
    kube-flannel-ds-amd64-ggghn       1/1     Running            0          8m45s
    kube-proxy-gslw2                  1/1     Running            0          91m
    kube-scheduler-centos8            1/1     Running            0          90m
    
    #构建完成后,网卡界面会显示flannel信息
    [root@Centos8 core]# ifconfig
    flannel.1: flags=4163<UP,BROADCAST,RUNNING,MULTICAST>  mtu 1450
            inet 10.244.0.0  netmask 255.255.255.255  broadcast 0.0.0.0
            inet6 fe80::4019:beff:fe7c:5582  prefixlen 64  scopeid 0x20<link>
            ether 42:19:be:7c:55:82  txqueuelen 0  (Ethernet)
            RX packets 0  bytes 0 (0.0 B)
            RX errors 0  dropped 0  overruns 0  frame 0
            TX packets 0  bytes 0 (0.0 B)
            TX errors 0  dropped 13 overruns 0  carrier 0  collisions 0

     #网络加载成功,状态变为Ready

     [root@Centos8 ~]# kubectl get node
     NAME   STATUS ROLES AGE VERSION
     centos8 Ready master 53d v1.15.1

    8、配置从节点与主节点关联

    # 直接将kubeadm-init.log的最后一句回显在node节点运行即可
    [root@kube-node2 ~]# kubeadm join 192.168.152.53:6443 --token abcdef.0123456789abcdef \
    >     --discovery-token-ca-cert-hash sha256:c291b4fc646b5925299f8cdf7fafe33ad9c0505a1609041d8c8214d104eb08da
    
    [root@kube-node2 ~]# kubeadm join 192.168.152.53:6443 --token abcdef.0123456789abcdef \
    >     --discovery-token-ca-cert-hash sha256:c291b4fc646b5925299f8cdf7fafe33ad9c0505a1609041d8c8214d104eb08da

    补充:

    node节点为了方便以后的部署和扩展,我采用Ansible自动化安装docker及k8s。

    ansible服务端为k8s的MASTER端:192.168.152.53

    客户端根据node节点需求来控制

    #### 注意:所有的操作及文件都在192.168.152.53中 ####
    [root@Centos8 ansible]# pwd
    /root/ansible
    
    *** 先编辑好导入导出images的脚本 ***
    [root@Centos8 ansible]# cat saveImages.sh  #导出镜像脚本
    #!/usr/bin/env bash
    #
    # Export every local docker image to /root/images/<index>.tar.gz and copy
    # each archive to /root/ on the node machines, where loadImages.sh loads it.
    #
    # Archives are always named by their index: repository names contain "/",
    # so they cannot be used as a flat file name.
    # The ".tar.gz" suffix is kept for compatibility with loadImages.sh even
    # though `docker save` writes an uncompressed tar stream.
    set -euo pipefail
    
    readonly IMAGE_DIR=/root/images
    readonly NODE_IPS=('192.168.152.253' '192.168.152.252')
    
    # One "repository:tag" per line; --format avoids scraping the table output.
    mapfile -t images < <(docker images --format '{{.Repository}}:{{.Tag}}')
    if (( ${#images[@]} == 0 )); then
      printf 'no docker images found, nothing to export\n' >&2
      exit 1
    fi
    
    # -p makes this a no-op when the directory already exists.
    mkdir -p "${IMAGE_DIR}"
    
    for idx in "${!images[@]}"; do
      archive="${IMAGE_DIR}/${idx}.tar.gz"
      docker save -o "${archive}" "${images[idx]}"
      for node_ip in "${NODE_IPS[@]}"; do
        scp "${archive}" "${node_ip}:/root/"
      done
    done
    
    [root@Centos8 ansible]# cat loadImages.sh  #导入镜像脚本
    #!/usr/bin/env bash
    #
    # Load every /root/<index>.tar.gz archive into the local docker daemon and
    # delete each archive once it has been loaded.
    #
    # Exits non-zero when any archive fails to load or when no archive is found,
    # so that the caller (the Ansible script task) sees the failure.
    shopt -s nullglob
    
    status=0
    loaded=0
    for archive in /root/*.tar.gz; do
      file_name=${archive##*/}
      # Only touch archives named by a numeric index; leave other tarballs alone.
      [[ "${file_name}" =~ ^[0-9]+\.tar\.gz$ ]] || continue
      if docker load -i "${archive}"; then
        rm -f -- "${archive}"
        loaded=$((loaded + 1))
      else
        printf 'failed to load %s\n' "${archive}" >&2
        status=1
      fi
    done
    
    if (( loaded == 0 && status == 0 )); then
      printf 'no image archives found in /root\n' >&2
      status=1
    fi
    exit "${status}"
    
    *** 在本地执行saveImages.sh ***
    ./saveImages.sh
    
    *** 创建PlayBook ***
    [root@Centos8 ansible]# vim kuber.yaml
    ...
    ---
    - name: Install docker and k8s
      hosts: all
      tasks:
      # Install and start Docker CE. If any task in the block fails, the rescue
      # section only prints a message, so the play carries on with the tasks
      # that follow the block.
      - block:
        # yum-config-manager is shipped in yum-utils, so the prerequisites must
        # be installed before the repository can be added.
        - name: Install yum-utils and storage prerequisites
          yum:
            name:
              - yum-utils
              - device-mapper-persistent-data
              - lvm2
            state: latest
        - name: Add repository
          shell: yum-config-manager --add-repo http://mirrors.aliyun.com/docker-ce/linux/centos/docker-ce.repo
        # containerd.io is installed from an explicit RPM URL together with docker-ce.
        - name: install docker-ass
          yum:
            name:
              - https://download.docker.com/linux/centos/7/x86_64/stable/Packages/containerd.io-1.2.6-3.3.el7.x86_64.rpm
              - docker-ce
            state: latest
        - name: mkdir docker.service.d
          file:
            path: '{{ item }}'
            state: directory
          loop:
          - /etc/docker
          - /etc/systemd/system/docker.service.d
        # Push the control node's daemon.json to the same path on the managed host.
        - name: Copy daemon.json
          copy:
            src: /etc/docker/daemon.json
            dest: /etc/docker/daemon.json
        - name: daemon-reload
          shell: systemctl daemon-reload
        # Restart so the copied daemon.json takes effect; also enable at boot.
        - name: Start docker
          service:
            name: docker
            state: restarted
            enabled: yes
        rescue:
        - debug:
            msg: 'docker Installation failed!'
      # Write the Kubernetes yum repository definition to
      # /etc/yum.repos.d/kubernetes.repo. The content is a single-quoted
      # multi-line scalar; the blank lines between entries are what separate
      # them into individual lines of the resulting file.
      - name: Copy using inline content
        copy:
          content: 
            '[kubernetes]
            
            name=Kubernetes
            
            baseurl=http://mirrors.aliyun.com/kubernetes/yum/repos/kubernetes-el7-x86_64
            
            enabled=1
            
            gpgcheck=0
            
            repo_gpgcheck=0
            
            gpgkey=http://mirrors.aliyun.com/kubernetes/yum/doc/yum-key.gpg'
          dest: /etc/yum.repos.d/kubernetes.repo
      # Install kubeadm, kubectl and kubelet pinned to version 1.15.1.
      - name: Install k8s
        yum:
          name:
            - kubeadm-1.15.1
            - kubectl-1.15.1
            - kubelet-1.15.1
          state: present
      # Start kubelet and enable it at boot.
      - name: Start kubelet
        service:
          name: kubelet
          state: started
          enabled: yes
      # Run the control node's loadImages.sh on the managed host via the script module.
      - name: Run script load Images
        script: /root/ansible/loadImages.sh
    ...
    
    ansible-playbook kuber.yaml  #执行,完成,最后在node节点再执行以上第8步即可

    遇到的问题及启动服务报错:

    1、W0412 04:57:19.803140     846 watcher.go:87] Error while processing event ("/sys/fs/cgroup/devices/libcontainer_30695_systemd_test_default.slice": 0x40000100 == IN_CREATE|IN_ISDIR): inotify_add_watch /sys/fs/cgroup/devices/libcontainer_30695_systemd_test_default.slice: no such file or directory

    ## 问题的原因是docker与kubelet的cgroup组不一致
    ## 查看kubeadm的配置文件位置
    [root@Centos8 ~]# rpm -ql kubeadm
    /usr/bin/kubeadm
    /usr/lib/systemd/system/kubelet.service.d/10-kubeadm.conf
    
    ## 修改配置文件,将kubelet的cgroup改为systemd
    vim /usr/lib/systemd/system/kubelet.service.d/10-kubeadm.conf
    ...
    Environment="KUBELET_CONFIG_ARGS=--config=/var/lib/kubelet/config.yaml --cgroup-driver=systemd"
    ...
    
    ## 重新加载配置文件及重启服务
    [root@Centos8 ~]# systemctl daemon-reload 
    [root@Centos8 ~]# systemctl restart kubelet.service 

    2、node2节点的flannel一直报错

    ## 还未找到根本原因,正在排错,如有大佬路过,希望指点
    kube-flannel-ds-amd64-b47l9 0/1 Init:ErrImagePull 0 3m50s

    3、如果需要重新安装master节点,执行:

    # kubeadm reset
    然后再进行重新安装

    4、如果需要重新安装node节点,执行:

    # kubectl drain <node name> --delete-local-data --force --ignore-daemonsets
    # kubectl delete node <node name>
    或
    kubeadm reset #直接重新join

    5、node节点加入master时报错:error execution phase preflight: couldn't validate the identity of the API Server: abort connecting to API servers after timeout of 5m0s

    原因:master的token过期了,需要重新创建

    解决:

    Master:
    [root@Centos8 ~]# kubeadm token create
    blopur.fn8gtr06gsjlq7yi
    
    Node:
    kubeadm join 192.168.152.53:6443 --token blopur.fn8gtr06gsjlq7yi --discovery-token-ca-cert-hash sha256:c291b4fc646b5925299f8cdf7fafe33ad9c0505a1609041d8c8214d104eb08da

    6、node2节点加入master后,systemctl status kubelet.service报错:Unable to update cni config: No networks found in /etc/cni/net.d

    vim /usr/lib/systemd/system/kubelet.service.d/10-kubeadm.conf
    ...
    添加:
    Environment="KUBELET_NETWORK_ARGS=--network-plugin=cni --cni-conf-dir=/etc/cni/ --cni-bin-dir=/opt/cni/bin"
    ...
    
    systemctl daemon-reload && systemctl restart kubelet.service

    7、node2节点加入master后,systemctl status kubelet.service报错:open /run/flannel/subnet.env: no such file or directory

    ## 将master节点的/run/flannel/subnet.env文件拷贝到此处即可
    [root@Centos8 flannel]# scp subnet.env kubenode2:/run/flannel/
    
    ## 重启
    systemctl restart kubelet.service   
  • 相关阅读:
    精确率、召回率、F1 值、ROC、AUC 各自的优缺点是什么?
    使用sklearn做单机特征工程
    机器学习算法比较
    CNN-卷积层和池化层学习
    卷积神经网络概念与原理
    CNN(卷积神经网络)、RNN(循环神经网络)、DNN(深度神经网络)概念区分理解
    技能|三次简化一张图:一招理解LSTM/GRU门控机制
    【转】TensorFlow四种Cross Entropy算法实现和应用
    http://blog.csdn.net/u014595019/article/details/52805444
    神经网络和深度学习简史(三)
  • 原文地址:https://www.cnblogs.com/v-fan/p/13034223.html
Copyright © 2011-2022 走看看