zoukankan      html  css  js  c++  java
  • OCP升级(3.6->3.7)

    有个好文档还是靠谱很多,感谢同事的文档。升级步骤记录如下:

    1.检查现有环境

    [root@master ~]# etcd --version
    etcd Version: 3.2.22
    Git SHA: 1674e68
    Go Version: go1.9.2
    Go OS/Arch: linux/amd64
    [root@master ~]# etcdctl2 cluster-health
    member a4e80e96ea75f7c8 is healthy: got healthy result from https://192.168.0.103:2379
    cluster is healthy
    [root@master ~]# etcdctl2 member list
    a4e80e96ea75f7c8: name=master.example.com peerURLs=https://192.168.0.103:2380 clientURLs=https://192.168.0.103:2379 isLeader=true
    [root@master ~]# oc get dc/router -n default
    NAME      REVISION   DESIRED   CURRENT   TRIGGERED BY
    router    4          2         2         config
    [root@master ~]# oc get dc/docker-registry -n default
    NAME              REVISION   DESIRED   CURRENT   TRIGGERED BY
    docker-registry   1          1         1         config
    [root@master ~]# oc get node --show-labels
    NAME                 STATUS     AGE       VERSION             LABELS
    master.example.com   Ready      14h       v1.6.1+5115d708d7   beta.kubernetes.io/arch=amd64,beta.kubernetes.io/os=linux,kubernetes.io/hostname=master.example.com,openshift-infra=apiserver,region=infra,router=true,zone=default
    node1.example.com    Ready      14h       v1.6.1+5115d708d7   beta.kubernetes.io/arch=amd64,beta.kubernetes.io/os=linux,kubernetes.io/hostname=node1.example.com,region=infra,router=true,zone=default
    node2.example.com    NotReady   14h       v1.6.1+5115d708d7   beta.kubernetes.io/arch=amd64,beta.kubernetes.io/os=linux,kubernetes.io/hostname=node2.example.com,region=infra,zone=default
    
    [root@master ~]# oc get pv
    No resources found.
    [root@master ~]# oc get pvc
    No resources found.

    2.备份ansible脚本

    cd /usr/share/ansible
    cp -r plugins/ plugins-3.6.bak
    cp -r openshift-ansible/ openshift-ansible-3.6.bak
    ll
    total 0
    drwxr-xr-x. 5 root root 51 Jul 27 19:21 openshift-ansible
    drwxr-xr-x. 5 root root 51 Jul 28 01:41 openshift-ansible-3.6.bak
    drwxr-xr-x. 2 root root 50 Jul 27 19:21 plugins
    drwxr-xr-x. 2 root root 50 Jul 28 01:40 plugins-3.6.bak
    drwxr-xr-x. 2 root root  6 Jan 16  2018 roles

    3.备份etcd数据

    cp -r /etc/etcd /etc/etcd.bak
    etcdctl2 backup --data-dir /var/lib/etcd --backup-dir /tmp/etcd-data
    cp /var/lib/etcd/member/snap/db /tmp/etcd-data
    ll /etc/etcd.bak
    total 48
    drwx------. 5 root root  212 Jul 28 01:47 ca
    -rw-------. 1 root root 1895 Jul 28 01:47 ca.crt
    -rw-r--r--. 1 root root 1526 Jul 28 01:47 etcd.conf
    -rw-r--r--. 1 root root 1686 Jul 28 01:47 etcd.conf.32617.2018-07-28@00:22:18~
    drwx------. 8 root root 4096 Jul 28 01:47 generated_certs
    -rw-------. 1 root root 5916 Jul 28 01:47 peer.crt
    -rw-r--r--. 1 root root 1005 Jul 28 01:47 peer.csr
    -rw-------. 1 root root 1704 Jul 28 01:47 peer.key
    -rw-------. 1 root root 5873 Jul 28 01:47 server.crt
    -rw-r--r--. 1 root root 1005 Jul 28 01:47 server.csr
    -rw-------. 1 root root 1704 Jul 28 01:47 server.key
    
    ll /var/lib/etcd/member/snap/db /tmp/etcd-data
    -rw-------. 1 etcd etcd 25952256 Jul 28 01:48 /var/lib/etcd/member/snap/db
    
    /tmp/etcd-data:
    total 16320
    -rw-------. 1 root root 25952256 Jul 28 01:48 db
    drwx------. 4 root root       29 Jul 28 01:47 member

    4.备份配置文件

    master节点

    cp -r /etc/origin/master /etc/origin/master.bak
    cp -r /etc/origin/node /etc/origin/node.bak
    cp -r /etc/sysconfig/atomic-openshift-master-api /etc/sysconfig/atomic-openshift-master-api.bak
    cp -r /etc/sysconfig/atomic-openshift-master-controllers  /etc/sysconfig/atomic-openshift-master-controller.bak
    cp -r /etc/sysconfig/atomic-openshift-node /etc/sysconfig/atomic-openshift-node.bak
    
    ll /etc/origin/master.bak
    ll /etc/origin/node /etc/origin/node.bak
    ll /etc/sysconfig/atomic-openshift-master-api.bak
    ll /etc/sysconfig/atomic-openshift-master-controller.bak
    ll /etc/sysconfig/atomic-openshift-node.bak

    node节点

    cp -r /etc/origin/node /etc/origin/node.bak
    cp -r /etc/sysconfig/atomic-openshift-node /etc/sysconfig/atomic-openshift-node.bak
    
    ll /etc/origin/node.bak
    ll /etc/sysconfig/atomic-openshift-node.bak

    备份yum repo文件

    cp /etc/yum.repos.d/ocp.repo /etc/yum.repos.d/ocp.repo.bak

    5.修改yum repository文件

    当然,在此之前需要先把软件包(Package)导入到yum源中

    [root@master ~]# cat /etc/yum.repos.d/ocp.repo
    [local-rhel-7-server-rpms]
    name=rhel-7-server-rpms
    baseurl=http://192.168.56.103:8080/repo/rhel-7-server-rpms
    enabled=1
    gpgcheck=0
    [local-rhel-7-server-extras-rpms]
    name=rhel-7-server-extras-rpms
    baseurl=http://192.168.56.103:8080/repo/rhel-7-server-extras-rpms
    enabled=1
    gpgcheck=0
    [local-rhel-7-fast-datapath-rpms]
    name=rhel-7-fast-datapath-rpms
    baseurl=http://192.168.56.103:8080/repo/rhel-7-fast-datapath-rpms
    enabled=1
    gpgcheck=0
    [local-rhel-7-server-ose-3.7-rpms]
    name=rhel-7-server-ose-3.7-rpms
    baseurl=http://192.168.56.103:8080/repo/rhel-7-server-ose-3.7-rpms
    enabled=1
    gpgcheck=0
    yum clean all
    yum repolist
    
    yum update -y

    6.升级ansible

    yum update atomic-openshift-utils ansible
    rpm -qa | grep atomic-openshift-utils
    atomic-openshift-utils-3.7.57-1.git.33.cf01e48.el7.noarch

    7.备份并更新模板

    mv /usr/share/openshift/examples /usr/share/openshift/examples.3.6
    mkdir -p /usr/share/openshift/examples
    cp -R /usr/share/ansible/openshift-ansible/roles/openshift_examples/files/examples/v3.7/* /usr/share/openshift/examples/

    8.导入镜像

    导入镜像过程略,完成后需要对registry-console进行重新tag和push

    docker tag registry.example.com/openshift3/registry-console:v3.7.64 registry.example.com/openshift3/registry-console:v3.7
    
    docker push registry.example.com/openshift3/registry-console:v3.7

    10.修改hosts文件

    /etc/ansible/hosts文件

    [OSEv3:children]
    masters
    nodes
    etcd
    nfs
    
    [OSEv3:vars]
    ansible_ssh_user=root
    openshift_deployment_type=openshift-enterprise
    openshift_release=v3.7
    
    osm_use_cockpit=true
    osm_cockpit_plugins=['cockpit-kubernetes']
    openshift_cockpit_deployer_prefix='openshift3/'
    openshift_cockpit_deployer_version='v3.7.64'
    
    osm_cluster_network_cidr=10.128.0.0/14
    openshift_portal_net=172.30.0.0/16
    openshift_master_api_port=8443
    openshift_master_console_port=8443
    
    
    openshift_enable_service_catalog=true
    openshift_service_catalog_image_prefix=registry.example.com/openshift3/ose-
    openshift_service_catalog_image_version=v3.7.64
    ansible_service_broker_image_prefix=registry.example.com/openshift3/ose-
    ansible_service_broker_etcd_image_prefix=registry.example.com/rhel7/
    template_service_broker_prefix=registry.example.com/openshift3/
    oreg_url=registry.example.com/openshift3/ose-${component}:${version}
    openshift_examples_modify_imagestreams=true
    openshift_clock_enabled=true
    
    openshift_metrics_storage_kind=nfs
    openshift_metrics_install_metrics=true
    openshift_metrics_storage_access_modes=['ReadWriteOnce']
    openshift_metrics_storage_host=nfs.example.com
    openshift_metrics_storage_nfs_directory=/exports
    openshift_metrics_storage_volume_name=metrics
    openshift_metrics_storage_volume_size=10Gi
    openshift_metrics_hawkular_hostname=hawkular-metrics.apps.example.com
    openshift_metrics_cassandra_storage_type=emptydir
    openshift_metrics_image_prefix=registry.example.com/openshift3/
    openshift_hosted_metrics_deploy=true
    openshift_hosted_metrics_public_url=https://hawkular-metrics.apps.example.com/hawkular/metrics
    openshift_metrics_image_prefix=registry.example.com/openshift3/
    openshift_metrics_image_version=v3.7.64
    
    
    openshift_template_service_broker_namespaces=['openshift']
    template_service_broker_selector={"node": "true"}
    openshift_master_identity_providers=[{'name': 'htpasswd_auth', 'login': 'true', 'challenge': 'true', 'kind': 'HTPasswdPasswordIdentityProvider', 'filename': '/etc/origin/master/htpasswd'}]
    # Default login account: admin / handhand
    openshift_master_htpasswd_users={'admin': '$apr1$gfaL16Jf$c.5LAvg3xNDVQTkk6HpGB1'}
    
    
    #openshift_repos_enable_testing=true
    openshift_disable_check=docker_image_availability,disk_availability,memory_availability,docker_storage
    docker_selinux_enabled=false
    openshift_docker_options=" --selinux-enabled --insecure-registry 172.30.0.0/16 --log-driver json-file --log-opt max-size=50M --log-opt max-file=3 --insecure-registry registry.example.com --add-registry registry.example.com"
    openshift_docker_additional_registries=registry.example.com
    openshift_docker_insecure_registries=registry.example.com
    osm_etcd_image=rhel7/etcd
    openshift_logging_image_prefix=registry.example.com/openshift3/
    
    openshift_hosted_router_selector='region=infra,router=true'
    openshift_master_default_subdomain=app.example.com
    
    
    # host group for masters
    [masters]
    master.example.com
    # host group for etcd
    [etcd]
    master.example.com
    
    # host group for nodes, includes region info
    [nodes]
    master.example.com openshift_node_labels="{'region': 'infra', 'router': 'true', 'zone': 'default'}" openshift_schedulable=true
    node1.example.com openshift_node_labels="{'region': 'infra', 'router': 'true', 'zone': 'default'}" openshift_schedulable=true
    node2.example.com openshift_node_labels="{'region': 'infra', 'zone': 'default'}" openshift_schedulable=true
    
    [nfs]
    nfs.example.com

    11.升级前的检查

    ansible-playbook --tags pre_upgrade /usr/share/ansible/openshift-ansible/playbooks/byo/openshift-cluster/upgrades/v3_7/upgrade.yml 

    检查时遇到一个问题:提示master、node1、node2节点的Docker版本需要在1.12以上,但实际上这三个节点的docker版本已经是1.13.1

    解决办法:修改/etc/yum.conf文件,注释掉下面这一行(exclude配置),即可通过检查

    #exclude= docker*1.20*  docker*1.19*  docker*1.18*  docker*1.17*  docker*1.16*  docker*1.15*  docker*1.14*  docker*1.13* 

    12.升级

    升级master节点

    ansible-playbook -vv /usr/share/ansible/openshift-ansible/playbooks/byo/openshift-cluster/upgrades/v3_7/upgrade_control_plane.yml | tee /tmp/upgrade_control_plane_to_3_7.log;

    升级完后显示

    TASK [Warn if shared-resource-viewer could not be updated] **************************************************************************************************************
    task path: /usr/share/ansible/openshift-ansible/playbooks/common/openshift-cluster/upgrades/post_control_plane.yml:134
    skipping: [master.example.com] => {"changed": false, "skip_reason": "Conditional result was False"}
    META: ran handlers
    
    PLAY RECAP **************************************************************************************************************************************************************
    localhost                  : ok=26   changed=0    unreachable=0    failed=0   
    master.example.com         : ok=410  changed=87   unreachable=0    failed=0   
    nfs.example.com            : ok=33   changed=2    unreachable=0    failed=0   
    node1.example.com          : ok=50   changed=2    unreachable=0    failed=0   
    node2.example.com          : ok=50   changed=2    unreachable=0    failed=0   

    检查

    [root@master ansible]# oc get nodes
    NAME                 STATUS    AGE       VERSION
    master.example.com   Ready     3h        v1.7.6+a08f5eeb62
    node1.example.com    Ready     3h        v1.6.1+5115d708d7
    node2.example.com    Ready     3h        v1.6.1+5115d708d7
    
    
    [root@master ansible]# oc get pods --all-namespaces
    NAMESPACE              NAME                         READY     STATUS             RESTARTS   AGE
    default                docker-registry-2-94zld      1/1       Running            0          6m
    default                registry-console-2-lmhgx     1/1       Running            0          6m
    default                router-2-8hnmz               1/1       Running            0          6m
    default                router-2-g6tlm               1/1       Running            0          5m
    kube-service-catalog   apiserver-z6nmz              1/1       Running            4          2h
    kube-service-catalog   controller-manager-d2jgc     0/1       CrashLoopBackOff   9          2h
    openshift-infra        hawkular-cassandra-1-gp4n9   1/1       Running            0          10m
    openshift-infra        hawkular-metrics-4j828       1/1       Running            1          2h
    openshift-infra        heapster-rgwrw               1/1       Running            6          3h
    
    
    [root@master ansible]# oc version
    oc v3.7.64
    kubernetes v1.7.6+a08f5eeb62
    features: Basic-Auth GSSAPI Kerberos SPNEGO
    
    Server https://master.example.com:8443
    openshift v3.7.64
    kubernetes v1.7.6+a08f5eeb62

    升级node节点

    ansible-playbook -vv /usr/share/ansible/openshift-ansible/playbooks/byo/openshift-cluster/upgrades/v3_7/upgrade_nodes.yml -e openshift_upgrade_nodes_serial=1 | tee /tmp/upgrade_node_to_3_7.log;

    升级完成后提示

    TASK [include] **********************************************************************************************************************************************************
    task path: /usr/share/ansible/openshift-ansible/playbooks/common/openshift-cluster/upgrades/upgrade_nodes.yml:83
    skipping: [node2.example.com] => {"changed": false, "skip_reason": "Conditional result was False"}
    META: ran handlers
    
    PLAY RECAP **************************************************************************************************************************************************************
    localhost                  : ok=12   changed=0    unreachable=0    failed=0   
    master.example.com         : ok=76   changed=4    unreachable=0    failed=0   
    nfs.example.com            : ok=33   changed=2    unreachable=0    failed=0   
    node1.example.com          : ok=174  changed=30   unreachable=0    failed=0   
    node2.example.com          : ok=174  changed=30   unreachable=0    failed=0   

    检查

    [root@master ansible]# oc get nodes
    NAME                 STATUS    AGE       VERSION
    master.example.com   Ready     3h        v1.7.6+a08f5eeb62
    node1.example.com    Ready     3h        v1.7.6+a08f5eeb62
    node2.example.com    Ready     3h        v1.7.6+a08f5eeb62
    
    [root@master ansible]# oc get pods --all-namespaces
    NAMESPACE              NAME                         READY     STATUS    RESTARTS   AGE
    default                docker-registry-2-x7sqf      1/1       Running   0          4m
    default                registry-console-2-mslwb     1/1       Running   0          4m
    default                router-2-79qjr               1/1       Running   0          8m
    default                router-2-8hnmz               1/1       Running   0          20m
    kube-service-catalog   apiserver-z6nmz              1/1       Running   4          3h
    kube-service-catalog   controller-manager-d2jgc     0/1       Error     12         2h
    openshift-infra        hawkular-cassandra-1-qg2bj   1/1       Running   0          8m
    openshift-infra        hawkular-metrics-bqqm8       0/1       Running   0          4m
    openshift-infra        heapster-zt87w               1/1       Running   0          8m
  • 相关阅读:
    Improving .NET Application Performance and Scalability
    使用PerfView监测.NET程序性能(二):Perfview的使用
    【转载】Configure the max limit for concurrent TCP connections
    Constructor in depth
    使用PerfView监测.NET程序性能(一):Event Trace for Windows
    PHP工程师面临成长瓶颈
    关于前后端字符串长度计算不一致的问题
    最近踩坑汇总
    本周踩坑汇总
    上周踩坑汇总
  • 原文地址:https://www.cnblogs.com/ericnie/p/9987438.html
Copyright © 2011-2022 走看看