  • Using Ceph storage with k8s


    Ceph configuration

    rbd create --size 100 rbd/nginx-image
    
    [root@localhost my-cluster]# rbd list
    nginx-image
    [root@localhost my-cluster]# rbd info nginx-image
    rbd image 'nginx-image':
            size 100MiB in 25 objects
            order 22 (4MiB objects)
            block_name_prefix: rbd_data.5e4d6b8b4567
            format: 2
            features: layering
            flags: 
            create_timestamp: Tue Apr 30 18:10:05 2019
    [root@localhost my-cluster]# 
    # Get the client.admin key (base64-encoded)
    [root@localhost my-cluster]# ceph auth get-key client.admin | base64
    QVFDRTQ4ZGNLRFVIRFJBQTVGd2J5QzU0d3B0cGJuOTREcjM1VHc9PQ==
    
    

    k8s configuration

    Using Ceph via static PV and PVC

    Before each rebuild, rbd map must be run to map the image on the node. The rbd volume type mounts a Rados Block Device volume into a Pod. Unlike emptyDir, which is wiped when the Pod is deleted, the contents of an rbd volume are preserved and the volume is merely unmounted. This means an RBD volume can be pre-populated with data, and that data can be handed off between pods. Ceph RBD supports only single-node read-write or multi-node read-only; multi-node read-write is not possible. For workloads that do need multi-node read-write, CephFS solves that problem.

    • Install ceph-common

      yum -y install ceph-common

    • Copy the Ceph configuration to the k8s nodes

      Copy ceph.conf and ceph.client.admin.keyring to the /etc/ceph/ directory

    • Format the image as xfs

    [root@localhost my-cluster]# rbd map nginx-image 
    /dev/rbd0
    [root@localhost my-cluster]# mkfs.xfs /dev/rbd0
    [root@localhost my-cluster]# rbd unmap nginx-image
    
    • Create the Ceph secret
    cat ceph-secret.yaml
    apiVersion: v1
    kind: Secret
    metadata:
      name: ceph-secret
    type: "kubernetes.io/rbd"
    data:
      key: QVFDTTlXOWFOMk9IR3hBQXZyUjFjdGJDSFpoZUtmckY0N2tZOUE9PQ==
    kubectl create -f ceph-secret.yaml
    [root@node1 work]# kubectl get secret
    NAME                  TYPE                                  DATA      AGE
    ceph-secret           Opaque                                1         8d
    default-token-7s88r   kubernetes.io/service-account-token   3         11d
    
    • Create the PV
    [root@node1 work]# cat nginx-pv.yaml 
    apiVersion: v1
    kind: PersistentVolume
    metadata:
      name: nginx-pv
    spec:
      capacity:
        storage: 1Gi
      accessModes:
        - ReadWriteOnce
      rbd:
        monitors:
          - 192.168.6.156:6789,192.168.6.157:6789,192.168.6.158:6789
        pool: rbd
        image: nginx-image
        user: admin
        secretRef:
          name: ceph-secret
        fsType: xfs
        readOnly: false
      persistentVolumeReclaimPolicy: Recycle
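
    The original session does not show the PV itself being created; a minimal sketch, assuming the manifest above is saved as nginx-pv.yaml:

    kubectl create -f nginx-pv.yaml
    kubectl get pv nginx-pv   # should report STATUS Available until a matching PVC binds it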
    
    • Create the PVC
    [root@node1 work]# cat nginx-pvc.yaml 
    kind: PersistentVolumeClaim
    apiVersion: v1
    metadata:
      name: nginx-pvc 
    spec:
      accessModes:
        - ReadWriteOnce
      resources:
        requests:
          storage: 1Gi
    
    kubectl create -f nginx-pvc.yaml
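
    A quick check (not part of the original session) that the claim bound to the static PV:

    kubectl get pvc nginx-pvc   # STATUS should be Bound and VOLUME should show nginx-pv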
    
    • Create the Deployment and mount the volume
    [root@node1 work]# cat nginx-deploy.yml 
    apiVersion: apps/v1beta1
    kind: Deployment
    metadata:
      name: demo
    spec:
      replicas: 3
      template:
        metadata:
          labels:
            app: demo
        spec:
          containers:
          - name: demo
            image: mritd/demo
            ports:
            - containerPort: 80
            volumeMounts:
              - mountPath: "/data"
                name: data
          volumes:
            - name: data
              persistentVolumeClaim:
                claimName: nginx-pvc
    
    [root@node1 work]# rbd map nginx-image
    /dev/rbd0
    [root@node1 work]# kubectl create -f nginx-deploy.yml 
    deployment "demo" created
    
    • Create the Service
    [root@node1 work]# kubectl expose deployment/demo
    service "demo" exposed
    [root@node1 work]# kubectl get svc
    NAME         TYPE        CLUSTER-IP      EXTERNAL-IP   PORT(S)   AGE
    demo         ClusterIP   10.254.170.53   <none>        80/TCP    6m
    kubernetes   ClusterIP   10.254.0.1      <none>        443/TCP   11d
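
    To sanity-check the Service (assuming the mritd/demo image answers HTTP on port 80, as the containerPort suggests), hit the ClusterIP from a cluster node and list its endpoints:

    curl http://10.254.170.53/
    kubectl get endpoints demo   # shows the pod IPs backing the Service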
    

    The current k8s 1.8 release appears to have a bug with the RWO access mode on PVs: all three pods mounted the PV and could write data, yet none of them saw the others' writes, whereas normally only one pod should have been created successfully. Having to run rbd map on the node before the RBD volume can be used is also inconvenient.
    https://github.com/kubernetes/kubernetes/issues/60903

    Testing data inconsistency when multiple pods mount a static PV

    [root@node1 work]# ansible k8s -a 'rbd map nginx-image'
    192.168.6.161 | SUCCESS | rc=0 >>
    /dev/rbd0
    
    192.168.6.162 | SUCCESS | rc=0 >>
    /dev/rbd0
    
    192.168.6.163 | SUCCESS | rc=0 >>
    /dev/rbd1
    [root@node1 work]# kubectl create -f nginx-deploy.yml 
    deployment "demo" created
    [root@node1 work]# kubectl get pods
    NAME                    READY     STATUS    RESTARTS   AGE
    ceph-mysql-pod          1/1       Running   0          8d
    demo-579d6c87d6-5c59l   1/1       Running   0          10s
    demo-579d6c87d6-dck74   1/1       Running   0          10s
    demo-579d6c87d6-gg9jf   1/1       Running   0          10s
    
    
    [root@node1 work]# for i in `kubectl get pods -l app=demo|grep demo|awk '{print $1}'`; do kubectl exec -it $i touch /data/$i.txt ;done;
    
    [root@node1 work]# for i in `kubectl get pods -l app=demo|grep demo|awk '{print $1}'`; do kubectl exec -it $i ls  /data ;done;            
    3.txt                      demo-579d6c87d6-5c59l.txt
    3.txt                      demo-579d6c87d6-dck74.txt
    3.txt                      demo-579d6c87d6-gg9jf.txt
    
    The files seen in each pod are all different.
    
    Delete the Deployment and recreate the pods
    [root@node1 work]# kubectl delete -f nginx-deploy.yml       
    deployment "demo" deleted
    [root@node1 work]# ansible k8s -a 'rbd map nginx-image'
    192.168.6.161 | SUCCESS | rc=0 >>
    /dev/rbd0
    
    192.168.6.162 | SUCCESS | rc=0 >>
    /dev/rbd0
    
    192.168.6.163 | SUCCESS | rc=0 >>
    /dev/rbd1
    
    [root@node1 work]# kubectl create -f nginx-deploy.yml 
    deployment "demo" created
    [root@node1 work]# kubectl get pods
    NAME                    READY     STATUS    RESTARTS   AGE
    ceph-mysql-pod          1/1       Running   0          8d
    demo-579d6c87d6-fbdc2   1/1       Running   0          4s
    demo-579d6c87d6-hslhw   1/1       Running   0          4s
    demo-579d6c87d6-p5dc5   1/1       Running   0          4s
    [root@node1 work]# for i in `kubectl get pods -l app=demo|grep demo|awk '{print $1}'`; do kubectl exec -it $i ls  /data ;done; 
    3.txt                      demo-579d6c87d6-gg9jf.txt
    3.txt                      demo-579d6c87d6-gg9jf.txt
    3.txt                      demo-579d6c87d6-gg9jf.txt
    
    Only the last copy was kept in the RBD image.
    
    

    The StorageClass approach

    Since 1.4, Kubernetes offers a more convenient way to create PVs dynamically. With a StorageClass there is no need to pre-create fixed-size PVs and wait for consumers to claim them with PVCs; creating the PVC alone is enough to allocate storage. There is also no need to run rbd map for the image on each node.

    • Create a system-level Secret

    Note: StorageClass requires the Ceph Secret type to be kubernetes.io/rbd, so the ceph-secret created earlier must be deleted first and then recreated with the command below; the key used here is not base64-encoded.

    [root@node1 work]# kubectl delete secret ceph-secret
    secret "ceph-secret" deleted
    [root@node1 work]# ceph auth get-key client.admin
    AQCE48dcKDUHDRAA5FwbyC54wptpbn94Dr35Tw==
    # The secret type must be kubernetes.io/rbd, otherwise the PVC cannot be used
    kubectl create secret generic ceph-secret --type="kubernetes.io/rbd" --from-literal=key='AQCE48dcKDUHDRAA5FwbyC54wptpbn94Dr35Tw==' 
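
    A quick verification (not in the original session) that the recreated secret has the required type:

    kubectl get secret ceph-secret -o jsonpath='{.type}'   # should print kubernetes.io/rbd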
    
    • Create the StorageClass
    cat << EOF >> ceph.storageclass.yml
    apiVersion: storage.k8s.io/v1
    kind: StorageClass
    metadata:
      name: ceph-storageclass
    provisioner: kubernetes.io/rbd
    parameters:
      monitors: 192.168.6.156:6789,192.168.6.157:6789,192.168.6.158:6789
      # Ceph client user ID (not a k8s user)
      adminId: admin
      adminSecretName: ceph-secret
      pool: rbd 
      userId: admin
      userSecretName: ceph-secret
    EOF
    
    [root@node1 work]# kubectl create -f ceph.storageclass.yml 
    storageclass "ceph-storageclass" created
    [root@node1 work]# kubectl get storageclass
    NAME                PROVISIONER
    ceph-storageclass   kubernetes.io/rbd
    
    • Create the PVC
    [root@node1 work]# vim nginx-pvc2.yaml               
    
    kind: PersistentVolumeClaim
    apiVersion: v1
    metadata:
      name: nginx-pvc2
      annotations:
        volume.beta.kubernetes.io/storage-class: ceph-storageclass
    spec:
      accessModes:
        - ReadWriteOnce
      resources:
        requests:
          storage: 1Gi
    
    [root@node1 work]# kubectl create -f nginx-pvc2.yaml 
    persistentvolumeclaim "nginx-pvc2" created
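
    The volume.beta.kubernetes.io/storage-class annotation is the legacy form used on this k8s 1.8 cluster; on 1.6+ clusters the same request can also be written with spec.storageClassName. A sketch with equivalent intent (not tested here):

    kind: PersistentVolumeClaim
    apiVersion: v1
    metadata:
      name: nginx-pvc2
    spec:
      storageClassName: ceph-storageclass
      accessModes:
        - ReadWriteOnce
      resources:
        requests:
          storage: 1Gi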
    
    • Create the Deployment
    [root@node1 work]# vim nginx-deploy2.yml 
    
    apiVersion: apps/v1beta1
    kind: Deployment
    metadata:
      name: demo2
    spec:
      replicas: 3
      template:
        metadata:
          labels:
            app: demo2
        spec:
          containers:
          - name: nginx-demo2
            image: mritd/demo
            ports:
            - containerPort: 80
            volumeMounts:
              - mountPath: "/data"
                name: data
          volumes:
            - name: data
              persistentVolumeClaim:
                claimName: nginx-pvc2
    
    [root@node1 work]# kubectl create -f nginx-deploy2.yml
    deployment "demo2" created
    [root@node1 work]# kubectl get pods -o wide
    NAME                     READY     STATUS              RESTARTS   AGE       IP             NODE
    ceph-mysql-pod           1/1       Running             0          8d        172.30.128.6   node3
    demo2-66fd75bb8d-nwxmr   1/1       Running             0          15s       172.30.128.3   node3
    demo2-66fd75bb8d-wmt7g   0/1       ContainerCreating   0          15s       <none>         node1
    demo2-66fd75bb8d-xh47j   0/1       ContainerCreating   0          15s       <none>         node2
    
    As expected, only one pod was created successfully: only one node gets read-write access, and the pods scheduled on the other nodes fail to start.
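
    To see why the remaining pods stay in ContainerCreating, their events normally show the attach failure (the exact wording depends on the k8s version; compare the "failed to lock image" error in the Issues section below):

    kubectl describe pod demo2-66fd75bb8d-wmt7g   # check the Events section at the end of the output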
    

    https://github.com/kubernetes/kubernetes/issues/67474
    Even after changing the configuration to ReadOnlyMany, the volume still cannot be mounted on multiple nodes; this may also be a bug.
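
    For reference, the ReadOnlyMany attempt amounts to switching the access mode in the claim. The exact manifest is not shown in the original, so this is only a sketch with a hypothetical name:

    kind: PersistentVolumeClaim
    apiVersion: v1
    metadata:
      name: nginx-pvc-ro          # hypothetical name for the read-only claim
      annotations:
        volume.beta.kubernetes.io/storage-class: ceph-storageclass
    spec:
      accessModes:
        - ReadOnlyMany            # still failed to mount on multiple nodes in this test
      resources:
        requests:
          storage: 1Gi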

    Common Ceph commands

    • List locks on an RBD image

    rbd lock list nginx-image

    • Show mapped RBD images

    rbd showmapped

    Common k8s commands

    • Get pod IPs
    [root@node1 work]# kubectl get pods -l app=demo  -o yaml|grep podIP
      podIP: 172.30.128.3
      podIP: 172.30.96.3
      podIP: 172.30.184.2
    
    • Generate the Service manifest

      kubectl get svc -l app=demo -o yaml

    • Describe the Service

      kubectl describe svc demo

    Notes on k8s ports and IPs

    Ports

    targetPort: the port on which the container receives traffic. port: the abstract Service port, which any other Pod can use to access the Service.
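
    A minimal Service sketch illustrating the two fields, roughly what kubectl expose deployment/demo generated above:

    apiVersion: v1
    kind: Service
    metadata:
      name: demo
    spec:
      selector:
        app: demo
      ports:
      - port: 80         # the Service port other pods use to reach this Service
        targetPort: 80   # the containerPort on which the pod receives the traffic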

    Issues

    • rbd: failed to lock image nginx-image (maybe locked by other nodes), error exit status 1
      Error syncing pod
      May 7 10:50:19 node2 kubelet: E0507 10:50:19.891141 27177 kubelet.go:1633] Unable to mount volumes for pod "demo-67bf76f84c-z8kmx_default(3eaf6dab-7072-11e9-b0eb-000c29bda28d)": timeout expired waiting for volumes to attach/mount for pod "default"/"demo-67bf76f84c-z8kmx". list of unattached/unmounted volumes=[data]; skipping pod
      May 7 10:50:19 node2 kubelet: E0507 10:50:19.891219 27177 pod_workers.go:182] Error syncing pod 3eaf6dab-7072-11e9-b0eb-000c29bda28d ("demo-67bf76f84c-z8kmx_default(3eaf6dab-7072-11e9-b0eb-000c29bda28d)"), skipping: timeout expired waiting for volumes to attach/mount for pod "default"/"demo-67bf76f84c-z8kmx". list of unattached/unmounted volumes=[data]
      Fix
      Run on every node:
      rbd map nginx-image
      [root@node3 ~]# rbd showmapped
      id pool image       snap device    
      0  rbd  db-image    -    /dev/rbd0 
      1  rbd  nginx-image -    /dev/rbd1 
      

    PV access modes

    • Access modes:
      • ReadWriteOnce: the volume can be mounted read-write by a single node
      • ReadOnlyMany: the volume can be mounted read-only by many nodes
      • ReadWriteMany: the volume can be mounted read-write by many nodes
    • Phases:
      • Available: a free resource, not yet bound to a PVC
      • Bound: bound to a PVC
      • Released: the PVC has been deleted, but the PV has not yet been reclaimed by the cluster
      • Failed: automatic reclamation of the PV failed
    • Current reclaim policies:
      • Retain: reclaimed manually
      • Recycle: must be scrubbed before it can be reused
      • Delete: the associated storage asset (an AWS EBS, GCE PD, Azure Disk, or OpenStack Cinder volume) is deleted

    Currently only NFS and HostPath support recycling; AWS EBS, GCE PD, Azure Disk, and OpenStack Cinder volumes support deletion.

    Test results

    With k8s, RBD supports only single-node read-write or multi-node read; multi-node read-write is not possible. Testing Ceph RBD also suggests that multi-node read is buggy, so for production use it is best to have some in-house development capability.

    References

    https://mritd.me/2017/06/03/use-ceph-storage-on-kubernetes/

    Introduction to RBD

    PV access modes
