zoukankan      html  css  js  c++  java
  • virtiofsd

    virtio-fs介绍

      • 在guest之间共享文件系统的方案

      • virtio-fs把文件mmap进qemu的进程地址空间并让不同guest使用DAX访问该内存空间

      • DAX数据访问和元数据访问都通过共享内存完成,在元数据没有改变的情况下可以避免不必要的VM/hypervisor之间通信

        1. Kata Containers utilizes the Linux kernel DAX (Direct Access filesystem)
          feature to efficiently map some host-side files into the guest VM space.

    参考https://www.cnblogs.com/yi-mu-xi/p/12923523.html

    root@cloud:~# mount | grep fuse
    fusectl on /sys/fs/fuse/connections type fusectl (rw,relatime)
    lxcfs on /var/lib/lxcfs type fuse.lxcfs (rw,nosuid,nodev,relatime,user_id=0,group_id=0,allow_other)
    root@cloud:~# 
    [root@bogon virtio]# ls
    Kconfig          vhost.c               vhost-user.c           vhost-vsock.c      virtio-balloon-pci.c  virtio-crypto.c          virtio-mmio.c     virtio-rng.c
    Makefile.objs    vhost-scsi-pci.c      vhost-user-fs.c        vhost-vsock-pci.c  virtio-blk-pci.c      virtio-crypto-pci.c      virtio-net-pci.c  virtio-rng-pci.c
    trace-events     vhost-stub.c          vhost-user-fs-pci.c    virtio-9p-pci.c    virtio-bus.c          virtio-input-host-pci.c  virtio-pci.c      virtio-scsi-pci.c
    vhost-backend.c  vhost-user-blk-pci.c  vhost-user-scsi-pci.c  virtio-balloon.c   virtio.c              virtio-input-pci.c       virtio-pci.h      virtio-serial-pci.c
    [root@bogon virtio]# 

    原理与架构设计

    virtio-fs方案使用FUSE协议在host和guest之间通信。在host端实现一个fuse server操作host上的文件,然后把guest kernel当作fuse client在guest内挂载fuse,server和client之间使用virtio作为传输层来承载FUSE协议,而不是传统结构上的/dev/fuse设备。为了支持在不同guest中同时mmap(MAP_SHARED)同一个文件,virtio-fs把文件mmap进qemu的进程地址空间并让不同guest使用DAX访问该内存空间,这样就绕过了guest pagecache达到不同guest都访问同一份数据的目的,同时也在多个guest之间共享了内存,节省了内存资源。

    简要架构图: 1.png

    从图中我们可以了解到,virtio-fs主要由以下几个组件组成:

    • guest kernel作为fuse client来挂载host上导出的目录
    • qemu新添加的vhost-user-fs-pci设备用于在guest kernel和virtiofsd之间建立起vhost-user连接
    • virtiofsd(同样在qemu仓库中):host上运行的基于libfuse开发的fuse daemon,用于向guest提供fuse服务
    qemu-system-x86_64 -chardev socket,id=char0,path=/tmp/vhost-fs.socket -device vhost-user-fs-pci,chardev=char0,tag=myfs,cache-size=2G
    vhost-user-fs-pci
    hw/virtio/vhost-user-fs-pci.c:27:#define TYPE_VHOST_USER_FS_PCI "vhost-user-fs-pci-base"
    hw/virtio/vhost-user-fs-pci.c:90:    .non_transitional_name = "vhost-user-fs-pci",
    427 static void vuf_class_init(ObjectClass *klass, void *data)
    428 {
    429     DeviceClass *dc = DEVICE_CLASS(klass);
    430     VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);
    431 
    432     dc->props = vuf_properties;
    433     set_bit(DEVICE_CATEGORY_STORAGE, dc->categories);
    434     vdc->realize = vuf_device_realize;
    435     vdc->unrealize = vuf_device_unrealize;
    436     vdc->get_features = vuf_get_features;
    437     vdc->get_config = vuf_get_config;
    438     vdc->set_status = vuf_set_status;
    439     vdc->guest_notifier_mask = vuf_guest_notifier_mask;
    440     vdc->guest_notifier_pending = vuf_guest_notifier_pending;
    441 }
     
     88 static const VirtioPCIDeviceTypeInfo vhost_user_fs_pci_info = {
     89     .base_name             = TYPE_VHOST_USER_FS_PCI,
     90     .non_transitional_name = "vhost-user-fs-pci",
     91     .instance_size = sizeof(VHostUserFSPCI),
     92     .instance_init = vhost_user_fs_pci_instance_init,
     93     .class_init    = vhost_user_fs_pci_class_init,
     94 };
    vhost_user_fs_pci_register
     96 static void vhost_user_fs_pci_register(void)
     97 {
     98     virtio_pci_types_register(&vhost_user_fs_pci_info);
     99 }
    100 
    101 type_init(vhost_user_fs_pci_register);
    hw/virtio/vhost-user-fs-pci.c:15:#include "hw/virtio/vhost-user-fs.h"
    hw/virtio/vhost-user-fs-pci.c:27:#define TYPE_VHOST_USER_FS_PCI "vhost-user-fs-pci-base"
    hw/virtio/vhost-user-fs-pci.c:90:    .non_transitional_name = "vhost-user-fs-pci",
    hw/virtio/vhost-user-fs.c:21:#include "hw/virtio/vhost-user-fs.h"
    hw/virtio/vhost-user-fs.c:367:    virtio_init(vdev, "vhost-user-fs", VIRTIO_ID_FS,
    hw/virtio/vhost-user.c:15:#include "hw/virtio/vhost-user-fs.h"
    vuf_device_realize
    303 static void vuf_device_realize(DeviceState *dev, Error **errp)
    304 {
    367     virtio_init(vdev, "vhost-user-fs", VIRTIO_ID_FS,
    368                 sizeof(struct virtio_fs_config));
    }
    427 static void vuf_class_init(ObjectClass *klass, void *data)
    428 {
    429     DeviceClass *dc = DEVICE_CLASS(klass);
    430     VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);
    431 
    432     dc->props = vuf_properties;
    433     set_bit(DEVICE_CATEGORY_STORAGE, dc->categories);
    434     vdc->realize = vuf_device_realize;
    435     vdc->unrealize = vuf_device_unrealize;
    436     vdc->get_features = vuf_get_features;
    437     vdc->get_config = vuf_get_config;
    438     vdc->set_status = vuf_set_status;
    439     vdc->guest_notifier_mask = vuf_guest_notifier_mask;
    440     vdc->guest_notifier_pending = vuf_guest_notifier_pending;
    441 }
    vuf_device_realize
        vhost_user_init
    2190 int main(int argc, char *argv[])
    2191 {
    
    
       2276     se = fuse_session_new(&args, &lo_oper, sizeof(lo_oper), &lo);
    2277     if (se == NULL)
    2278         goto err_out1;
    2279 
    2280     if (fuse_set_signal_handlers(se) != 0)
    2281         goto err_out2;
    2282 
    2283     if (fuse_session_mount(se) != 0)
    2284         goto err_out3;
    
    
    }
    2645 int fuse_session_mount(struct fuse_session *se)
    2646 {
    2647         return virtio_session_mount(se);
    2648 }
    721 
    722 int virtio_session_mount(struct fuse_session *se)
    723 {
    724         struct sockaddr_un un;
    725 
    726         if (strlen(se->vu_socket_path) >= sizeof(un.sun_path)) {
    727                 fprintf(stderr, "Socket path too long\n");
    728                 return -1;
    729         }
    730 
    731         /* Poison the fuse FD so we spot if we accidentally use it;
    732          * DO NOT check for this value, check for se->vu_socket_path
    733          */
    734         se->fd = 0xdaff0d11;
    735 
    736         /* Create the Unix socket to communicate with qemu
    737          * based on QEMU's vhost-user-bridge
    738          */
    739         unlink(se->vu_socket_path);
    740         strcpy(un.sun_path, se->vu_socket_path);
    741         size_t addr_len = sizeof(un.sun_family) + strlen(se->vu_socket_path);
    742 
    743         int listen_sock = socket(AF_UNIX, SOCK_STREAM, 0);
    744         if (listen_sock == -1) {
    745                perror("vhost socket creation");
    746                return -1;
    747         }
    748         un.sun_family = AF_UNIX;
    749 
    750         if (bind(listen_sock, (struct sockaddr *) &un, addr_len) == -1) {
    751                 perror("vhost socket bind");
    752                 return -1;
    753         }
    754 
    755         if (listen(listen_sock, 1) == -1) {
    756                 perror("vhost socket listen");
    757                 return -1;
    758         }
    759 
    760         fprintf(stderr, "%s: Waiting for vhost-user socket connection...\n", __func__);
    761         int data_sock = accept(listen_sock, NULL, NULL);
    762         if (data_sock == -1) {
    763                 perror("vhost socket accept");
    764                 close(listen_sock);
    765                 return -1;
    766         }
    767         close(listen_sock);
    768         fprintf(stderr, "%s: Received vhost-user socket connection\n", __func__);
    769         se->vu_socketfd = data_sock;
    
    770 
    771         /* TODO: Some cleanup/deallocation! */
    772         se->virtio_dev = calloc(sizeof(struct fv_VuDev), 1);
    773         se->virtio_dev->se = se;
    774         vu_init(&se->virtio_dev->dev, se->vu_socketfd,
    775                 fv_panic,
    776                 fv_set_watch, fv_remove_watch,
    777                 &fv_iface);
    778 
    779         return 0;
    780 }
  • 相关阅读:
    【Leetcode】92. Reverse Linked List II && 206. Reverse Linked List
    【Leetcode】91. Decode Ways
    记一次面经
    涨知识
    B-Tree 漫谈 (从二叉树到二叉搜索树到平衡树到红黑树到B树到B+树到B*树)
    涨知识
    HDU 1754 I Hate It 【线段树单点修改 维护区间最大值】
    POJ 1632 Vase collection【状态压缩+搜索】
    POJ 1011 Sticks 【DFS 剪枝】
    POJ 1088 滑雪 【记忆化搜索经典】
  • 原文地址:https://www.cnblogs.com/dream397/p/13867752.html
Copyright © 2011-2022 走看看