Introduction to virtio-fs
- A scheme for sharing a file system between the host and its guests (and among multiple guests)
- virtio-fs mmaps the shared files into the QEMU process address space and lets the different guests access that memory directly via DAX
- Both DAX data access and shared-memory metadata access go through shared memory, avoiding unnecessary VM/hypervisor round trips (as long as the metadata has not changed)
- Kata Containers utilizes the Linux kernel DAX (Direct Access filesystem) feature to efficiently map some host-side files into the guest VM space.
Reference: https://www.cnblogs.com/yi-mu-xi/p/12923523.html
For comparison, traditional FUSE file systems already mounted on a host look like this:

```
root@cloud:~# mount | grep fuse
fusectl on /sys/fs/fuse/connections type fusectl (rw,relatime)
lxcfs on /var/lib/lxcfs type fuse.lxcfs (rw,nosuid,nodev,relatime,user_id=0,group_id=0,allow_other)
root@cloud:~#
```
The corresponding device code lives under hw/virtio/ in the QEMU source tree:

```
[root@bogon virtio]# ls
Kconfig          vhost.c               vhost-user.c           vhost-vsock.c      virtio-balloon-pci.c  virtio-crypto.c          virtio-mmio.c     virtio-rng.c
Makefile.objs    vhost-scsi-pci.c      vhost-user-fs.c        vhost-vsock-pci.c  virtio-blk-pci.c      virtio-crypto-pci.c      virtio-net-pci.c  virtio-rng-pci.c
trace-events     vhost-stub.c          vhost-user-fs-pci.c    virtio-9p-pci.c    virtio-bus.c          virtio-input-host-pci.c  virtio-pci.c      virtio-scsi-pci.c
vhost-backend.c  vhost-user-blk-pci.c  vhost-user-scsi-pci.c  virtio-balloon.c   virtio.c              virtio-input-pci.c       virtio-pci.h      virtio-serial-pci.c
[root@bogon virtio]#
```
Principle and architecture
virtio-fs uses the FUSE protocol to communicate between host and guest. A FUSE server is implemented on the host to operate on the host's files, while the guest kernel acts as the FUSE client and mounts FUSE inside the guest; server and client use virtio as the transport layer carrying the FUSE protocol, instead of the traditional /dev/fuse device. To allow the same file to be mmap'ed with MAP_SHARED in different guests at the same time, virtio-fs maps the file into the QEMU process address space and lets the guests access that memory via DAX. This bypasses the guest page cache, so all guests see the same copy of the data, and the memory is shared across guests, which also saves memory.
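To make the MAP_SHARED point concrete, here is a minimal guest-side sketch (the mount point /mnt/virtiofs and the file name are hypothetical). Nothing in the call sequence is virtio-fs specific; with DAX enabled the pages are backed by the host-side mapping that QEMU set up, so another guest mapping the same file sees the same bytes:

```c
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>
#include <unistd.h>

int main(void)
{
    /* /mnt/virtiofs is assumed to be a mounted virtio-fs share */
    int fd = open("/mnt/virtiofs/shared.dat", O_RDWR);
    if (fd < 0) {
        perror("open");
        return 1;
    }

    char *p = mmap(NULL, 4096, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
    if (p == MAP_FAILED) {
        perror("mmap");
        close(fd);
        return 1;
    }

    /* With DAX, this write lands in the shared host mapping rather than
     * in a private copy in this guest's page cache. */
    strcpy(p, "hello from guest A");

    munmap(p, 4096);
    close(fd);
    return 0;
}
```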
A simplified architecture diagram:
From the diagram, virtio-fs is mainly made up of the following components:
- guest kernel: acts as the FUSE client and mounts the directory exported by the host
- qemu: the newly added vhost-user-fs-pci device establishes the vhost-user connection between the guest kernel and virtiofsd
- virtiofsd (also in the QEMU repository): a FUSE daemon based on libfuse that runs on the host and provides the FUSE service to the guest
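Pieced together from the description above, the request path between these components is roughly:

```
guest application
   -> guest kernel (FUSE client, virtiofs mount)
   -> virtio queues exposed by the vhost-user-fs-pci device in QEMU
   -> vhost-user connection (Unix socket)
   -> virtiofsd (FUSE server on the host, based on libfuse)
   -> host file system
```

For DAX data access, the file contents are additionally mapped into QEMU's address space and exposed to the guest as a shared-memory window, so reads and writes of file data can bypass this request path.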
On the QEMU side, the vhost-user-fs-pci device is added on the command line, with its chardev pointing at the vhost-user socket that virtiofsd listens on:

```
qemu-system-x86_64 -chardev socket,id=char0,path=/tmp/vhost-fs.socket \
                   -device vhost-user-fs-pci,chardev=char0,tag=myfs,cache-size=2G ...
```
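Inside the guest, the share is then mounted by its tag (myfs above) with file system type virtiofs, e.g. `mount -t virtiofs myfs /mnt`. The same mount expressed through the mount(2) system call, as a small sketch (the /mnt mount point is just an example):

```c
#include <stdio.h>
#include <sys/mount.h>

int main(void)
{
    /* source = the tag from the QEMU command line, fstype = "virtiofs" */
    if (mount("myfs", "/mnt", "virtiofs", 0, NULL) != 0) {
        perror("mount");
        return 1;
    }
    return 0;
}
```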
vhost-user-fs-pci
```
hw/virtio/vhost-user-fs-pci.c:27:#define TYPE_VHOST_USER_FS_PCI "vhost-user-fs-pci-base"
hw/virtio/vhost-user-fs-pci.c:90:    .non_transitional_name = "vhost-user-fs-pci",
```
```c
static void vuf_class_init(ObjectClass *klass, void *data)
{
    DeviceClass *dc = DEVICE_CLASS(klass);
    VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);

    dc->props = vuf_properties;
    set_bit(DEVICE_CATEGORY_STORAGE, dc->categories);
    vdc->realize = vuf_device_realize;
    vdc->unrealize = vuf_device_unrealize;
    vdc->get_features = vuf_get_features;
    vdc->get_config = vuf_get_config;
    vdc->set_status = vuf_set_status;
    vdc->guest_notifier_mask = vuf_guest_notifier_mask;
    vdc->guest_notifier_pending = vuf_guest_notifier_pending;
}
```
```c
static const VirtioPCIDeviceTypeInfo vhost_user_fs_pci_info = {
    .base_name             = TYPE_VHOST_USER_FS_PCI,
    .non_transitional_name = "vhost-user-fs-pci",
    .instance_size         = sizeof(VHostUserFSPCI),
    .instance_init         = vhost_user_fs_pci_instance_init,
    .class_init            = vhost_user_fs_pci_class_init,
};
```
vhost_user_fs_pci_register
```c
static void vhost_user_fs_pci_register(void)
{
    virtio_pci_types_register(&vhost_user_fs_pci_info);
}

type_init(vhost_user_fs_pci_register);
```

virtio_pci_types_register() takes the VirtioPCIDeviceTypeInfo above and registers both the abstract base type ("vhost-user-fs-pci-base") and the user-visible non-transitional type ("vhost-user-fs-pci"), which is why both names show up when grepping:
```
hw/virtio/vhost-user-fs-pci.c:15:#include "hw/virtio/vhost-user-fs.h"
hw/virtio/vhost-user-fs-pci.c:27:#define TYPE_VHOST_USER_FS_PCI "vhost-user-fs-pci-base"
hw/virtio/vhost-user-fs-pci.c:90:    .non_transitional_name = "vhost-user-fs-pci",
hw/virtio/vhost-user-fs.c:21:#include "hw/virtio/vhost-user-fs.h"
hw/virtio/vhost-user-fs.c:367:    virtio_init(vdev, "vhost-user-fs", VIRTIO_ID_FS,
hw/virtio/vhost-user.c:15:#include "hw/virtio/vhost-user-fs.h"
```
vuf_device_realize
```c
static void vuf_device_realize(DeviceState *dev, Error **errp)
{
    ...
    virtio_init(vdev, "vhost-user-fs", VIRTIO_ID_FS,
                sizeof(struct virtio_fs_config));
}
```
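For reference, the virtio_fs_config structure whose size is passed to virtio_init() is the device configuration space. A sketch of its layout, as defined by the virtio-fs spec and the Linux virtio_fs.h header of that era (reproduced from memory, so treat it as an assumption):

```c
#include <stdint.h>

/* Sketch of the virtio-fs device config space: the tag the guest mounts by
 * plus the number of request virtqueues (little-endian on the wire). */
struct virtio_fs_config {
    uint8_t  tag[36];            /* mount tag, e.g. "myfs" */
    uint32_t num_request_queues; /* number of request queues */
};
```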
vhost_user_init

Besides virtio_init(), vuf_device_realize() also calls vhost_user_init() to bind the VhostUserState to the chardev that carries the vhost-user protocol to virtiofsd.
Switching to the virtiofsd side, main() creates the FUSE session and then "mounts" it, which for virtiofsd means setting up the virtio transport rather than opening /dev/fuse:

```c
int main(int argc, char *argv[])
{
    ...
    se = fuse_session_new(&args, &lo_oper, sizeof(lo_oper), &lo);
    if (se == NULL)
        goto err_out1;

    if (fuse_set_signal_handlers(se) != 0)
        goto err_out2;

    if (fuse_session_mount(se) != 0)
        goto err_out3;
}
```
In the virtiofsd tree, fuse_session_mount() simply forwards to the virtio transport:

```c
int fuse_session_mount(struct fuse_session *se)
{
    return virtio_session_mount(se);
}
```
virtio_session_mount() creates the vhost-user Unix socket and waits for QEMU to connect:

```c
int virtio_session_mount(struct fuse_session *se)
{
    struct sockaddr_un un;

    if (strlen(se->vu_socket_path) >= sizeof(un.sun_path)) {
        fprintf(stderr, "Socket path too long\n");
        return -1;
    }

    /* Poison the fuse FD so we spot if we accidentally use it;
     * DO NOT check for this value, check for se->vu_socket_path
     */
    se->fd = 0xdaff0d11;

    /* Create the Unix socket to communicate with qemu
     * based on QEMU's vhost-user-bridge
     */
    unlink(se->vu_socket_path);
    strcpy(un.sun_path, se->vu_socket_path);
    size_t addr_len = sizeof(un.sun_family) + strlen(se->vu_socket_path);

    int listen_sock = socket(AF_UNIX, SOCK_STREAM, 0);
    if (listen_sock == -1) {
        perror("vhost socket creation");
        return -1;
    }
    un.sun_family = AF_UNIX;

    if (bind(listen_sock, (struct sockaddr *) &un, addr_len) == -1) {
        perror("vhost socket bind");
        return -1;
    }

    if (listen(listen_sock, 1) == -1) {
        perror("vhost socket listen");
        return -1;
    }

    fprintf(stderr, "%s: Waiting for vhost-user socket connection...\n", __func__);
    int data_sock = accept(listen_sock, NULL, NULL);
    if (data_sock == -1) {
        perror("vhost socket accept");
        close(listen_sock);
        return -1;
    }
    close(listen_sock);
    fprintf(stderr, "%s: Received vhost-user socket connection\n", __func__);
    se->vu_socketfd = data_sock;

    /* TODO: Some cleanup/deallocation! */
    se->virtio_dev = calloc(sizeof(struct fv_VuDev), 1);
    se->virtio_dev->se = se;
    vu_init(&se->virtio_dev->dev, se->vu_socketfd,
            fv_panic,
            fv_set_watch, fv_remove_watch,
            &fv_iface);

    return 0;
}
```

se->vu_socket_path is the same socket path passed to QEMU's -chardev socket option above; once QEMU connects, vu_init() hands the accepted connection to libvhost-user together with the fv_iface callbacks, and from then on FUSE requests arrive over the virtqueues rather than through /dev/fuse.