zoukankan      html  css  js  c++  java
  • 网络虚拟化Virtio-net

    网络虚拟化有和存储虚拟化类似的地方,例如它们都是基于virtio 的,因而在看网络虚拟化的过程中,会看到和存储虚拟化很像的数据结构和原理。但是网络虚拟化也有自己的特殊性。例如,存储虚拟化是将宿主机上的文件作为客户机上的硬盘,而网络虚拟化需要依赖于内核协议栈进行网络包的封装与解封装。那怎么实现客户机和宿主机之间的互通呢?就来看一看解析初始化的过程。还是从Virtio Network Device这个设备的初始化讲起,如下所示:

    /*
     * QOM type descriptor for the abstract TYPE_DEVICE base type.
     * class_init/class_base_init build a DeviceClass from the TypeImpl;
     * the instance_* hooks run when a concrete device object is created,
     * post-initialized and finalized.
     */
    static const TypeInfo device_type_info = {
        .name = TYPE_DEVICE,
        .parent = TYPE_OBJECT,               /* top of the QOM inheritance chain */
        .instance_size = sizeof(DeviceState),
        .instance_init = device_initfn,
        .instance_post_init = device_post_init,
        .instance_finalize = device_finalize,
        .class_base_init = device_class_base_init,
        .class_init = device_class_init,
        .abstract = true,                    /* never instantiated directly */
        .class_size = sizeof(DeviceClass),
    };
     
    /*
     * QOM type descriptor for the abstract TYPE_VIRTIO_DEVICE type.
     * Parent is TYPE_DEVICE; concrete virtio devices (net, blk, ...)
     * derive from this type.
     */
    static const TypeInfo virtio_device_info = {
        .name = TYPE_VIRTIO_DEVICE,
        .parent = TYPE_DEVICE,
        .instance_size = sizeof(VirtIODevice),
        .class_init = virtio_device_class_init,
        .instance_finalize = virtio_device_instance_finalize,
        .abstract = true,                    /* only usable via subclasses */
        .class_size = sizeof(VirtioDeviceClass),
    };
     
    /*
     * QOM type descriptor for the concrete TYPE_VIRTIO_NET device.
     * Not abstract: this is the type actually instantiated for a
     * virtio network device.
     */
    static const TypeInfo virtio_net_info = {
        .name = TYPE_VIRTIO_NET,
        .parent = TYPE_VIRTIO_DEVICE,
        .instance_size = sizeof(VirtIONet),
        .instance_init = virtio_net_instance_init,
        .class_init = virtio_net_class_init,
    };
     
    /* Register the virtio-net QOM type with the type system. */
    static void virtio_register_types(void)
    {
        type_register_static(&virtio_net_info);
    }
     
    /* Run virtio_register_types at QEMU type-registration time. */
    type_init(virtio_register_types)

    Virtio Network Device这种类的定义是有多层继承关系的,TYPE_VIRTIO_NET的父类是TYPE_VIRTIO_DEVICE,TYPE_VIRTIO_DEVICE的父类是TYPE_DEVICE,TYPE_DEVICE的父类是TYPE_OBJECT,继承关系就到头了。type_init用于注册这种类,这里面每一层都有class_init,用于从TypeImpl生成xxxClass,也有instance_init,会将xxxClass初始化为实例。TYPE_VIRTIO_NET层的class_init函数是virtio_net_class_init,它定义了DeviceClass的realize函数为virtio_net_device_realize,这一点和存储块设备是一样的,如下所示:

    /*
     * Realize callback for the virtio-net device (DeviceClass::realize).
     * Validates the configured queue sizes, allocates the per-queue state,
     * creates the RX/TX virtqueue pairs plus the control queue, and finally
     * creates the NIC frontend via qemu_new_nic.
     * NOTE: '......' marks code elided from this excerpt.
     */
    static void virtio_net_device_realize(DeviceState *dev, Error **errp)
    {
        VirtIODevice *vdev = VIRTIO_DEVICE(dev);
        VirtIONet *n = VIRTIO_NET(dev);
        NetClientState *nc;
        int i;
    ......
        /* Initialize the common VirtIODevice state for this device. */
        virtio_init(vdev, "virtio-net", VIRTIO_ID_NET, n->config_size);
     
        /*
         * We set a lower limit on RX queue size to what it always was.
         * Guests that want a smaller ring can always resize it without
         * help from us (using virtio 1 and up).
         */
        if (n->net_conf.rx_queue_size < VIRTIO_NET_RX_QUEUE_MIN_SIZE ||
            n->net_conf.rx_queue_size > VIRTQUEUE_MAX_SIZE ||
            !is_power_of_2(n->net_conf.rx_queue_size)) {
    ......
            return;
        }
     
        /* Same validation for the TX queue size (error path elided). */
        if (n->net_conf.tx_queue_size < VIRTIO_NET_TX_QUEUE_MIN_SIZE ||
            n->net_conf.tx_queue_size > VIRTQUEUE_MAX_SIZE ||
            !is_power_of_2(n->net_conf.tx_queue_size)) {
    ......
            return;
        }
     
        /*
         * Each queue pair needs an RX and a TX virtqueue (hence * 2),
         * plus one control virtqueue; the total must fit in
         * VIRTIO_QUEUE_MAX.
         */
        n->max_queues = MAX(n->nic_conf.peers.queues, 1);
        if (n->max_queues * 2 + 1 > VIRTIO_QUEUE_MAX) {
    ......
            return;
        }
        n->vqs = g_malloc0(sizeof(VirtIONetQueue) * n->max_queues);
        n->curr_queues = 1;
    ......
        n->net_conf.tx_queue_size = MIN(virtio_net_max_tx_queue_size(n),
                                        n->net_conf.tx_queue_size);
     
        /* Create one RX/TX virtqueue pair per queue. */
        for (i = 0; i < n->max_queues; i++) {
            virtio_net_add_queue(n, i);
        }
     
        /* Control virtqueue, used by the guest for e.g. MAC/queue config. */
        n->ctrl_vq = virtio_add_queue(vdev, 64, virtio_net_handle_ctrl);
        qemu_macaddr_default_if_unset(&n->nic_conf.macaddr);
        memcpy(&n->mac[0], &n->nic_conf.macaddr, sizeof(n->mac));
        n->status = VIRTIO_NET_S_LINK_UP;
     
        /* Create the NIC frontend; client type/name fall back to the
         * QOM type name and device id when not explicitly set. */
        if (n->netclient_type) {
            n->nic = qemu_new_nic(&net_virtio_info, &n->nic_conf,
                                  n->netclient_type, n->netclient_name, n);
        } else {
            n->nic = qemu_new_nic(&net_virtio_info, &n->nic_conf,
                                  object_get_typename(OBJECT(dev)), dev->id, n);
        }
    ......
    }
    /*
     * virtio_net_add_queue - set up the RX/TX virtqueue pair for queue @index.
     *
     * The RX queue is always backed by virtio_net_handle_rx. For TX, the
     * "tx=timer" configuration selects timer-based batching; otherwise a
     * bottom half (the default) drives transmission.
     */
    static void virtio_net_add_queue(VirtIONet *n, int index)
    {
        VirtIODevice *vdev = VIRTIO_DEVICE(n);
        VirtIONetQueue *q = &n->vqs[index];
        bool tx_by_timer = n->net_conf.tx && !strcmp(n->net_conf.tx, "timer");
    
        q->rx_vq = virtio_add_queue(vdev, n->net_conf.rx_queue_size,
                                    virtio_net_handle_rx);
    
        if (tx_by_timer) {
            /* Timer-driven TX: packets are flushed when the timer fires. */
            q->tx_vq = virtio_add_queue(vdev, n->net_conf.tx_queue_size,
                                        virtio_net_handle_tx_timer);
            q->tx_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, virtio_net_tx_timer, q);
        } else {
            /* Bottom-half-driven TX (default). */
            q->tx_vq = virtio_add_queue(vdev, n->net_conf.tx_queue_size,
                                        virtio_net_handle_tx_bh);
            q->tx_bh = qemu_bh_new(virtio_net_tx_bh, q);
        }
    
        q->tx_waiting = 0;
        q->n = n;       /* back-pointer to the owning device */
    }
    /*
     * NOTE(review): this is a verbatim duplicate of the virtio_net_add_queue
     * definition immediately above — apparently pasted twice when the article
     * was assembled. In a real translation unit a second definition would be
     * a compile error; kept here only to preserve the original text.
     */
    static void virtio_net_add_queue(VirtIONet *n, int index)
    {
        VirtIODevice *vdev = VIRTIO_DEVICE(n);
    
        /* RX virtqueue, serviced by virtio_net_handle_rx. */
        n->vqs[index].rx_vq = virtio_add_queue(vdev, n->net_conf.rx_queue_size,
                                               virtio_net_handle_rx);
    
        if (n->net_conf.tx && !strcmp(n->net_conf.tx, "timer")) {
            /* Timer-driven TX path. */
            n->vqs[index].tx_vq =
                virtio_add_queue(vdev, n->net_conf.tx_queue_size,
                                 virtio_net_handle_tx_timer);
            n->vqs[index].tx_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
                                                  virtio_net_tx_timer,
                                                  &n->vqs[index]);
        } else {
            /* Bottom-half-driven TX path (default). */
            n->vqs[index].tx_vq =
                virtio_add_queue(vdev, n->net_conf.tx_queue_size,
                                 virtio_net_handle_tx_bh);
            n->vqs[index].tx_bh = qemu_bh_new(virtio_net_tx_bh, &n->vqs[index]);
        }
    
        n->vqs[index].tx_waiting = 0;
        n->vqs[index].n = n;
    }

    这里面创建了一个VirtIODevice,这一点和存储虚拟化也是一样的。virtio_init用来初始化这个设备。VirtIODevice结构里面有一个VirtQueue数组,这就是virtio前端和后端互相传数据的队列,最多有VIRTIO_QUEUE_MAX个。

    刚才说的都是一样的地方,其实也有不一样的地方。会发现这里面有这样的语句n->max_queues * 2 + 1 > VIRTIO_QUEUE_MAX。为什么要乘以2呢?这是因为对于网络设备来讲,应该分发送队列和接收队列两个方向。接下来调用virtio_net_add_queue来初始化队列,可以看出这里面就有发送tx_vq和接收rx_vq两个队列,如下所示:

    /*
     * Per-queue-pair state of a virtio-net device: one receive and one
     * transmit virtqueue, plus the timer or bottom half that drives TX.
     */
    typedef struct VirtIONetQueue {
        VirtQueue *rx_vq;          /* receive virtqueue */
        VirtQueue *tx_vq;          /* transmit virtqueue */
        QEMUTimer *tx_timer;       /* used when tx=timer is configured */
        QEMUBH *tx_bh;             /* used for the default bottom-half TX */
        uint32_t tx_waiting;
        struct {
            VirtQueueElement *elem;  /* in-flight async TX element */
        } async_tx;
        struct VirtIONet *n;       /* back-pointer to the owning device */
    } VirtIONetQueue;
     
    /*
     * NOTE(review): abbreviated third copy of virtio_net_add_queue, quoted
     * again to highlight the rx_vq/tx_vq pair; the timer branch and the
     * closing brace were dropped from this excerpt ('......' marks elision).
     */
    static void virtio_net_add_queue(VirtIONet *n, int index)
    {
        VirtIODevice *vdev = VIRTIO_DEVICE(n);
     
        /* Receive direction. */
        n->vqs[index].rx_vq = virtio_add_queue(vdev, n->net_conf.rx_queue_size, virtio_net_handle_rx);
     
    ......
     
        /* Transmit direction (bottom-half variant shown). */
        n->vqs[index].tx_vq = virtio_add_queue(vdev, n->net_conf.tx_queue_size, virtio_net_handle_tx_bh);
        n->vqs[index].tx_bh = qemu_bh_new(virtio_net_tx_bh, &n->vqs[index]);
        n->vqs[index].n = n;

    每个VirtQueue中,都有一个vring用来维护这个队列里面的数据;另外还有函数virtio_net_handle_rx用于处理网络包的接收;函数virtio_net_handle_tx_bh用于网络包的发送,这个函数后面会用到。接下来,qemu_new_nic会创建一个虚拟机里面的网卡,如下所示

    /*
     * Create the NIC frontend for a virtual machine.
     * Allocates one NICState with an array of NetClientState (one per queue)
     * appended after it, and wires each queue to its peer netdev.
     * NOTE: '......' marks code elided from this excerpt.
     */
    NICState *qemu_new_nic(NetClientInfo *info,
                           NICConf *conf,
                           const char *model,
                           const char *name,
                           void *opaque)
    {
        NetClientState **peers = conf->peers.ncs;
        NICState *nic;
        int i, queues = MAX(1, conf->peers.queues);
    ......
        /* Single allocation: NICState followed by 'queues' NetClientState. */
        nic = g_malloc0(info->size + sizeof(NetClientState) * queues);
        nic->ncs = (void *)nic + info->size;   /* ncs points past NICState */
        nic->conf = conf;
        nic->opaque = opaque;
     
        /* Set up one net client per queue, peered with the backend. */
        for (i = 0; i < queues; i++) {
            qemu_net_client_setup(&nic->ncs[i], info, peers[i], model, name, NULL);
            nic->ncs[i].queue_index = i;
        }
     
        return nic;
    }
     
    /*
     * Initialize one NetClientState: record its type info and names, link it
     * into the global net_clients list, and create its incoming packet queue.
     * When @name is NULL, a name is generated from @model via assign_name.
     */
    static void qemu_net_client_setup(NetClientState *nc,
                                      NetClientInfo *info,
                                      NetClientState *peer,
                                      const char *model,
                                      const char *name,
                                      NetClientDestructor *destructor)
    {
        nc->info = info;
        nc->model = g_strdup(model);
        nc->name = name ? g_strdup(name) : assign_name(nc, model);
        nc->destructor = destructor;
    
        /* Make the client globally visible. */
        QTAILQ_INSERT_TAIL(&net_clients, nc, next);
    
        /* Queue feeding received packets into qemu_deliver_packet_iov. */
        nc->incoming_queue = qemu_new_net_queue(qemu_deliver_packet_iov, nc);
        QTAILQ_INIT(&nc->filters);
    }

    初始化过程解析完毕以后,接下来从qemu的启动过程看起。对于网卡的虚拟化,qemu的启动参数里面有关的是下面两行

    -netdev tap,fd=32,id=hostnet0,vhost=on,vhostfd=37
    -device virtio-net-pci,netdev=hostnet0,id=net0,mac=fa:16:3e:d1:2d:99,bus=pci.0,addr=0x3

    qemu的main函数会调用net_init_clients进行网络设备的初始化,可以解析net参数,也可以解析netdev参数,如下所示:

    /*
     * Initialize all network clients from the command line.
     * Processes -netdev, -nic and -net options, in that order; the first
     * failing group aborts initialization.
     * Returns 0 on success, -1 on failure (details in *errp).
     */
    int net_init_clients(Error **errp)
    {
        QTAILQ_INIT(&net_clients);
    
        /* Short-circuit evaluation preserves the original per-group order
         * and stops at the first error, exactly as the cascaded ifs did. */
        if (qemu_opts_foreach(qemu_find_opts("netdev"),
                              net_init_netdev, NULL, errp) ||
            qemu_opts_foreach(qemu_find_opts("nic"),
                              net_param_nic, NULL, errp) ||
            qemu_opts_foreach(qemu_find_opts("net"),
                              net_init_client, NULL, errp)) {
            return -1;
        }
    
        return 0;
    }

    net_init_clients会解析参数。上面的参数netdev会调用net_init_netdev->net_client_init->net_client_init1。net_client_init1会根据不同的driver类型,调用不同的初始化函数,如下所示:

    /*
     * Dispatch table mapping a netdev driver type to its init function;
     * net_client_init1 indexes this array by the parsed driver enum.
     * NOTE: '......' marks entries elided from this excerpt.
     */
    static int (* const net_client_init_fun[NET_CLIENT_DRIVER__MAX])(
        const Netdev *netdev,
        const char *name,
        NetClientState *peer, Error **errp) = {
            [NET_CLIENT_DRIVER_NIC]       = net_init_nic,
            [NET_CLIENT_DRIVER_TAP]       = net_init_tap,      /* used for tap backends */
            [NET_CLIENT_DRIVER_SOCKET]    = net_init_socket,
            [NET_CLIENT_DRIVER_HUBPORT]   = net_init_hubport,
    ......
    };

    由于配置的driver类型是tap,因而这里会调用net_init_tap->net_tap_init->tap_open,如下所示:

    /* Character device through which QEMU talks to the kernel TUN/TAP driver. */
    #define PATH_NET_TUN "/dev/net/tun"
     
    /*
     * Open /dev/net/tun and configure it as a TAP interface.
     * Queries driver features via TUNGETFEATURES, optionally enables the
     * virtio-net header (IFF_VNET_HDR), binds the fd to an interface with
     * TUNSETIFF and switches it to non-blocking mode.
     * Returns the open fd. NOTE: '......' marks code elided from this
     * excerpt (including error handling and the closing brace).
     */
    int tap_open(char *ifname, int ifname_size, int *vnet_hdr,
                 int vnet_hdr_required, int mq_required, Error **errp)
    {
        struct ifreq ifr;
        int fd, ret;
        int len = sizeof(struct virtio_net_hdr);
        unsigned int features;
     
        /* TFR retries open() on EINTR. */
        TFR(fd = open(PATH_NET_TUN, O_RDWR));
        memset(&ifr, 0, sizeof(ifr));
        /* TAP (Ethernet frames), without the extra packet-info header. */
        ifr.ifr_flags = IFF_TAP | IFF_NO_PI;
     
        if (ioctl(fd, TUNGETFEATURES, &features) == -1) {
            features = 0;   /* old kernel: assume no optional features */
        }
     
        if (features & IFF_ONE_QUEUE) {
            ifr.ifr_flags |= IFF_ONE_QUEUE;
        }
     
        /* Negotiate the virtio-net header if caller asked and driver supports it. */
        if (*vnet_hdr) {
            if (features & IFF_VNET_HDR) {
                *vnet_hdr = 1;
                ifr.ifr_flags |= IFF_VNET_HDR;
            } else {
                *vnet_hdr = 0;
            }
            ioctl(fd, TUNSETVNETHDRSZ, &len);
        }
    ......
        /* Bind this fd to a tap interface; creates the netdev in the kernel. */
        ret = ioctl(fd, TUNSETIFF, (void *) &ifr);
    ......
        fcntl(fd, F_SETFL, O_NONBLOCK);
        return fd;
    在tap_open中打开字符设备文件"/dev/net/tun",然后通过ioctl操作这个文件。这是Linux内核提供的一项机制,与KVM的使用方式很像:用户态程序先打开一个字符设备文件,再通过ioctl操作这个文件与内核打交道,从而使用内核提供的某种能力,如下图所示:

    为什么需要使用内核的机制呢?因为网络包需要从虚拟机里面发送到虚拟机外面,发送到宿主机上的时候,必须是一个正常的网络包才能被转发。要形成一个网络包,那就需要经过复杂的协议栈。客户机会将网络包发送给qemu,qemu自己没有网络协议栈,现去实现一个也不可能,太复杂了,于是它就要借助内核的力量。qemu会将客户机发送给它的网络包转换成为文件流,写入"/dev/net/tun"字符设备,就像写一个文件一样。内核中TUN/TAP字符设备驱动会收到这个写入的文件流,然后交给TUN/TAP的虚拟网卡驱动,这个驱动会将文件流再次转成网络包,交给TCP/IP栈,最终从虚拟TAP网卡tap0发出来,成为标准的网络包。后面会看到这个过程。

    现在到内核里面,看一看打开"/dev/net/tun"字符设备后,内核会发生什么事情。内核的实现在drivers/net/tun.c文件中,这是一个字符设备驱动程序,应该符合字符设备的格式,如下所示:

    /* Standard kernel module boilerplate for the TUN/TAP driver. */
    module_init(tun_init);
    module_exit(tun_cleanup);
    MODULE_DESCRIPTION(DRV_DESCRIPTION);
    MODULE_AUTHOR(DRV_COPYRIGHT);
    MODULE_LICENSE("GPL");
    MODULE_ALIAS_MISCDEV(TUN_MINOR);
    MODULE_ALIAS("devname:net/tun");    /* auto-creates /dev/net/tun */
     
    /*
     * Module init: register the rtnetlink link ops, the /dev/net/tun misc
     * character device, and a netdevice notifier.
     * NOTE: '......' marks error handling elided from this excerpt.
     */
    static int __init tun_init(void)
    {
    ......
      ret = rtnl_link_register(&tun_link_ops);
    ......
      /* This registers the "/dev/net/tun" character device. */
      ret = misc_register(&tun_miscdev);
    ......
      ret = register_netdevice_notifier(&tun_notifier_block);
    ......
    }

    这里面注册了一个tun_miscdev字符设备,从它的定义可以看出,这就是"/dev/net/tun"字符设备,如下所示:

    /*
     * Misc character device backing /dev/net/tun; its fops table below is
     * what userspace (e.g. QEMU's tap_open) ends up calling.
     */
    static struct miscdevice tun_miscdev = {
      .minor = TUN_MINOR,
      .name = "tun",
      .nodename = "net/tun",   /* device node appears as /dev/net/tun */
      .fops = &tun_fops,
    };
     
    /* File operations for /dev/net/tun: read/write move packets between
     * userspace and the tuntap netdevice; ioctl configures it. */
    static const struct file_operations tun_fops = {
      .owner  = THIS_MODULE,
      .llseek = no_llseek,
      .read_iter  = tun_chr_read_iter,   /* userspace reads a packet */
      .write_iter = tun_chr_write_iter,  /* userspace injects a packet */
      .poll  = tun_chr_poll,
      .unlocked_ioctl  = tun_chr_ioctl,  /* TUNSETIFF and friends */
      .open  = tun_chr_open,
      .release = tun_chr_close,
      .fasync = tun_chr_fasync,
    };

    qemu的tap_open函数会打开这个字符设备PATH_NET_TUN。打开字符设备的过程这里不再重复,总之到了驱动这一层,调用的是tun_chr_open,如下所示:

    /*
     * open() handler for /dev/net/tun. Allocates a struct tun_file (which
     * embeds a socket/sock pair used to talk to the network stack), wires
     * it to the struct file via private_data, and marks the sock zerocopy-
     * capable. The tun pointer stays NULL until TUNSETIFF attaches it.
     * NOTE(review): in the real kernel source 'net' is derived from the
     * opening process's network namespace; that declaration was lost in
     * this excerpt — confirm against drivers/net/tun.c.
     */
    static int tun_chr_open(struct inode *inode, struct file * file)
    {
      struct tun_file *tfile;
      tfile = (struct tun_file *)sk_alloc(net, AF_UNSPEC, GFP_KERNEL,
                  &tun_proto, 0);
      RCU_INIT_POINTER(tfile->tun, NULL);   /* not yet attached to a device */
      tfile->flags = 0;
      tfile->ifindex = 0;
     
      init_waitqueue_head(&tfile->wq.wait);
      RCU_INIT_POINTER(tfile->socket.wq, &tfile->wq);
     
      /* Bind the embedded socket to this open file and its ops table. */
      tfile->socket.file = file;
      tfile->socket.ops = &tun_socket_ops;
     
      sock_init_data(&tfile->socket, &tfile->sk);
     
      tfile->sk.sk_write_space = tun_sock_write_space;
      tfile->sk.sk_sndbuf = INT_MAX;
     
      /* Later reads/writes/ioctls on this fd find tfile through here. */
      file->private_data = tfile;
      INIT_LIST_HEAD(&tfile->next);
     
      sock_set_flag(&tfile->sk, SOCK_ZEROCOPY);
     
      return 0;
    }

    在tun_chr_open的参数里面有一个struct file,它代表的就是打开的字符设备文件"/dev/net/tun",因而往这个字符设备文件中写数据,就会通过这个struct file写入。这个struct file里面的file_operations,按照字符设备打开的规则,指向的就是tun_fops。另外还需要在tun_chr_open创建一个结构struct tun_file,并且将struct file的private_data指向它,如下所示:

    /* A tun_file connects an open character device to a tuntap netdevice. It
     * also contains all socket related structures 
     * to serve as one transmit queue for tuntap device. 
     */
    struct tun_file {
      struct sock sk;               /* embedded sock: entry into the net stack */
      struct socket socket;
      struct socket_wq wq;
      struct tun_struct __rcu *tun; /* attached device; NULL until TUNSETIFF */
      struct fasync_struct *fasync;
      /* only used for fasnyc */
      unsigned int flags;
      union {
        u16 queue_index;            /* index among the device's queues */
        unsigned int ifindex;
      };
      struct list_head next;
      struct tun_struct *detached;
      struct skb_array tx_array;    /* per-queue packet ring toward userspace */
    };
     
    /*
     * Per-device state of a tuntap interface. Lives in the private area of
     * the struct net_device it owns (see netdev_priv in tun_set_iff), and
     * tracks the tun_file queues attached to the device.
     */
    struct tun_struct {
      struct tun_file __rcu  *tfiles[MAX_TAP_QUEUES];  /* attached queues */
      unsigned int            numqueues;
      unsigned int     flags;         /* IFF_TUN / IFF_TAP etc. */
      kuid_t      owner;
      kgid_t      group;
     
      struct net_device  *dev;        /* the host-visible tun%d/tap%d netdev */
      netdev_features_t  set_features;
      int      align;
      int      vnet_hdr_sz;           /* size of the virtio-net header */
      int      sndbuf;
      struct tap_filter  txflt;
      struct sock_fprog  fprog;
      /* protected by rtnl lock */
      bool      filter_attached;
      spinlock_t lock;
      struct hlist_head flows[TUN_NUM_FLOW_ENTRIES];  /* RX flow steering */
      struct timer_list flow_gc_timer;
      unsigned long ageing_time;
      unsigned int numdisabled;
      struct list_head disabled;
      void *security;
      u32 flow_count;
      u32 rx_batched;
      struct tun_pcpu_stats __percpu *pcpu_stats;
    };
     
    /*
     * proto_ops installed on the tun_file's embedded socket in tun_chr_open;
     * sendmsg/recvmsg are how packets are pushed into / pulled out of this
     * queue through the socket interface.
     * Fix: the original excerpt was missing the terminating ';' after the
     * initializer, which would not compile.
     */
    static const struct proto_ops tun_socket_ops = {
      .peek_len = tun_peek_len,
      .sendmsg = tun_sendmsg,
      .recvmsg = tun_recvmsg,
    };

    在struct tun_file中有一个成员struct tun_struct,它里面有一个struct net_device,这个用来表示宿主机上的tuntap网络设备。在struct tun_file中,还有struct socket和struct sock,因为要用到内核的网络协议栈,所以就需要这两个结构,这在以前网络协议部分已经分析过了。所以按照struct tun_file的注释所说,这是一个很重要的数据结构,"/dev/net/tun"对应的struct file的private_data指向它,因而可以接收qemu发过来的数据。除此之外,它还可以通过struct sock来操作内核协议栈,然后将网络包从宿主机上的tuntap网络设备发出去,宿主机上的tuntap网络设备对应的struct net_device也归它管。

    在qemu的tap_open函数中,打开这个字符设备文件之后,接下来要做的事情是通过ioctl(命令为TUNSETIFF)来设置宿主机的网卡。这个ioctl到了内核里面会调用tun_chr_ioctl,如下所示:

    /*
     * Core ioctl handler for /dev/net/tun. For TUNSETIFF it copies the
     * struct ifreq from userspace, creates/attaches the tuntap netdevice via
     * tun_set_iff, and copies the (possibly renamed) ifreq back.
     * NOTE: '......' marks code elided from this excerpt, including the
     * function's tail.
     */
    static long __tun_chr_ioctl(struct file *file, unsigned int cmd,
              unsigned long arg, int ifreq_len)
    {
      struct tun_file *tfile = file->private_data;  /* set in tun_chr_open */
      struct tun_struct *tun;
      void __user* argp = (void __user*)arg;
      struct ifreq ifr;
      kuid_t owner;
      kgid_t group;
      int sndbuf;
      int vnet_hdr_sz;
      unsigned int ifindex;
      int le;
      int ret;
     
      /* Commands carrying an ifreq need it copied in from userspace. */
      if (cmd == TUNSETIFF || cmd == TUNSETQUEUE || _IOC_TYPE(cmd) == SOCK_IOC_TYPE) {
        if (copy_from_user(&ifr, argp, ifreq_len))
          return -EFAULT;
      } 
    ......
      tun = __tun_get(tfile);
      if (cmd == TUNSETIFF) {
        /* NOTE(review): the kernel source NUL-terminates the name with '\0'
         * here; the terminator character was lost when this excerpt was
         * transcribed ('' is not valid C). */
        ifr.ifr_name[IFNAMSIZ-1] = '';
        ret = tun_set_iff(sock_net(&tfile->sk), file, &ifr);
    ......
        /* Return the final interface name/flags to userspace. */
        if (copy_to_user(argp, &ifr, ifreq_len))
          ret = -EFAULT;
      }
    ......

    在__tun_chr_ioctl中,首先通过copy_from_user把配置从用户态拷贝到内核态,调用tun_set_iff设置tuntap网络设备,然后调用copy_to_user将配置结果返回。tun_set_iff的实现如下所示:

    /*
     * TUNSETIFF implementation: allocate the net_device (with tun_struct in
     * its private area), initialize it, attach the calling tun_file as a
     * queue and register the device — after which it shows up on the host
     * (e.g. in `ip addr`). NOTE: '......' marks elided code, and the error
     * handling after each call was also trimmed in this excerpt.
     */
    static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr)
    {
      struct tun_struct *tun;
      struct tun_file *tfile = file->private_data;
      struct net_device *dev;
    ......
      char *name;
      unsigned long flags = 0;
      /* Multi-queue devices get MAX_TAP_QUEUES TX/RX queues, else one. */
      int queues = ifr->ifr_flags & IFF_MULTI_QUEUE ?
               MAX_TAP_QUEUES : 1;
     
      if (ifr->ifr_flags & IFF_TUN) {
        /* TUN device */
        flags |= IFF_TUN;
        name = "tun%d";
      } else if (ifr->ifr_flags & IFF_TAP) {
        /* TAP device */
        flags |= IFF_TAP;
        name = "tap%d";
      } else
        return -EINVAL;
     
      /* Caller-supplied name overrides the tun%d/tap%d template. */
      if (*ifr->ifr_name)
        name = ifr->ifr_name;
     
      /* net_device with a tun_struct allocated in its private area. */
      dev = alloc_netdev_mqs(sizeof(struct tun_struct), name,
                   NET_NAME_UNKNOWN, tun_setup, queues,
                   queues);
     
      err = dev_get_valid_name(net, dev, name);
      dev_net_set(dev, net);
      dev->rtnl_link_ops = &tun_link_ops;
      dev->ifindex = tfile->ifindex;
      dev->sysfs_groups[0] = &tun_attr_group;
     
      /* tun_struct lives inside the netdev's private data. */
      tun = netdev_priv(dev);
      tun->dev = dev;
      tun->flags = flags;
      tun->txflt.count = 0;
      tun->vnet_hdr_sz = sizeof(struct virtio_net_hdr);
     
      tun->align = NET_SKB_PAD;
      tun->filter_attached = false;
      tun->sndbuf = tfile->socket.sk->sk_sndbuf;
      tun->rx_batched = 0;
     
      tun_net_init(dev);
      tun_flow_init(tun);
     
      /* Attach this open fd as one queue of the device, then register it. */
      err = tun_attach(tun, file, false);
      err = register_netdevice(tun->dev);
     
      netif_carrier_on(tun->dev);
     
      if (netif_running(tun->dev))
        netif_tx_wake_all_queues(tun->dev);
     
      /* Report the actual name (tun%d/tap%d got expanded) back to the caller. */
      strcpy(ifr->ifr_name, tun->dev->name);
      return 0;
    }

    tun_set_iff创建了struct tun_struct和struct net_device,并且将这个tuntap网络设备通过register_netdevice注册到内核中,这样就能在宿主机上通过ip addr看到这个网卡了,如下图所示:

     下面来解析关联前端设备驱动和后端设备驱动的过程。来看在客户机中发送一个网络包的时候,会发生哪些事情。虚拟机里面的进程发送一个网络包,通过文件系统和Socket调用网络协议栈到达网络设备层,只不过这个不是普通的网络设备,而是virtio_net的驱动。virtio_net的驱动程序代码在Linux操作系统的源代码里面,文件名为drivers/net/virtio_net.c,如下所示:

    /*
     * Guest-side virtio-net driver registration (drivers/net/virtio_net.c).
     * Registering the virtio_driver makes the virtio core call virtnet_probe
     * when a matching virtio network device is discovered.
     * NOTE: '......' marks code elided from this excerpt.
     */
    static __init int virtio_net_driver_init(void)
    {
        ret = register_virtio_driver(&virtio_net_driver);
    ......
    }
    module_init(virtio_net_driver_init);
    module_exit(virtio_net_driver_exit);
     
    MODULE_DEVICE_TABLE(virtio, id_table);
    MODULE_DESCRIPTION("Virtio network driver");
    MODULE_LICENSE("GPL");
     
    /* Driver descriptor: .probe is invoked per matching virtio device. */
    static struct virtio_driver virtio_net_driver = {
      .driver.name =  KBUILD_MODNAME,
      .driver.owner =  THIS_MODULE,
      .id_table =  id_table,
      .validate =  virtnet_validate,
      .probe =  virtnet_probe,          /* device initialization entry point */
      .remove =  virtnet_remove,
      .config_changed = virtnet_config_changed,
    ......
    };

    在virtio_net的驱动程序的初始化代码中,需要注册一个驱动函数virtio_net_driver。当一个设备驱动作为一个内核模块被初始化的时候,probe函数会被调用,因而来看一下virtnet_probe:

    /*
     * Probe a virtio network device in the guest: allocate the net_device
     * (with virtnet_info in its private area), set up its ops and MTU range,
     * create the virtqueues via init_vqs, and register the netdev so the
     * NIC becomes visible inside the guest.
     * NOTE: '......' marks elided code; in the full source max_queue_pairs
     * is read from the device config space before being used below.
     */
    static int virtnet_probe(struct virtio_device *vdev)
    {
      int i, err;
      struct net_device *dev;
      struct virtnet_info *vi;
      u16 max_queue_pairs;
      int mtu;
     
      /* Allocate ourselves a network device with room for our info */
      dev = alloc_etherdev_mq(sizeof(struct virtnet_info), max_queue_pairs);
     
      /* Set up network device as normal. */
      dev->priv_flags |= IFF_UNICAST_FLT | IFF_LIVE_ADDR_CHANGE;
      dev->netdev_ops = &virtnet_netdev;
      dev->features = NETIF_F_HIGHDMA;
     
      dev->ethtool_ops = &virtnet_ethtool_ops;
      SET_NETDEV_DEV(dev, &vdev->dev);
    ......
      /* MTU range: 68 - 65535 */
      dev->min_mtu = MIN_MTU;
      dev->max_mtu = MAX_MTU;
     
      /* Set up our device-specific information */
      vi = netdev_priv(dev);
      vi->dev = dev;
      vi->vdev = vdev;
      vdev->priv = vi;            /* back-pointer for later callbacks */
      vi->stats = alloc_percpu(struct virtnet_stats);
      INIT_WORK(&vi->config_work, virtnet_config_changed_work);
    ......
      vi->max_queue_pairs = max_queue_pairs;
     
      /* Allocate/initialize the rx/tx queues, and invoke find_vqs */
      err = init_vqs(vi);
      netif_set_real_num_tx_queues(dev, vi->curr_queue_pairs);
      netif_set_real_num_rx_queues(dev, vi->curr_queue_pairs);
     
      virtnet_init_settings(dev);
     
      /* Makes the NIC visible in the guest (e.g. as eth0). */
      err = register_netdev(dev);
      virtio_device_ready(vdev);
      virtnet_set_queues(vi, vi->curr_queue_pairs);
    ......
    }

    在virtnet_probe中会创建struct net_device,并且通过register_netdev注册这个网络设备,这样在客户机里面就能看到这个网卡了。在virtnet_probe中,还有一件重要的事情就是,init_vqs会初始化发送和接收的virtqueue,如下所示:

    /*
     * Allocate the per-queue send/receive structures and locate the actual
     * virtqueues, then pin queue interrupts to CPUs.
     * NOTE: '......' marks error handling elided from this excerpt.
     */
    static int init_vqs(struct virtnet_info *vi)
    {
      int ret;
     
      /* Allocate send & receive queues */
      ret = virtnet_alloc_queues(vi);
      ret = virtnet_find_vqs(vi);     /* bind vi->rq/sq to real virtqueues */
    ......
      /* Set IRQ affinity with CPU hotplug excluded. */
      get_online_cpus();
      virtnet_set_affinity(vi);
      put_online_cpus();
     
      return 0;
    }
     
    /*
     * Allocate the arrays of send (sq) and receive (rq) queue structures,
     * one pair per queue pair, and register the NAPI poll handlers:
     * virtnet_poll for RX and virtnet_poll_tx for TX completion.
     * NOTE(review): allocation failures are not checked in this excerpt —
     * presumably trimmed from the original; verify against the kernel tree.
     */
    static int virtnet_alloc_queues(struct virtnet_info *vi)
    {
      int i;
     
      vi->sq = kzalloc(sizeof(*vi->sq) * vi->max_queue_pairs, GFP_KERNEL);
      vi->rq = kzalloc(sizeof(*vi->rq) * vi->max_queue_pairs, GFP_KERNEL);
     
      /* Deferred work used to refill RX buffers when allocation fails. */
      INIT_DELAYED_WORK(&vi->refill, refill_work);
      for (i = 0; i < vi->max_queue_pairs; i++) {
        vi->rq[i].pages = NULL;
        netif_napi_add(vi->dev, &vi->rq[i].napi, virtnet_poll,
                 napi_weight);
        netif_tx_napi_add(vi->dev, &vi->sq[i].napi, virtnet_poll_tx,
              napi_tx ? napi_weight : 0);
     
        /* Scatter-gather tables used when queueing buffers. */
        sg_init_table(vi->rq[i].sg, ARRAY_SIZE(vi->rq[i].sg));
        ewma_pkt_len_init(&vi->rq[i].mrg_avg_pkt_len);
        sg_init_table(vi->sq[i].sg, ARRAY_SIZE(vi->sq[i].sg));
      }
     
      return 0;
    }

    按照之前的virtio原理,virtqueue是一个介于客户机前端和qemu后端的一个结构,用于在这两端之间传递数据,对于网络设备来讲有发送和接收两个方向的队列。这里建立的struct virtqueue是客户机前端对于队列管理的数据结构。队列的实体需要通过函数virtnet_find_vqs查找或者生成,这里还会指定接收队列的callback函数为skb_recv_done,发送队列的callback函数为skb_xmit_done。当buffer使用发生变化的时候,可以调用这个callback函数进行通知,如下所示:

    /*
     * Resolve the device's virtqueues and bind them to vi->rq / vi->sq.
     * RX queues get skb_recv_done and TX queues skb_xmit_done as callbacks,
     * invoked when the host signals buffer-state changes.
     * NOTE: '......' marks elided code; total_vqs and ctx are computed in
     * the elided portion of the full source.
     */
    static int virtnet_find_vqs(struct virtnet_info *vi)
    {
      vq_callback_t **callbacks;
      struct virtqueue **vqs;
      int ret = -ENOMEM;
      int i, total_vqs;
      const char **names;
     
      /* Allocate space for find_vqs parameters */
      vqs = kzalloc(total_vqs * sizeof(*vqs), GFP_KERNEL);
      callbacks = kmalloc(total_vqs * sizeof(*callbacks), GFP_KERNEL);
      names = kmalloc(total_vqs * sizeof(*names), GFP_KERNEL);
     
      /* Allocate/initialize parameters for send/receive virtqueues */
      for (i = 0; i < vi->max_queue_pairs; i++) {
        callbacks[rxq2vq(i)] = skb_recv_done;   /* RX completion callback */
        callbacks[txq2vq(i)] = skb_xmit_done;   /* TX completion callback */
        names[rxq2vq(i)] = vi->rq[i].name;
        names[txq2vq(i)] = vi->sq[i].name;
      }
     
      /* Transport-specific lookup (e.g. vp_modern_find_vqs for virtio-pci). */
      ret = vi->vdev->config->find_vqs(vi->vdev, total_vqs, vqs, callbacks, names, ctx, NULL);
    ......
      for (i = 0; i < vi->max_queue_pairs; i++) {
        vi->rq[i].vq = vqs[rxq2vq(i)];
        vi->rq[i].min_buf_len = mergeable_min_buf_len(vi, vi->rq[i].vq);
        vi->sq[i].vq = vqs[txq2vq(i)];
      }
    ......
    }
     
     

    这里的find_vqs是在struct virtnet_info里的struct virtio_device里的struct virtio_config_ops *config里面定义的。根据virtio_config_ops的定义,find_vqs会调用vp_modern_find_vqs,到这一步和块设备是一样的了。在vp_modern_find_vqs 中,vp_find_vqs会调用vp_find_vqs_intx。在vp_find_vqs_intx 中,通过request_irq注册一个中断处理函数vp_interrupt,当设备向队列中写入信息时会产生一个中断,也就是vq中断。中断处理函数需要调用相应队列的回调函数,然后根据队列的数目,依次调用vp_setup_vq完成virtqueue、vring的分配和初始化。

    同样,这些数据结构会和virtio后端的VirtIODevice、VirtQueue、vring对应起来,都应该指向刚才创建的那一段内存。客户机同样会通过调用专门给外部设备发送指令的函数iowrite告诉外部的pci设备,这些共享内存的地址。至此前端设备驱动和后端设备驱动之间的两个收发队列就关联好了,这两个队列的格式和块设备是一样的。

    virtio 数据流交互机制

    vring 主要通过两个环形缓冲区来完成数据流的转发,如下图所示。

    vring 包含三个部分,描述符数组 desc,可用的 available ring 和使用过的 used ring。

    desc 用于存储一些关联的描述符,每个描述符记录一个对 buffer 的描述,available ring 则用于 guest 端表示当前有哪些描述符是可用的,而 used ring 则表示 host 端哪些描述符已经被使用。

    Virtio 使用 virtqueue 来实现 I/O 机制,每个 virtqueue 就是一个承载大量数据的队列,具体使用多少个队列取决于需求,例如,virtio 网络驱动程序(virtio-net)使用两个队列(一个用于接收,另一个用于发送),而 virtio 块驱动程序(virtio-blk)仅使用一个队列。

    具体的,假设 guest 要向 host 发送数据,首先,guest 通过函数 virtqueue_add_buf 将存有数据的 buffer 添加到 virtqueue 中,然后调用 virtqueue_kick 函数,virtqueue_kick 调用 virtqueue_notify 函数,通过写入寄存器的方式来通知到 host。host 调用 virtqueue_get_buf 来获取 virtqueue 中收到的数据。

     

    vm_find_vqs --> vm_setup_vq 
                      |
                      | --> vring_create_virtqueue
                                  |--> vring_init
                                  |-->  __vring_new_virtqueue
    virtqueue_add_split    
             | -->  dma_addr_t addr = vring_map_one_sg(vq, sg, DMA_TO_DEVICE)          
             | -->          vq->split.vring.desc       vq->split.vring.avail
    /*
     * Driver-side state wrapping a virtqueue. The union holds the actual
     * ring memory layout for whichever ring format was negotiated:
     * .split (classic desc/avail/used rings) or .packed (VIRTIO 1.1
     * packed ring). Code checks packed_ring to pick the branch.
     */
    struct vring_virtqueue {
            struct virtqueue vq;

            /* Is this a packed ring? */
            bool packed_ring;

            /* Is DMA API used? */
            bool use_dma_api;

            /* Can we use weak barriers? */
            bool weak_barriers;

            /* Other side has made a mess, don't try any more. */
            bool broken;

            /* Host supports indirect buffers */
            bool indirect;

            /* Host publishes avail event idx */
            bool event;

            /* Head of free buffer list. */
            unsigned int free_head;
            /* Number we've added since last sync. */
            unsigned int num_added;

            /* Last used index we've seen. */
            u16 last_used_idx;

            union {
                    /* Available for split ring */
                    struct {
                            /* Actual memory layout for this queue. */
                            struct vring vring;

                            /* Last written value to avail->flags */
                            u16 avail_flags_shadow;

                            /*
                             * Last written value to avail->idx in
                             * guest byte order.
                             */
                            u16 avail_idx_shadow;

                            /* Per-descriptor state. */
                            struct vring_desc_state_split *desc_state;

                            /* DMA address and size information */
                            dma_addr_t queue_dma_addr;
                            size_t queue_size_in_bytes;
                    } split;



                    /* Available for packed ring */
                    struct {
                            /* Actual memory layout for this queue. */
                            struct {
                                    unsigned int num;
                                    struct vring_packed_desc *desc;
                                    struct vring_packed_desc_event *driver;
                                    struct vring_packed_desc_event *device;
                            } vring;

                            /* Driver ring wrap counter. */
                            bool avail_wrap_counter;

                            /* Device ring wrap counter. */
                            bool used_wrap_counter;

                            /* Avail used flags. */
                            u16 avail_used_flags;

                            /* Index of the next avail descriptor. */
                            u16 next_avail_idx;

                            /*
                             * Last written value to driver->flags in
                             * guest byte order.
                             */
                            u16 event_flags_shadow;

                            /* Per-descriptor state. */
                            struct vring_desc_state_packed *desc_state;
                            struct vring_desc_extra_packed *desc_extra;

                            /* DMA address and size information */
                            dma_addr_t ring_dma_addr;
                            dma_addr_t driver_event_dma_addr;
                            dma_addr_t device_event_dma_addr;
                            size_t ring_size_in_bytes;
                            size_t event_size_in_bytes;
                    } packed;
            };

            /* How to notify other side. FIXME: commonalize hcalls! */
            bool (*notify)(struct virtqueue *vq);

            /* DMA, allocation, and size information */
            bool we_own_ring;

    #ifdef DEBUG
            /* They're supposed to lock for us. */
            unsigned int in_use;

            /* Figure out if their kicks are too delayed. */
            bool last_add_time_valid;
            ktime_t last_add_time;
    #endif
    };

    virtqueue创建 + DMA地址

    /*
     * Create a virtqueue, dispatching on the negotiated ring layout:
     * VIRTIO_F_RING_PACKED selects the packed-ring variant, otherwise the
     * classic split ring is used. All parameters are forwarded unchanged.
     */
    struct virtqueue *vring_create_virtqueue(
        unsigned int index,
        unsigned int num,
        unsigned int vring_align,
        struct virtio_device *vdev,
        bool weak_barriers,
        bool may_reduce_num,
        bool context,
        bool (*notify)(struct virtqueue *),
        void (*callback)(struct virtqueue *),
        const char *name)
    {
        if (!virtio_has_feature(vdev, VIRTIO_F_RING_PACKED))
            return vring_create_virtqueue_split(index, num, vring_align,
                    vdev, weak_barriers, may_reduce_num,
                    context, notify, callback, name);
    
        return vring_create_virtqueue_packed(index, num, vring_align,
                vdev, weak_barriers, may_reduce_num,
                context, notify, callback, name);
    }
    
    
    
    
    
    
    
    
    
    /*
     * Return the DMA address of the descriptor area, from whichever ring
     * layout (packed or split) this queue uses. Only valid when the core
     * allocated the ring itself, hence the we_own_ring assertion.
     */
    dma_addr_t virtqueue_get_desc_addr(struct virtqueue *_vq)
    {
        struct vring_virtqueue *vq = to_vvq(_vq);
    
        BUG_ON(!vq->we_own_ring);
    
        return vq->packed_ring ? vq->packed.ring_dma_addr
                               : vq->split.queue_dma_addr;
    }
    
    
    
     
    
    
    
    
    /*
     * Create and activate one virtqueue on a virtio-mmio transport.
     *
     * Selects queue @index via VIRTIO_MMIO_QUEUE_SEL, allocates the vring,
     * then programs the ring addresses into the device: legacy (version 1)
     * devices take a single 32-bit page frame number, modern devices take
     * 64-bit desc/avail/used addresses plus a READY flag.
     *
     * Returns the new virtqueue, NULL if @name is NULL (caller asked for a
     * hole in the vq array), or an ERR_PTR on failure.
     *
     * Fixes vs. pasted original: restored the error-label cleanup tail
     * (the gotos below otherwise target undefined labels and leak info/vq)
     * and repaired the dev_err format string whose "\n" had been mangled
     * into a literal newline.
     */
    static struct virtqueue *vm_setup_vq(struct virtio_device *vdev, unsigned index,
                      void (*callback)(struct virtqueue *vq),
                      const char *name, bool ctx)
    {
        struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vdev);
        struct virtio_mmio_vq_info *info;
        struct virtqueue *vq;
        unsigned long flags;
        unsigned int num;
        int err;

        if (!name)
            return NULL;

        /* Select the queue we're interested in */
        writel(index, vm_dev->base + VIRTIO_MMIO_QUEUE_SEL);

        /* Queue shouldn't already be set up. */
        if (readl(vm_dev->base + (vm_dev->version == 1 ?
                VIRTIO_MMIO_QUEUE_PFN : VIRTIO_MMIO_QUEUE_READY))) {
            err = -ENOENT;
            goto error_available;
        }

        /* Allocate and fill out our active queue description */
        info = kmalloc(sizeof(*info), GFP_KERNEL);
        if (!info) {
            err = -ENOMEM;
            goto error_kmalloc;
        }

        /* Device reports 0 here if the queue is unavailable. */
        num = readl(vm_dev->base + VIRTIO_MMIO_QUEUE_NUM_MAX);
        if (num == 0) {
            err = -ENOENT;
            goto error_new_virtqueue;
        }

        /* Create the vring */
        vq = vring_create_virtqueue(index, num, VIRTIO_MMIO_VRING_ALIGN, vdev,
                     true, true, ctx, vm_notify, callback, name);
        if (!vq) {
            err = -ENOMEM;
            goto error_new_virtqueue;
        }

        /* Activate the queue */
        writel(virtqueue_get_vring_size(vq), vm_dev->base + VIRTIO_MMIO_QUEUE_NUM);
        if (vm_dev->version == 1) {
            u64 q_pfn = virtqueue_get_desc_addr(vq) >> PAGE_SHIFT;

            /*
             * virtio-mmio v1 uses a 32bit QUEUE PFN. If we have something
             * that doesn't fit in 32bit, fail the setup rather than
             * pretending to be successful.
             */
            if (q_pfn >> 32) {
                dev_err(&vdev->dev,
                    "platform bug: legacy virtio-mmio must not be used with RAM above 0x%llxGB\n",
                    0x1ULL << (32 + PAGE_SHIFT - 30));
                err = -E2BIG;
                goto error_bad_pfn;
            }

            writel(PAGE_SIZE, vm_dev->base + VIRTIO_MMIO_QUEUE_ALIGN);
            writel(q_pfn, vm_dev->base + VIRTIO_MMIO_QUEUE_PFN);
        } else {
            u64 addr;

            /* Modern device: program full 64-bit ring addresses. */
            addr = virtqueue_get_desc_addr(vq);
            writel((u32)addr, vm_dev->base + VIRTIO_MMIO_QUEUE_DESC_LOW);
            writel((u32)(addr >> 32),
                    vm_dev->base + VIRTIO_MMIO_QUEUE_DESC_HIGH);

            addr = virtqueue_get_avail_addr(vq);
            writel((u32)addr, vm_dev->base + VIRTIO_MMIO_QUEUE_AVAIL_LOW);
            writel((u32)(addr >> 32),
                    vm_dev->base + VIRTIO_MMIO_QUEUE_AVAIL_HIGH);

            addr = virtqueue_get_used_addr(vq);
            writel((u32)addr, vm_dev->base + VIRTIO_MMIO_QUEUE_USED_LOW);
            writel((u32)(addr >> 32),
                    vm_dev->base + VIRTIO_MMIO_QUEUE_USED_HIGH);

            writel(1, vm_dev->base + VIRTIO_MMIO_QUEUE_READY);
        }

        vq->priv = info;
        info->vq = vq;

        spin_lock_irqsave(&vm_dev->lock, flags);
        list_add(&info->node, &vm_dev->virtqueues);
        spin_unlock_irqrestore(&vm_dev->lock, flags);

        return vq;

    error_bad_pfn:
        vring_del_virtqueue(vq);
    error_new_virtqueue:
        /* Deactivate whatever the device may have latched so far. */
        if (vm_dev->version == 1)
            writel(0, vm_dev->base + VIRTIO_MMIO_QUEUE_PFN);
        else {
            writel(0, vm_dev->base + VIRTIO_MMIO_QUEUE_READY);
            WARN_ON(readl(vm_dev->base + VIRTIO_MMIO_QUEUE_READY));
        }
        kfree(info);
    error_kmalloc:
    error_available:
        return ERR_PTR(err);
    }
    以上及以下为 Linux kernel 侧的实现（drivers/virtio/virtio_mmio.c 与 virtio_ring.c）：
    /*
     * find_vqs() callback of the virtio-mmio transport: request the shared
     * platform interrupt, then set up each requested virtqueue in turn.
     * A NULL entry in names[] means the caller wants that slot skipped.
     *
     * Fix vs. pasted original: the paste dropped the local declarations
     * (vm_dev, irq, i, err, queue_idx) and the platform_get_irq() call;
     * restored them so the function is well-formed.
     */
    static int vm_find_vqs(struct virtio_device *vdev, unsigned nvqs,
                   struct virtqueue *vqs[],
                   vq_callback_t *callbacks[],
                   const char * const names[],
                   const bool *ctx,
                   struct irq_affinity *desc)
    {
        struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vdev);
        int irq = platform_get_irq(vm_dev->pdev, 0);
        int i, err, queue_idx = 0;

        if (irq < 0)
            return irq;

        err = request_irq(irq, vm_interrupt, IRQF_SHARED,
                dev_name(&vdev->dev), vm_dev);
        if (err)
            return err;

        for (i = 0; i < nvqs; ++i) {
            if (!names[i]) {
                vqs[i] = NULL;
                continue;
            }

            vqs[i] = vm_setup_vq(vdev, queue_idx++, callbacks[i], names[i],
                         ctx ? ctx[i] : false);
            if (IS_ERR(vqs[i])) {
                /* Tear down everything created so far, including the irq. */
                vm_del_vqs(vdev);
                return PTR_ERR(vqs[i]);
            }
        }

        return 0;
    }
    
    
    /*
     * Allocate ring memory and build a split-layout virtqueue on top of it.
     *
     * Tries progressively smaller power-of-two ring sizes while a whole
     * vring does not fit into a single page (if @may_reduce_num allows),
     * then falls back to a last-ditch single allocation.
     *
     * Fixes vs. pasted original: if the initial ring already fit in one
     * page the loop body never ran and @queue stayed NULL, which was then
     * passed to vring_init() (NULL deref); @num could reach 0; a failed
     * __vring_new_virtqueue() leaked the DMA allocation; and the dma
     * address/size/ownership were never recorded in the new vq.
     */
    static struct virtqueue *vring_create_virtqueue_split(
        unsigned int index,
        unsigned int num,
        unsigned int vring_align,
        struct virtio_device *vdev,
        bool weak_barriers,
        bool may_reduce_num,
        bool context,
        bool (*notify)(struct virtqueue *),
        void (*callback)(struct virtqueue *),
        const char *name)
    {
        struct virtqueue *vq;
        void *queue = NULL;
        dma_addr_t dma_addr;
        size_t queue_size_in_bytes;
        struct vring vring;

        /* TODO: allocate each queue chunk individually */
        for (; num && vring_size(num, vring_align) > PAGE_SIZE; num /= 2) {
            queue = vring_alloc_queue(vdev, vring_size(num, vring_align),
                          &dma_addr,
                          GFP_KERNEL|__GFP_NOWARN|__GFP_ZERO);
            if (queue)
                break;
            if (!may_reduce_num)
                return NULL;
        }

        if (!num)
            return NULL;

        if (!queue) {
            /* Ring fits in one page (loop never allocated): try once. */
            queue = vring_alloc_queue(vdev, vring_size(num, vring_align),
                          &dma_addr, GFP_KERNEL|__GFP_ZERO);
        }
        if (!queue)
            return NULL;

        queue_size_in_bytes = vring_size(num, vring_align);
        vring_init(&vring, num, queue, vring_align);

        vq = __vring_new_virtqueue(index, vring, vdev, weak_barriers, context,
                       notify, callback, name);
        if (!vq) {
            vring_free_queue(vdev, queue_size_in_bytes, queue,
                     dma_addr);
            return NULL;
        }

        /* Record allocation details so teardown can free the ring. */
        to_vvq(vq)->split.queue_dma_addr = dma_addr;
        to_vvq(vq)->split.queue_size_in_bytes = queue_size_in_bytes;
        to_vvq(vq)->we_own_ring = true;

        return vq;
    }
    /* Only available for split ring */
    /*
     * Build a vring_virtqueue around caller-provided ring memory (@vring).
     * The caller keeps ownership of the ring pages (we_own_ring = false);
     * vring_create_virtqueue_split() overrides that after allocating its own.
     * Returns the embedded struct virtqueue, or NULL on a packed-ring device
     * or allocation failure.
     */
    struct virtqueue *__vring_new_virtqueue(unsigned int index,
                        struct vring vring,
                        struct virtio_device *vdev,
                        bool weak_barriers,
                        bool context,
                        bool (*notify)(struct virtqueue *),
                        void (*callback)(struct virtqueue *),
                        const char *name)
    {
        unsigned int i;
        struct vring_virtqueue *vq;

        /* This constructor only knows the split layout. */
        if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED))
            return NULL;

        vq = kmalloc(sizeof(*vq), GFP_KERNEL);
        if (!vq)
            return NULL;

        vq->packed_ring = false;
        vq->vq.callback = callback;
        vq->vq.vdev = vdev;
        vq->vq.name = name;
        vq->vq.num_free = vring.num;
        vq->vq.index = index;
        vq->we_own_ring = false;
        vq->notify = notify;
        vq->weak_barriers = weak_barriers;
        vq->broken = false;
        vq->last_used_idx = 0;
        vq->num_added = 0;
        vq->use_dma_api = vring_use_dma_api(vdev);
    #ifdef DEBUG
        vq->in_use = false;
        vq->last_add_time_valid = false;
    #endif

        /* Indirect descriptors are only used when the caller supplies no
         * per-buffer context (the two share desc_state storage). */
        vq->indirect = virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC) &&
            !context;
        vq->event = virtio_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX);

        if (virtio_has_feature(vdev, VIRTIO_F_ORDER_PLATFORM))
            vq->weak_barriers = false;

        /* Zero until a ring-owning creator fills these in. */
        vq->split.queue_dma_addr = 0;
        vq->split.queue_size_in_bytes = 0;

        vq->split.vring = vring;
        vq->split.avail_flags_shadow = 0;
        vq->split.avail_idx_shadow = 0;

        /* No callback?  Tell other side not to bother us. */
        if (!callback) {
            vq->split.avail_flags_shadow |= VRING_AVAIL_F_NO_INTERRUPT;
            if (!vq->event)
                vq->split.vring.avail->flags = cpu_to_virtio16(vdev,
                        vq->split.avail_flags_shadow);
        }

        /* Per-descriptor bookkeeping (token + indirect-table pointer). */
        vq->split.desc_state = kmalloc_array(vring.num,
                sizeof(struct vring_desc_state_split), GFP_KERNEL);
        if (!vq->split.desc_state) {
            kfree(vq);
            return NULL;
        }

        /* Put everything in free lists. */
        vq->free_head = 0;
        for (i = 0; i < vring.num-1; i++)
            vq->split.vring.desc[i].next = cpu_to_virtio16(vdev, i + 1);
        memset(vq->split.desc_state, 0, vring.num *
                sizeof(struct vring_desc_state_split));

        list_add_tail(&vq->vq.list, &vdev->vqs);
        return &vq->vq;
    }
    
    /*
     * Add a buffer (out_sgs writable-by-us + in_sgs writable-by-device
     * scatterlists) to a split virtqueue, publishing it in the avail ring.
     * @data is the caller's token returned later by get_buf; @ctx is an
     * optional per-buffer context (mutually exclusive with indirect desc).
     *
     * Returns 0 on success, -ENOSPC if the ring is full, -ENOMEM on a DMA
     * mapping failure.
     *
     * Fixes vs. pasted original: restored the missing "unmap_release" error
     * path and the missing "return 0" tail (the pasted function fell off
     * the end and jumped to an undefined label), repaired the pr_debug
     * format string whose "\n" had been mangled into a literal newline,
     * and restored the BUG_ON(total_sg == 0) guard (with total_sg == 0,
     * "prev" would be used uninitialized below).
     */
    static inline int virtqueue_add_split(struct virtqueue *_vq,
                          struct scatterlist *sgs[],
                          unsigned int total_sg,
                          unsigned int out_sgs,
                          unsigned int in_sgs,
                          void *data,
                          void *ctx,
                          gfp_t gfp)
    {
        struct vring_virtqueue *vq = to_vvq(_vq);
        struct scatterlist *sg;
        struct vring_desc *desc;
        unsigned int i, n, avail, descs_used, prev, err_idx;
        int head;
        bool indirect;

        BUG_ON(total_sg == 0);

        head = vq->free_head;

        /* Prefer an indirect table when the heuristic says it pays off. */
        if (virtqueue_use_indirect(_vq, total_sg))
            desc = alloc_indirect_split(_vq, total_sg, gfp);
        else {
            desc = NULL;
            WARN_ON_ONCE(total_sg > vq->split.vring.num && !vq->indirect);
        }

        if (desc) {
            /* Use a single buffer which doesn't continue */
            indirect = true;
            /* Set up rest to use this indirect table. */
            i = 0;
            descs_used = 1;
        } else {
            indirect = false;
            desc = vq->split.vring.desc;
            i = head;
            descs_used = total_sg;
        }

        if (vq->vq.num_free < descs_used) {
            pr_debug("Can't add buf len %i - avail = %i\n",
                 descs_used, vq->vq.num_free);
            /* FIXME: for historical reasons, we force a notify here if
             * there are outgoing parts to the buffer.  Presumably the
             * host should service the ring ASAP. */
            if (out_sgs)
                vq->notify(&vq->vq);
            if (indirect)
                kfree(desc);
            END_USE(vq);
            return -ENOSPC;
        }

        /* Device-readable segments first, chained via desc[].next. */
        for (n = 0; n < out_sgs; n++) {
            for (sg = sgs[n]; sg; sg = sg_next(sg)) {
                dma_addr_t addr = vring_map_one_sg(vq, sg, DMA_TO_DEVICE);
                if (vring_mapping_error(vq, addr))
                    goto unmap_release;

                desc[i].flags = cpu_to_virtio16(_vq->vdev, VRING_DESC_F_NEXT);
                desc[i].addr = cpu_to_virtio64(_vq->vdev, addr);
                desc[i].len = cpu_to_virtio32(_vq->vdev, sg->length);
                prev = i;
                i = virtio16_to_cpu(_vq->vdev, desc[i].next);
            }
        }
        /* Then device-writable segments, marked with F_WRITE. */
        for (; n < (out_sgs + in_sgs); n++) {
            for (sg = sgs[n]; sg; sg = sg_next(sg)) {
                dma_addr_t addr = vring_map_one_sg(vq, sg, DMA_FROM_DEVICE);
                if (vring_mapping_error(vq, addr))
                    goto unmap_release;

                desc[i].flags = cpu_to_virtio16(_vq->vdev, VRING_DESC_F_NEXT | VRING_DESC_F_WRITE);
                desc[i].addr = cpu_to_virtio64(_vq->vdev, addr);
                desc[i].len = cpu_to_virtio32(_vq->vdev, sg->length);
                prev = i;
                i = virtio16_to_cpu(_vq->vdev, desc[i].next);
            }
        }
        /* Last one doesn't continue. */
        desc[prev].flags &= cpu_to_virtio16(_vq->vdev, ~VRING_DESC_F_NEXT);

        if (indirect) {
            /* Now that the indirect table is filled in, map it. */
            dma_addr_t addr = vring_map_single(
                vq, desc, total_sg * sizeof(struct vring_desc),
                DMA_TO_DEVICE);
            if (vring_mapping_error(vq, addr))
                goto unmap_release;

            vq->split.vring.desc[head].flags = cpu_to_virtio16(_vq->vdev,
                    VRING_DESC_F_INDIRECT);
            vq->split.vring.desc[head].addr = cpu_to_virtio64(_vq->vdev,
                    addr);

            vq->split.vring.desc[head].len = cpu_to_virtio32(_vq->vdev,
                    total_sg * sizeof(struct vring_desc));
        }

        /* We're using some buffers from the free list. */
        vq->vq.num_free -= descs_used;

        /* Update free pointer */
        if (indirect)
            vq->free_head = virtio16_to_cpu(_vq->vdev,
                        vq->split.vring.desc[head].next);
        else
            vq->free_head = i;

        /* Store token and indirect buffer state. */
        vq->split.desc_state[head].data = data;
        if (indirect)
            vq->split.desc_state[head].indir_desc = desc;
        else
            vq->split.desc_state[head].indir_desc = ctx;

        /* Put entry in available array (but don't update avail->idx until they
         * do sync). */
        avail = vq->split.avail_idx_shadow & (vq->split.vring.num - 1);
        vq->split.vring.avail->ring[avail] = cpu_to_virtio16(_vq->vdev, head);

        /* Descriptors and available array need to be set before we expose the
         * new available array entries. */
        virtio_wmb(vq->weak_barriers);
        vq->split.avail_idx_shadow++;
        vq->split.vring.avail->idx = cpu_to_virtio16(_vq->vdev,
                            vq->split.avail_idx_shadow);
        vq->num_added++;

        /* This is very unlikely, but theoretically possible.  Kick
         * just in case. */
        if (unlikely(vq->num_added == (1 << 16) - 1))
            virtqueue_kick(_vq);

        END_USE(vq);
        return 0;

    unmap_release:
        /* Walk the chain we built so far and unmap every segment. */
        err_idx = i;

        if (indirect)
            i = 0;
        else
            i = head;

        for (n = 0; n < total_sg; n++) {
            if (i == err_idx)
                break;
            vring_unmap_one_split(vq, &desc[i]);
            i = virtio16_to_cpu(_vq->vdev, desc[i].next);
        }

        if (indirect)
            kfree(desc);

        END_USE(vq);
        return -ENOMEM;
    }

    以下为 QEMU 侧的实现（hw/virtio/virtio.c）：

    /*
     * QEMU side: record the guest-programmed descriptor-table address for
     * queue n and recompute the avail/used ring locations from it.
     * A queue with vring.num == 0 is not configured, so it is ignored.
     */
    void virtio_queue_set_addr(VirtIODevice *vdev, int n, hwaddr addr)
    {
        if (vdev->vq[n].vring.num == 0) {
            return;
        }
        vdev->vq[n].vring.desc = addr;
        virtio_queue_update_rings(vdev, n);
    }
    
     

     https://blog.csdn.net/qq_33588730/article/details/105397879

  • 相关阅读:
    swift 如何给tabBarItem的相关设计
    本地缓存
    Xcode7.2 导入XMPP框架错误解决
    Selenium WebUI自动化测试--PO中传递driver
    【转发】自动化测试中 数据源获取方式
    IntelliJ IDEA 开发环境设置
    Jmeter在csv传参时 请求参数乱码
    XAMPP phpmind Agileone 环境搭建及遇到问题的解决方法
    python浅拷贝和深拷贝
    一套简单的git版本控制代码
  • 原文地址:https://www.cnblogs.com/dream397/p/14386024.html
Copyright © 2011-2022 走看看