zoukankan      html  css  js  c++  java
  • 网络虚拟化Virtio-net

    网络虚拟化有和存储虚拟化类似的地方,例如它们都是基于virtio 的,因而在看网络虚拟化的过程中,会看到和存储虚拟化很像的数据结构和原理。但是网络虚拟化也有自己的特殊性。例如,存储虚拟化是将宿主机上的文件作为客户机上的硬盘,而网络虚拟化需要依赖于内核协议栈进行网络包的封装与解封装。那怎么实现客户机和宿主机之间的互通呢?就来看一看解析初始化的过程。还是从Virtio Network Device这个设备的初始化讲起,如下所示:

    /* QOM type descriptor for the abstract base device type; every concrete
     * device model (including the virtio ones below) inherits from it. */
    static const TypeInfo device_type_info = {
        .name = TYPE_DEVICE,
        .parent = TYPE_OBJECT,
        .instance_size = sizeof(DeviceState),
        .instance_init = device_initfn,
        .instance_post_init = device_post_init,
        .instance_finalize = device_finalize,
        .class_base_init = device_class_base_init,
        .class_init = device_class_init,
        .abstract = true,   /* abstract: never instantiated directly */
        .class_size = sizeof(DeviceClass),
    };
     
    /* Abstract virtio device type: the common parent of all virtio device
     * models (its own parent is TYPE_DEVICE). */
    static const TypeInfo virtio_device_info = {
        .name = TYPE_VIRTIO_DEVICE,
        .parent = TYPE_DEVICE,
        .instance_size = sizeof(VirtIODevice),
        .class_init = virtio_device_class_init,
        .instance_finalize = virtio_device_instance_finalize,
        .abstract = true,
        .class_size = sizeof(VirtioDeviceClass),
    };
     
    /* Concrete virtio-net type. Per the surrounding text, its class_init
     * (virtio_net_class_init) sets DeviceClass::realize to
     * virtio_net_device_realize. */
    static const TypeInfo virtio_net_info = {
        .name = TYPE_VIRTIO_NET,
        .parent = TYPE_VIRTIO_DEVICE,
        .instance_size = sizeof(VirtIONet),
        .instance_init = virtio_net_instance_init,
        .class_init = virtio_net_class_init,
    };
     
    /* Registration hook: makes the virtio-net type known to QOM.
     * type_init arranges for it to run during QEMU startup. */
    static void virtio_register_types(void)
    {
        type_register_static(&virtio_net_info);
    }

    type_init(virtio_register_types)

    Virtio Network Device这种类的定义是有多层继承关系的,TYPE_VIRTIO_NET的父类是TYPE_VIRTIO_DEVICE,TYPE_VIRTIO_DEVICE的父类是TYPE_DEVICE,TYPE_DEVICE的父类是TYPE_OBJECT,继承关系就到头了。type_init用于注册这种类,这里面每一层都有class_init,用于从TypeImpl生成xxxClass,也有instance_init,会将xxxClass初始化为实例。TYPE_VIRTIO_NET层的class_init函数是virtio_net_class_init,它定义了DeviceClass的realize函数为virtio_net_device_realize,这一点和存储块设备是一样的,如下所示:

    /*
     * Realize callback for virtio-net (DeviceClass::realize).
     * Validates the RX/TX queue-size properties, creates one RX/TX virtqueue
     * pair per queue plus a control queue, and creates the NIC backend.
     * (Error-reporting paths are elided in this excerpt — marked "......".)
     */
    static void virtio_net_device_realize(DeviceState *dev, Error **errp)
    {
        VirtIODevice *vdev = VIRTIO_DEVICE(dev);
        VirtIONet *n = VIRTIO_NET(dev);
        NetClientState *nc;
        int i;
    ......
        /* Common virtio device setup: name, device id, config space size. */
        virtio_init(vdev, "virtio-net", VIRTIO_ID_NET, n->config_size);

        /*
         * We set a lower limit on RX queue size to what it always was.
         * Guests that want a smaller ring can always resize it without
         * help from us (using virtio 1 and up).
         */
        if (n->net_conf.rx_queue_size < VIRTIO_NET_RX_QUEUE_MIN_SIZE ||
            n->net_conf.rx_queue_size > VIRTQUEUE_MAX_SIZE ||
            !is_power_of_2(n->net_conf.rx_queue_size)) {
    ......
            return;
        }

        /* TX queue size must also be a power of two within [MIN, MAX]. */
        if (n->net_conf.tx_queue_size < VIRTIO_NET_TX_QUEUE_MIN_SIZE ||
            n->net_conf.tx_queue_size > VIRTQUEUE_MAX_SIZE ||
            !is_power_of_2(n->net_conf.tx_queue_size)) {
    ......
            return;
        }

        n->max_queues = MAX(n->nic_conf.peers.queues, 1);
        /* Each queue needs an RX and a TX vq, plus one shared control vq. */
        if (n->max_queues * 2 + 1 > VIRTIO_QUEUE_MAX) {
    ......
            return;
        }
        n->vqs = g_malloc0(sizeof(VirtIONetQueue) * n->max_queues);
        n->curr_queues = 1;
    ......
        n->net_conf.tx_queue_size = MIN(virtio_net_max_tx_queue_size(n),
                                        n->net_conf.tx_queue_size);

        /* Create the RX/TX virtqueue pair for every queue. */
        for (i = 0; i < n->max_queues; i++) {
            virtio_net_add_queue(n, i);
        }

        /* Control virtqueue for guest-driven configuration commands. */
        n->ctrl_vq = virtio_add_queue(vdev, 64, virtio_net_handle_ctrl);
        qemu_macaddr_default_if_unset(&n->nic_conf.macaddr);
        memcpy(&n->mac[0], &n->nic_conf.macaddr, sizeof(n->mac));
        n->status = VIRTIO_NET_S_LINK_UP;

        /* Create the NIC; netclient_type/name override the defaults. */
        if (n->netclient_type) {
            n->nic = qemu_new_nic(&net_virtio_info, &n->nic_conf,
                                  n->netclient_type, n->netclient_name, n);
        } else {
            n->nic = qemu_new_nic(&net_virtio_info, &n->nic_conf,
                                  object_get_typename(OBJECT(dev)), dev->id, n);
        }
    ......
    }
    /*
     * Set up the RX/TX virtqueue pair for queue `index`.
     * RX is always serviced by virtio_net_handle_rx; TX is flushed either
     * from a periodic timer (tx=timer) or from a QEMU bottom half (default).
     *
     * Fix: the article pasted this function twice back to back; the verbatim
     * duplicate (a redefinition error in real C) has been removed.
     */
    static void virtio_net_add_queue(VirtIONet *n, int index)
    {
        VirtIODevice *vdev = VIRTIO_DEVICE(n);

        /* RX queue: host-to-guest packets handled by virtio_net_handle_rx. */
        n->vqs[index].rx_vq = virtio_add_queue(vdev, n->net_conf.rx_queue_size,
                                               virtio_net_handle_rx);

        if (n->net_conf.tx && !strcmp(n->net_conf.tx, "timer")) {
            /* "timer" mode: TX work is driven by a QEMU_CLOCK_VIRTUAL timer. */
            n->vqs[index].tx_vq =
                virtio_add_queue(vdev, n->net_conf.tx_queue_size,
                                 virtio_net_handle_tx_timer);
            n->vqs[index].tx_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
                                                  virtio_net_tx_timer,
                                                  &n->vqs[index]);
        } else {
            /* Default: TX work is drained from a bottom half. */
            n->vqs[index].tx_vq =
                virtio_add_queue(vdev, n->net_conf.tx_queue_size,
                                 virtio_net_handle_tx_bh);
            n->vqs[index].tx_bh = qemu_bh_new(virtio_net_tx_bh, &n->vqs[index]);
        }

        n->vqs[index].tx_waiting = 0;
        n->vqs[index].n = n;   /* back-pointer to the owning device */
    }

    这里面创建了一个VirtIODevice,这一点和存储虚拟化也是一样的。virtio_init用来初始化这个设备。VirtIODevice结构里面有一个VirtQueue数组,这就是virtio前端和后端互相传数据的队列,最多有VIRTIO_QUEUE_MAX个。

    刚才说的都是一样的地方,其实也有不一样的地方。会发现这里面有这样的语句n->max_queues * 2 + 1 > VIRTIO_QUEUE_MAX。为什么要乘以2呢?这是因为对于网络设备来讲,应该分发送队列和接收队列两个方向。接下来调用virtio_net_add_queue来初始化队列,可以看出这里面就有发送tx_vq和接收rx_vq两个队列,如下所示:

    /* Per-queue state for virtio-net: one RX and one TX virtqueue, plus the
     * TX flush mechanism (either a timer or a bottom half — see
     * virtio_net_add_queue). */
    typedef struct VirtIONetQueue {
        VirtQueue *rx_vq;            /* receive queue */
        VirtQueue *tx_vq;            /* transmit queue */
        QEMUTimer *tx_timer;         /* set when tx=timer */
        QEMUBH *tx_bh;               /* set otherwise (bottom-half TX) */
        uint32_t tx_waiting;
        struct {
            VirtQueueElement *elem;  /* presumably the in-flight async TX
                                      * element — not used in this excerpt */
        } async_tx;
        struct VirtIONet *n;         /* back-pointer to the owning device */
    } VirtIONetQueue;
     
    /*
     * Abbreviated excerpt of virtio_net_add_queue showing only the creation
     * of the RX queue (handler virtio_net_handle_rx) and the bottom-half TX
     * queue (handler virtio_net_handle_tx_bh).
     * Fix: the excerpt was missing the function's closing brace.
     */
    static void virtio_net_add_queue(VirtIONet *n, int index)
    {
        VirtIODevice *vdev = VIRTIO_DEVICE(n);

        n->vqs[index].rx_vq = virtio_add_queue(vdev, n->net_conf.rx_queue_size, virtio_net_handle_rx);

    ......

        n->vqs[index].tx_vq = virtio_add_queue(vdev, n->net_conf.tx_queue_size, virtio_net_handle_tx_bh);
        n->vqs[index].tx_bh = qemu_bh_new(virtio_net_tx_bh, &n->vqs[index]);
        n->vqs[index].n = n;
    }

    每个VirtQueue中,都有一个vring用来维护这个队列里面的数据;另外还有函数virtio_net_handle_rx用于处理网络包的接收;函数virtio_net_handle_tx_bh用于网络包的发送,这个函数后面会用到。接下来,qemu_new_nic会创建一个虚拟机里面的网卡,如下所示

    /*
     * Create a NIC with one NetClientState per queue.
     * NICState and its ncs[] array come from a single allocation:
     * ncs[] starts right after the info->size-byte header.
     */
    NICState *qemu_new_nic(NetClientInfo *info,
                           NICConf *conf,
                           const char *model,
                           const char *name,
                           void *opaque)
    {
        NetClientState **peers = conf->peers.ncs;
        NICState *nic;
        int i, queues = MAX(1, conf->peers.queues);   /* at least one queue */
    ......
        nic = g_malloc0(info->size + sizeof(NetClientState) * queues);
        nic->ncs = (void *)nic + info->size;  /* ncs[] lives after the header */
        nic->conf = conf;
        nic->opaque = opaque;

        /* Wire each queue's client state to its peer (e.g. the tap backend). */
        for (i = 0; i < queues; i++) {
            qemu_net_client_setup(&nic->ncs[i], info, peers[i], model, name, NULL);
            nic->ncs[i].queue_index = i;
        }

        return nic;
    }
     
    /*
     * Populate a NetClientState, link it onto the global net_clients list,
     * and give it an incoming packet queue. A caller-supplied name is
     * duplicated; otherwise one is derived from the model.
     */
    static void qemu_net_client_setup(NetClientState *nc,
                                      NetClientInfo *info,
                                      NetClientState *peer,
                                      const char *model,
                                      const char *name,
                                      NetClientDestructor *destructor)
    {
        nc->info = info;
        nc->model = g_strdup(model);
        nc->name = name ? g_strdup(name) : assign_name(nc, model);

        /* Make the client globally visible. */
        QTAILQ_INSERT_TAIL(&net_clients, nc, next);

        nc->destructor = destructor;
        nc->incoming_queue = qemu_new_net_queue(qemu_deliver_packet_iov, nc);
        QTAILQ_INIT(&nc->filters);
    }

    初始化过程解析完毕以后,接下来从qemu的启动过程看起。对于网卡的虚拟化,qemu的启动参数里面有关的是下面两行

    -netdev tap,fd=32,id=hostnet0,vhost=on,vhostfd=37
    -device virtio-net-pci,netdev=hostnet0,id=net0,mac=fa:16:3e:d1:2d:99,bus=pci.0,addr=0x3

    qemu的main函数会调用net_init_clients进行网络设备的初始化,可以解析net参数,也可以解析netdev参数,如下所示:

    /*
     * Parse and instantiate all network clients given on the command line:
     * first the -netdev group, then -nic, then legacy -net.
     * Returns 0 on success, -1 as soon as any group fails.
     */
    int net_init_clients(Error **errp)
    {
        QTAILQ_INIT(&net_clients);

        /* || short-circuits, so the groups are processed in order and we
         * stop at the first failure — identical to the cascaded ifs. */
        if (qemu_opts_foreach(qemu_find_opts("netdev"),
                              net_init_netdev, NULL, errp) ||
            qemu_opts_foreach(qemu_find_opts("nic"),
                              net_param_nic, NULL, errp) ||
            qemu_opts_foreach(qemu_find_opts("net"),
                              net_init_client, NULL, errp)) {
            return -1;
        }

        return 0;
    }

    net_init_clients会解析参数。上面的参数netdev会调用net_init_netdev->net_client_init->net_client_init1。net_client_init1会根据不同的driver类型,调用不同的初始化函数,如下所示:

    /* Dispatch table mapping each -netdev driver type to its init function;
     * net_client_init1 indexes it by the parsed driver enum (tap -> net_init_tap). */
    static int (* const net_client_init_fun[NET_CLIENT_DRIVER__MAX])(
        const Netdev *netdev,
        const char *name,
        NetClientState *peer, Error **errp) = {
            [NET_CLIENT_DRIVER_NIC]       = net_init_nic,
            [NET_CLIENT_DRIVER_TAP]       = net_init_tap,
            [NET_CLIENT_DRIVER_SOCKET]    = net_init_socket,
            [NET_CLIENT_DRIVER_HUBPORT]   = net_init_hubport,
    ......
    };

    由于配置的driver类型是tap,因而这里会调用net_init_tap->net_tap_init->tap_open,如下所示:

    #define PATH_NET_TUN "/dev/net/tun"

    /*
     * Open the host TUN/TAP control device and configure a tap interface on
     * the returned fd: query the driver's features, request IFF_TAP |
     * IFF_NO_PI (raw ethernet frames without the packet-info header),
     * optionally enable the virtio_net_hdr prefix, issue TUNSETIFF, and
     * switch the fd to non-blocking mode.
     * Fix: the excerpt was missing the function's closing brace.
     */
    int tap_open(char *ifname, int ifname_size, int *vnet_hdr,
                 int vnet_hdr_required, int mq_required, Error **errp)
    {
        struct ifreq ifr;
        int fd, ret;
        int len = sizeof(struct virtio_net_hdr);
        unsigned int features;

        /* TFR is QEMU's retry-on-EINTR wrapper around the syscall. */
        TFR(fd = open(PATH_NET_TUN, O_RDWR));
        memset(&ifr, 0, sizeof(ifr));
        ifr.ifr_flags = IFF_TAP | IFF_NO_PI;

        if (ioctl(fd, TUNGETFEATURES, &features) == -1) {
            features = 0;   /* old kernel: assume no optional features */
        }

        if (features & IFF_ONE_QUEUE) {
            ifr.ifr_flags |= IFF_ONE_QUEUE;
        }

        /* Negotiate the virtio_net_hdr prefix if the kernel supports it. */
        if (*vnet_hdr) {
            if (features & IFF_VNET_HDR) {
                *vnet_hdr = 1;
                ifr.ifr_flags |= IFF_VNET_HDR;
            } else {
                *vnet_hdr = 0;
            }
            ioctl(fd, TUNSETVNETHDRSZ, &len);
        }
    ......
        ret = ioctl(fd, TUNSETIFF, (void *) &ifr);
    ......
        fcntl(fd, F_SETFL, O_NONBLOCK);
        return fd;
    }
    在tap_open中打开一个文件"/dev/net/tun",然后通过ioctl操作这个文件。这和KVM的用法很像:打开这个字符设备文件之后,通过ioctl操作它来和内核打交道,从而借用内核提供的能力,如下图所示:

    为什么需要使用内核的机制呢?因为网络包需要从虚拟机里面发送到虚拟机外面,发送到宿主机上的时候,必须是一个正常的网络包才能被转发。要形成一个网络包,那就需要经过复杂的协议栈。客户机会将网络包发送给qemu,qemu自己没有网络协议栈,现去实现一个也不可能,太复杂了,于是它就要借助内核的力量。qemu会将客户机发送给它的网络包转换成为文件流,写入"/dev/net/tun"字符设备,就像写一个文件一样。内核中TUN/TAP字符设备驱动会收到这个写入的文件流,然后交给TUN/TAP的虚拟网卡驱动,这个驱动会将文件流再次转成网络包,交给TCP/IP栈,最终从虚拟TAP网卡tap0发出来,成为标准的网络包。后面会看到这个过程。

    现在到内核里面,看一看打开"/dev/net/tun"字符设备后,内核会发生什么事情。内核的实现在drivers/net/tun.c文件中,这是一个字符设备驱动程序,应该符合字符设备的格式,如下所示:

    /* Standard kernel module boilerplate for the TUN/TAP driver. */
    module_init(tun_init);
    module_exit(tun_cleanup);
    MODULE_DESCRIPTION(DRV_DESCRIPTION);
    MODULE_AUTHOR(DRV_COPYRIGHT);
    MODULE_LICENSE("GPL");
    MODULE_ALIAS_MISCDEV(TUN_MINOR);
    MODULE_ALIAS("devname:net/tun");

    /*
     * Module init: registers the rtnl link ops, the /dev/net/tun misc
     * character device (tun_miscdev), and a netdevice notifier.
     * (Error handling between the calls is elided in this excerpt.)
     */
    static int __init tun_init(void)
    {
    ......
      ret = rtnl_link_register(&tun_link_ops);
    ......
      ret = misc_register(&tun_miscdev);
    ......
      ret = register_netdevice_notifier(&tun_notifier_block);
    ......
    }

    这里面注册了一个tun_miscdev字符设备,从它的定义可以看出,这就是"/dev/net/tun"字符设备,如下所示:

    /* The misc device behind /dev/net/tun (minor TUN_MINOR). */
    static struct miscdevice tun_miscdev = {
      .minor = TUN_MINOR,
      .name = "tun",
      .nodename = "net/tun",
      .fops = &tun_fops,
    };

    /* File operations for the character device: read/write move packet
     * data, ioctl configures the device, open/release manage the tun_file. */
    static const struct file_operations tun_fops = {
      .owner  = THIS_MODULE,
      .llseek = no_llseek,
      .read_iter  = tun_chr_read_iter,
      .write_iter = tun_chr_write_iter,
      .poll  = tun_chr_poll,
      .unlocked_ioctl  = tun_chr_ioctl,
      .open  = tun_chr_open,
      .release = tun_chr_close,
      .fasync = tun_chr_fasync,
    };

    qemu的tap_open函数会打开这个字符设备PATH_NET_TUN。打开字符设备的过程这里不再重复,总之到了驱动这一层,调用的是tun_chr_open,如下所示:

    /*
     * open() handler for /dev/net/tun: allocate a tun_file (which embeds a
     * struct sock and a struct socket) and stash it in file->private_data so
     * later reads/writes/ioctls on the fd can find it.
     * NOTE(review): `net` has no declaration in this excerpt; the full
     * kernel source derives it from the opener's network namespace — confirm
     * against the original drivers/net/tun.c.
     */
    static int tun_chr_open(struct inode *inode, struct file * file)
    {
      struct tun_file *tfile;
      /* Allocate the sock; the cast works because sk is tun_file's first member. */
      tfile = (struct tun_file *)sk_alloc(net, AF_UNSPEC, GFP_KERNEL,
                  &tun_proto, 0);
      RCU_INIT_POINTER(tfile->tun, NULL);   /* not attached to a device yet */
      tfile->flags = 0;
      tfile->ifindex = 0;

      init_waitqueue_head(&tfile->wq.wait);
      RCU_INIT_POINTER(tfile->socket.wq, &tfile->wq);

      /* Back-link the embedded socket to this struct file. */
      tfile->socket.file = file;
      tfile->socket.ops = &tun_socket_ops;

      sock_init_data(&tfile->socket, &tfile->sk);

      tfile->sk.sk_write_space = tun_sock_write_space;
      tfile->sk.sk_sndbuf = INT_MAX;

      /* Subsequent fops calls retrieve the tun_file from here. */
      file->private_data = tfile;
      INIT_LIST_HEAD(&tfile->next);

      sock_set_flag(&tfile->sk, SOCK_ZEROCOPY);

      return 0;
    }

    在tun_chr_open的参数里面有一个struct file,它代表的就是打开的字符设备文件"/dev/net/tun",因而往这个字符设备文件中写数据,就会通过这个struct file写入。这个struct file里面的file_operations,按照字符设备打开的规则,指向的就是tun_fops。另外还需要在tun_chr_open创建一个结构struct tun_file,并且将struct file的private_data指向它,如下所示:

    /* A tun_file connects an open character device to a tuntap netdevice. It
     * also contains all socket related structures
     * to serve as one transmit queue for the tuntap device.
     */
    struct tun_file {
      struct sock sk;        /* embedded first: tun_chr_open casts sk_alloc()'s
                              * struct sock* straight to tun_file* */
      struct socket socket;
      struct socket_wq wq;
      struct tun_struct __rcu *tun;   /* device this queue is attached to */
      struct fasync_struct *fasync;
      /* only used for fasync */
      unsigned int flags;
      union {
        u16 queue_index;
        unsigned int ifindex;
      };
      struct list_head next;
      struct tun_struct *detached;
      struct skb_array tx_array;
    };
     
    /*
     * Per-device state of a tun/tap network device; lives in the
     * netdev_priv() area of the net_device (see tun_set_iff).
     */
    struct tun_struct {
      struct tun_file __rcu  *tfiles[MAX_TAP_QUEUES];  /* attached queues */
      unsigned int            numqueues;
      unsigned int     flags;       /* IFF_TUN or IFF_TAP, etc. */
      kuid_t      owner;
      kgid_t      group;

      struct net_device  *dev;      /* the host-visible tun%d/tap%d device */
      netdev_features_t  set_features;
      int      align;
      int      vnet_hdr_sz;         /* size of the virtio_net_hdr prefix */
      int      sndbuf;
      struct tap_filter  txflt;
      struct sock_fprog  fprog;
      /* protected by rtnl lock */
      bool      filter_attached;
      spinlock_t lock;
      struct hlist_head flows[TUN_NUM_FLOW_ENTRIES];
      struct timer_list flow_gc_timer;
      unsigned long ageing_time;
      unsigned int numdisabled;
      struct list_head disabled;
      void *security;
      u32 flow_count;
      u32 rx_batched;
      struct tun_pcpu_stats __percpu *pcpu_stats;
    };
     
    /*
     * proto_ops for the socket embedded in tun_file (installed in
     * tun_chr_open), giving in-kernel callers sendmsg/recvmsg on the queue.
     * Fix: the original excerpt was missing the terminating ';' after the
     * initializer, which is a syntax error in C.
     */
    static const struct proto_ops tun_socket_ops = {
      .peek_len = tun_peek_len,
      .sendmsg = tun_sendmsg,
      .recvmsg = tun_recvmsg,
    };

    在struct tun_file中有一个成员struct tun_struct,它里面有一个struct net_device,这个用来表示宿主机上的tuntap网络设备。在struct tun_file中,还有struct socket和struct sock,因为要用到内核的网络协议栈,所以就需要这两个结构,这在以前网络协议部分已经分析过了。所以按照struct tun_file的注释所说,这是一个很重要的数据结构,"/dev/net/tun"对应的struct file的private_data指向它,因而可以接收qemu发过来的数据。除此之外,它还可以通过struct sock来操作内核协议栈,然后将网络包从宿主机上的tuntap网络设备发出去,宿主机上的tuntap网络设备对应的struct net_device也归它管。

    在qemu的tap_open函数中,打开这个字符设备文件之后,接下来通过ioctl发出TUNSETIFF命令来设置宿主机的网卡。这个ioctl进入内核后会调用tun_chr_ioctl,如下所示:

    /*
     * ioctl() handler for /dev/net/tun. Copies the ifreq in from user space,
     * dispatches on cmd (TUNSETIFF creates/attaches the tuntap device via
     * tun_set_iff), and copies the updated ifreq back out. The remaining
     * commands and error paths are elided in this excerpt.
     */
    static long __tun_chr_ioctl(struct file *file, unsigned int cmd,
              unsigned long arg, int ifreq_len)
    {
      struct tun_file *tfile = file->private_data;
      struct tun_struct *tun;
      void __user* argp = (void __user*)arg;
      struct ifreq ifr;
      kuid_t owner;
      kgid_t group;
      int sndbuf;
      int vnet_hdr_sz;
      unsigned int ifindex;
      int le;
      int ret;

      if (cmd == TUNSETIFF || cmd == TUNSETQUEUE || _IOC_TYPE(cmd) == SOCK_IOC_TYPE) {
        if (copy_from_user(&ifr, argp, ifreq_len))
          return -EFAULT;
      }
    ......
      tun = __tun_get(tfile);
      if (cmd == TUNSETIFF) {
        /* Force NUL termination of the user-supplied interface name.
         * Fix: the article dropped the backslash — '' is an invalid empty
         * character constant; the kernel source uses '\0'. */
        ifr.ifr_name[IFNAMSIZ-1] = '\0';
        ret = tun_set_iff(sock_net(&tfile->sk), file, &ifr);
    ......
        if (copy_to_user(argp, &ifr, ifreq_len))
          ret = -EFAULT;
      }
    ......

    在__tun_chr_ioctl中,首先通过copy_from_user把配置从用户态拷贝到内核态,调用tun_set_iff设置tuntap网络设备,然后调用copy_to_user将配置结果返回。tun_set_iff的实现如下所示:

    /*
     * TUNSETIFF handler: allocate the tun/tap net_device (with a tun_struct
     * in its private area), attach the opener's tun_file as a queue, and
     * register the device with the network stack so it becomes visible on
     * the host (e.g. via `ip addr`). Error handling is elided.
     * Fix: `err` was used without a declaration in this excerpt.
     */
    static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr)
    {
      struct tun_struct *tun;
      struct tun_file *tfile = file->private_data;
      struct net_device *dev;
      int err;
    ......
      char *name;
      unsigned long flags = 0;
      /* Multi-queue requests get the maximum queue count, otherwise one. */
      int queues = ifr->ifr_flags & IFF_MULTI_QUEUE ?
               MAX_TAP_QUEUES : 1;

      /* Pick device type and default name template from the request flags. */
      if (ifr->ifr_flags & IFF_TUN) {
        /* TUN device */
        flags |= IFF_TUN;
        name = "tun%d";
      } else if (ifr->ifr_flags & IFF_TAP) {
        /* TAP device */
        flags |= IFF_TAP;
        name = "tap%d";
      } else
        return -EINVAL;

      /* A caller-supplied name overrides the %d template. */
      if (*ifr->ifr_name)
        name = ifr->ifr_name;

      /* net_device with tun_struct private data and `queues` TX/RX queues. */
      dev = alloc_netdev_mqs(sizeof(struct tun_struct), name,
                   NET_NAME_UNKNOWN, tun_setup, queues,
                   queues);

      err = dev_get_valid_name(net, dev, name);
      dev_net_set(dev, net);
      dev->rtnl_link_ops = &tun_link_ops;
      dev->ifindex = tfile->ifindex;
      dev->sysfs_groups[0] = &tun_attr_group;

      tun = netdev_priv(dev);
      tun->dev = dev;
      tun->flags = flags;
      tun->txflt.count = 0;
      tun->vnet_hdr_sz = sizeof(struct virtio_net_hdr);

      tun->align = NET_SKB_PAD;
      tun->filter_attached = false;
      tun->sndbuf = tfile->socket.sk->sk_sndbuf;
      tun->rx_batched = 0;

      tun_net_init(dev);
      tun_flow_init(tun);

      /* Attach this tun_file as a queue, then make the device visible. */
      err = tun_attach(tun, file, false);
      err = register_netdevice(tun->dev);

      netif_carrier_on(tun->dev);

      if (netif_running(tun->dev))
        netif_tx_wake_all_queues(tun->dev);

      /* Report the final device name back to the caller. */
      strcpy(ifr->ifr_name, tun->dev->name);
      return 0;
    }

    tun_set_iff创建了struct tun_struct和struct net_device,并且将这个tuntap网络设备通过register_netdevice注册到内核中,这样就能在宿主机上通过ip addr看到这个网卡了,如下图所示:

    下面来解析关联前端设备驱动和后端设备驱动的过程。来看在客户机中发送一个网络包的时候,会发生哪些事情。虚拟机里面的进程发送一个网络包,通过文件系统和Socket调用网络协议栈到达网络设备层,只不过这个不是普通的网络设备,而是virtio_net的驱动。virtio_net的驱动程序代码在Linux操作系统的源代码里面,文件名为drivers/net/virtio_net.c,如下所示:

    /* Module init: register the virtio-net driver with the virtio core.
     * Fix: `ret` was used without a declaration in this excerpt. */
    static __init int virtio_net_driver_init(void)
    {
        int ret;

        ret = register_virtio_driver(&virtio_net_driver);
    ......
    }
    module_init(virtio_net_driver_init);
    module_exit(virtio_net_driver_exit);

    MODULE_DEVICE_TABLE(virtio, id_table);
    MODULE_DESCRIPTION("Virtio network driver");
    MODULE_LICENSE("GPL");

    /* Guest-side driver descriptor: the virtio core invokes .probe
     * (virtnet_probe) when a matching virtio-net device is found. */
    static struct virtio_driver virtio_net_driver = {
      .driver.name =  KBUILD_MODNAME,
      .driver.owner =  THIS_MODULE,
      .id_table =  id_table,
      .validate =  virtnet_validate,
      .probe =  virtnet_probe,
      .remove =  virtnet_remove,
      .config_changed = virtnet_config_changed,
    ......
    };

    在virtio_net的驱动程序的初始化代码中,需要注册一个驱动函数virtio_net_driver。当一个设备驱动作为一个内核模块被初始化的时候,probe函数会被调用,因而来看一下virtnet_probe:

    /*
     * probe() for the guest virtio-net driver: allocate the net_device,
     * fill in ops/features, initialize the virtqueues (init_vqs), and
     * register the netdev so the guest sees a NIC.
     * NOTE(review): max_queue_pairs is used before any visible assignment;
     * the full source reads it from the device config space in code elided
     * here ("......") — confirm against drivers/net/virtio_net.c.
     */
    static int virtnet_probe(struct virtio_device *vdev)
    {
      int i, err;
      struct net_device *dev;
      struct virtnet_info *vi;
      u16 max_queue_pairs;
      int mtu;

      /* Allocate ourselves a network device with room for our info */
      dev = alloc_etherdev_mq(sizeof(struct virtnet_info), max_queue_pairs);

      /* Set up network device as normal. */
      dev->priv_flags |= IFF_UNICAST_FLT | IFF_LIVE_ADDR_CHANGE;
      dev->netdev_ops = &virtnet_netdev;
      dev->features = NETIF_F_HIGHDMA;

      dev->ethtool_ops = &virtnet_ethtool_ops;
      SET_NETDEV_DEV(dev, &vdev->dev);
    ......
      /* MTU range: 68 - 65535 */
      dev->min_mtu = MIN_MTU;
      dev->max_mtu = MAX_MTU;

      /* Set up our device-specific information */
      vi = netdev_priv(dev);
      vi->dev = dev;
      vi->vdev = vdev;
      vdev->priv = vi;
      vi->stats = alloc_percpu(struct virtnet_stats);
      INIT_WORK(&vi->config_work, virtnet_config_changed_work);
    ......
      vi->max_queue_pairs = max_queue_pairs;

      /* Allocate/initialize the rx/tx queues, and invoke find_vqs */
      err = init_vqs(vi);
      netif_set_real_num_tx_queues(dev, vi->curr_queue_pairs);
      netif_set_real_num_rx_queues(dev, vi->curr_queue_pairs);

      virtnet_init_settings(dev);

      err = register_netdev(dev);   /* the NIC becomes visible in the guest */
      virtio_device_ready(vdev);
      virtnet_set_queues(vi, vi->curr_queue_pairs);
    ......
    }

    在virtnet_probe中会创建struct net_device,并且通过register_netdev注册这个网络设备,这样在客户机里面就能看到这个网卡了。在virtnet_probe中,还有一件重要的事情就是,init_vqs会初始化发送和接收的virtqueue,如下所示:

    /*
     * Allocate the send/receive queue arrays, then locate/create the
     * underlying virtqueues and set queue/CPU affinity.
     */
    static int init_vqs(struct virtnet_info *vi)
    {
      int ret;

      /* Allocate send & receive queues */
      ret = virtnet_alloc_queues(vi);
      ret = virtnet_find_vqs(vi);
    ......
      /* Hold CPU hotplug off while assigning queue affinity. */
      get_online_cpus();
      virtnet_set_affinity(vi);
      put_online_cpus();

      return 0;
    }
     
    /*
     * Allocate the per-queue-pair sq/rq arrays and set up NAPI polling for
     * each pair. NOTE(review): the kzalloc results are not checked in this
     * excerpt; the full source has failure handling elided here.
     */
    static int virtnet_alloc_queues(struct virtnet_info *vi)
    {
      int i;

      vi->sq = kzalloc(sizeof(*vi->sq) * vi->max_queue_pairs, GFP_KERNEL);
      vi->rq = kzalloc(sizeof(*vi->rq) * vi->max_queue_pairs, GFP_KERNEL);

      INIT_DELAYED_WORK(&vi->refill, refill_work);
      for (i = 0; i < vi->max_queue_pairs; i++) {
        vi->rq[i].pages = NULL;
        /* One RX and one TX NAPI context per queue pair. */
        netif_napi_add(vi->dev, &vi->rq[i].napi, virtnet_poll,
                 napi_weight);
        netif_tx_napi_add(vi->dev, &vi->sq[i].napi, virtnet_poll_tx,
              napi_tx ? napi_weight : 0);

        sg_init_table(vi->rq[i].sg, ARRAY_SIZE(vi->rq[i].sg));
        ewma_pkt_len_init(&vi->rq[i].mrg_avg_pkt_len);
        sg_init_table(vi->sq[i].sg, ARRAY_SIZE(vi->sq[i].sg));
      }

      return 0;
    }

    按照之前的virtio原理,virtqueue是一个介于客户机前端和qemu后端的一个结构,用于在这两端之间传递数据,对于网络设备来讲有发送和接收两个方向的队列。这里建立的struct virtqueue是客户机前端对于队列管理的数据结构。队列的实体需要通过函数virtnet_find_vqs查找或者生成,这里还会指定接收队列的callback函数为skb_recv_done,发送队列的callback函数为skb_xmit_done。当buffer使用发生变化的时候,可以调用这个callback函数进行通知,如下所示:

    /*
     * Build the callbacks/names arrays for every RX/TX virtqueue, ask the
     * transport (config->find_vqs) to create them, then wire the results
     * back into vi->rq[]/vi->sq[].
     * NOTE(review): `total_vqs` and `ctx` have no visible initialization;
     * the full source computes them in code elided from this excerpt.
     */
    static int virtnet_find_vqs(struct virtnet_info *vi)
    {
      vq_callback_t **callbacks;
      struct virtqueue **vqs;
      int ret = -ENOMEM;
      int i, total_vqs;
      const char **names;

      /* Allocate space for find_vqs parameters */
      vqs = kzalloc(total_vqs * sizeof(*vqs), GFP_KERNEL);
      callbacks = kmalloc(total_vqs * sizeof(*callbacks), GFP_KERNEL);
      names = kmalloc(total_vqs * sizeof(*names), GFP_KERNEL);

      /* Allocate/initialize parameters for send/receive virtqueues */
      for (i = 0; i < vi->max_queue_pairs; i++) {
        callbacks[rxq2vq(i)] = skb_recv_done;   /* RX completion callback */
        callbacks[txq2vq(i)] = skb_xmit_done;   /* TX completion callback */
        names[rxq2vq(i)] = vi->rq[i].name;
        names[txq2vq(i)] = vi->sq[i].name;
      }

      ret = vi->vdev->config->find_vqs(vi->vdev, total_vqs, vqs, callbacks, names, ctx, NULL);
    ......
      for (i = 0; i < vi->max_queue_pairs; i++) {
        vi->rq[i].vq = vqs[rxq2vq(i)];
        vi->rq[i].min_buf_len = mergeable_min_buf_len(vi, vi->rq[i].vq);
        vi->sq[i].vq = vqs[txq2vq(i)];
      }
    ......
    }
     

    这里的find_vqs是在struct virtnet_info里的struct virtio_device里的struct virtio_config_ops *config里面定义的。根据virtio_config_ops的定义,find_vqs会调用vp_modern_find_vqs,到这一步和块设备是一样的了。在vp_modern_find_vqs 中,vp_find_vqs会调用vp_find_vqs_intx。在vp_find_vqs_intx 中,通过request_irq注册一个中断处理函数vp_interrupt,当设备向队列中写入信息时会产生一个中断,也就是vq中断。中断处理函数需要调用相应队列的回调函数,然后根据队列的数目,依次调用vp_setup_vq完成virtqueue、vring的分配和初始化。

    同样,这些数据结构会和virtio后端的VirtIODevice、VirtQueue、vring对应起来,都应该指向刚才创建的那一段内存。客户机同样会通过调用专门给外部设备发送指令的函数iowrite告诉外部的pci设备,这些共享内存的地址。至此前端设备驱动和后端设备驱动之间的两个收发队列就关联好了,这两个队列的格式和块设备是一样的。

    virtio 数据流交互机制

    vring 主要通过两个环形缓冲区来完成数据流的转发,如下图所示。

    vring 包含三个部分,描述符数组 desc,可用的 available ring 和使用过的 used ring。

    desc 用于存储一些关联的描述符,每个描述符记录一个对 buffer 的描述,available ring 则用于 guest 端表示当前有哪些描述符是可用的,而 used ring 则表示 host 端哪些描述符已经被使用。

    Virtio 使用 virtqueue 来实现 I/O 机制,每个 virtqueue 就是一个承载大量数据的队列,具体使用多少个队列取决于需求,例如,virtio 网络驱动程序(virtio-net)使用两个队列(一个用于接收,另一个用于发送),而 virtio 块驱动程序(virtio-blk)仅使用一个队列。

    具体的,假设 guest 要向 host 发送数据,首先,guest 通过函数 virtqueue_add_buf 将存有数据的 buffer 添加到 virtqueue 中,然后调用 virtqueue_kick 函数,virtqueue_kick 调用 virtqueue_notify 函数,通过写入寄存器的方式来通知到 host。host 调用 virtqueue_get_buf 来获取 virtqueue 中收到的数据。

     

    vm_find_vqs --> vm_setup_vq 
                      |
                      | --> vring_create_virtqueue
                                  |--> vring_init
                                  |-->  __vring_new_virtqueue
    virtqueue_add_split    
             | -->  dma_addr_t addr = vring_map_one_sg(vq, sg, DMA_TO_DEVICE)          
             | -->          vq->split.vring.desc       vq->split.vring.avail
    /*
     * Driver-side bookkeeping wrapped around the public struct virtqueue.
     * Exactly one member of the union is active, selected by packed_ring:
     * .split for the classic desc/avail/used layout, .packed for the
     * VIRTIO_F_RING_PACKED layout.
     */
    struct vring_virtqueue {
            struct virtqueue vq;

            /* Is this a packed ring? */
            bool packed_ring;

            /* Is DMA API used? */
            bool use_dma_api;

            /* Can we use weak barriers? */
            bool weak_barriers;

            /* Other side has made a mess, don't try any more. */
            bool broken;

            /* Host supports indirect buffers */
            bool indirect;

            /* Host publishes avail event idx */
            bool event;

            /* Head of free buffer list. */
            unsigned int free_head;
            /* Number we've added since last sync. */
            unsigned int num_added;

            /* Last used index we've seen. */
            u16 last_used_idx;

            union {
                    /* Available for split ring */
                    struct {
                            /* Actual memory layout for this queue. */
                            struct vring vring;

                            /* Last written value to avail->flags */
                            u16 avail_flags_shadow;

                            /*
                             * Last written value to avail->idx in
                             * guest byte order.
                             */
                            u16 avail_idx_shadow;

                            /* Per-descriptor state. */
                            struct vring_desc_state_split *desc_state;

                            /* DMA address and size information */
                            dma_addr_t queue_dma_addr;
                            size_t queue_size_in_bytes;
                    } split;



                    /* Available for packed ring */
                    struct {
                            /* Actual memory layout for this queue. */
                            struct {
                                    unsigned int num;
                                    struct vring_packed_desc *desc;
                                    struct vring_packed_desc_event *driver;
                                    struct vring_packed_desc_event *device;
                            } vring;

                            /* Driver ring wrap counter. */
                            bool avail_wrap_counter;

                            /* Device ring wrap counter. */
                            bool used_wrap_counter;

                            /* Avail used flags. */
                            u16 avail_used_flags;

                            /* Index of the next avail descriptor. */
                            u16 next_avail_idx;

                            /*
                             * Last written value to driver->flags in
                             * guest byte order.
                             */
                            u16 event_flags_shadow;

                            /* Per-descriptor state. */
                            struct vring_desc_state_packed *desc_state;
                            struct vring_desc_extra_packed *desc_extra;

                            /* DMA address and size information */
                            dma_addr_t ring_dma_addr;
                            dma_addr_t driver_event_dma_addr;
                            dma_addr_t device_event_dma_addr;
                            size_t ring_size_in_bytes;
                            size_t event_size_in_bytes;
                    } packed;
            };

            /* How to notify other side. FIXME: commonalize hcalls! */
            bool (*notify)(struct virtqueue *vq);

            /* DMA, allocation, and size information */
            bool we_own_ring;

    #ifdef DEBUG
            /* They're supposed to lock for us. */
            unsigned int in_use;

            /* Figure out if their kicks are too delayed. */
            bool last_add_time_valid;
            ktime_t last_add_time;
    #endif
    };

    virtqueue创建 + DMA地址

    /*
     * Allocate and initialize a virtqueue, selecting the packed or split
     * vring layout based on the negotiated VIRTIO_F_RING_PACKED feature.
     */
    struct virtqueue *vring_create_virtqueue(
        unsigned int index,
        unsigned int num,
        unsigned int vring_align,
        struct virtio_device *vdev,
        bool weak_barriers,
        bool may_reduce_num,
        bool context,
        bool (*notify)(struct virtqueue *),
        void (*callback)(struct virtqueue *),
        const char *name)
    {
        const bool use_packed = virtio_has_feature(vdev, VIRTIO_F_RING_PACKED);

        if (!use_packed) {
            /* Classic split ring: separate desc/avail/used areas. */
            return vring_create_virtqueue_split(index, num, vring_align,
                    vdev, weak_barriers, may_reduce_num,
                    context, notify, callback, name);
        }

        return vring_create_virtqueue_packed(index, num, vring_align,
                vdev, weak_barriers, may_reduce_num,
                context, notify, callback, name);
    }
    
    
    
    
    
    
    
    
    
    /*
     * Return the DMA address of the queue's descriptor area. Only valid for
     * rings the driver allocated itself (we_own_ring), hence the BUG_ON.
     */
    dma_addr_t virtqueue_get_desc_addr(struct virtqueue *_vq)
    {
        struct vring_virtqueue *vq = to_vvq(_vq);

        BUG_ON(!vq->we_own_ring);

        return vq->packed_ring ? vq->packed.ring_dma_addr
                               : vq->split.queue_dma_addr;
    }
    
    
    
     
    
    
    
    
    /*
     * Set up one virtqueue of a virtio-mmio device: select the queue via the
     * QUEUE_SEL register, create the vring in guest memory, then program the
     * ring addresses into the device (PFN for legacy v1, split 64-bit
     * DESC/AVAIL/USED registers for v2) and mark the queue ready.
     *
     * Returns the new virtqueue on success or ERR_PTR(-errno) on failure
     * (callers such as vm_find_vqs() test the result with IS_ERR()).
     */
    static struct virtqueue *vm_setup_vq(struct virtio_device *vdev, unsigned index,
                      void (*callback)(struct virtqueue *vq),
                      const char *name, bool ctx)
    {
        struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vdev);
        struct virtio_mmio_vq_info *info;
        struct virtqueue *vq;
        unsigned long flags;
        unsigned int num;
        int err;

        if (!name)
            return NULL;

        /* Select the queue we're interested in */
        writel(index, vm_dev->base + VIRTIO_MMIO_QUEUE_SEL);

        /* Queue shouldn't already be set up. */
        if (readl(vm_dev->base + (vm_dev->version == 1 ?
                VIRTIO_MMIO_QUEUE_PFN : VIRTIO_MMIO_QUEUE_READY))) {
            err = -ENOENT;
            goto error_available;
        }

        /* Allocate and fill out our active queue description */
        info = kmalloc(sizeof(*info), GFP_KERNEL);
        if (!info) {
            err = -ENOMEM;
            goto error_kmalloc;
        }

        /* A max size of 0 means the device does not implement this queue. */
        num = readl(vm_dev->base + VIRTIO_MMIO_QUEUE_NUM_MAX);
        if (num == 0) {
            err = -ENOENT;
            goto error_new_virtqueue;
        }

        /* Create the vring */
        vq = vring_create_virtqueue(index, num, VIRTIO_MMIO_VRING_ALIGN, vdev,
                     true, true, ctx, vm_notify, callback, name);
        if (!vq) {
            err = -ENOMEM;
            goto error_new_virtqueue;
        }

        /* Activate the queue */
        writel(virtqueue_get_vring_size(vq), vm_dev->base + VIRTIO_MMIO_QUEUE_NUM);
        if (vm_dev->version == 1) {
            u64 q_pfn = virtqueue_get_desc_addr(vq) >> PAGE_SHIFT;

            /*
             * virtio-mmio v1 uses a 32bit QUEUE PFN. If we have something
             * that doesn't fit in 32bit, fail the setup rather than
             * pretending to be successful.
             */
            if (q_pfn >> 32) {
                dev_err(&vdev->dev,
                    "platform bug: legacy virtio-mmio must not be used with RAM above 0x%llxGB\n",
                    0x1ULL << (32 + PAGE_SHIFT - 30));
                err = -E2BIG;
                goto error_bad_pfn;
            }

            writel(PAGE_SIZE, vm_dev->base + VIRTIO_MMIO_QUEUE_ALIGN);
            writel(q_pfn, vm_dev->base + VIRTIO_MMIO_QUEUE_PFN);
        } else {
            u64 addr;

            /* v2 devices take the full 64-bit addresses of the three ring
             * areas through separate LOW/HIGH register pairs. */
            addr = virtqueue_get_desc_addr(vq);
            writel((u32)addr, vm_dev->base + VIRTIO_MMIO_QUEUE_DESC_LOW);
            writel((u32)(addr >> 32),
                    vm_dev->base + VIRTIO_MMIO_QUEUE_DESC_HIGH);

            addr = virtqueue_get_avail_addr(vq);
            writel((u32)addr, vm_dev->base + VIRTIO_MMIO_QUEUE_AVAIL_LOW);
            writel((u32)(addr >> 32),
                    vm_dev->base + VIRTIO_MMIO_QUEUE_AVAIL_HIGH);

            addr = virtqueue_get_used_addr(vq);
            writel((u32)addr, vm_dev->base + VIRTIO_MMIO_QUEUE_USED_LOW);
            writel((u32)(addr >> 32),
                    vm_dev->base + VIRTIO_MMIO_QUEUE_USED_HIGH);

            writel(1, vm_dev->base + VIRTIO_MMIO_QUEUE_READY);
        }

        vq->priv = info;
        info->vq = vq;

        spin_lock_irqsave(&vm_dev->lock, flags);
        list_add(&info->node, &vm_dev->virtqueues);
        spin_unlock_irqrestore(&vm_dev->lock, flags);

        return vq;

    error_bad_pfn:
        vring_del_virtqueue(vq);
    error_new_virtqueue:
        /* De-activate the queue so the device doesn't see a half-built ring. */
        if (vm_dev->version == 1) {
            writel(0, vm_dev->base + VIRTIO_MMIO_QUEUE_PFN);
        } else {
            writel(0, vm_dev->base + VIRTIO_MMIO_QUEUE_READY);
            WARN_ON(readl(vm_dev->base + VIRTIO_MMIO_QUEUE_READY));
        }
        kfree(info);
    error_kmalloc:
    error_available:
        return ERR_PTR(err);
    }
    The following guest-side code is from the Linux kernel (drivers/virtio/virtio_mmio.c and drivers/virtio/virtio_ring.c):
    /*
     * Create all virtqueues requested by the driver for a virtio-mmio device.
     * Registers the shared interrupt handler once, then sets up one queue per
     * non-NULL entry in names[]. Returns 0 on success or a negative errno;
     * on failure every queue created so far is torn down via vm_del_vqs().
     */
    static int vm_find_vqs(struct virtio_device *vdev, unsigned nvqs,
                   struct virtqueue *vqs[],
                   vq_callback_t *callbacks[],
                   const char * const names[],
                   const bool *ctx,
                   struct irq_affinity *desc)
    {
        struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vdev);
        int irq = platform_get_irq(vm_dev->pdev, 0);
        int i, err, queue_idx = 0;

        if (irq < 0) {
            dev_err(&vdev->dev, "Cannot get IRQ resource\n");
            return irq;
        }

        err = request_irq(irq, vm_interrupt, IRQF_SHARED,
                dev_name(&vdev->dev), vm_dev);
        if (err)
            return err;

        for (i = 0; i < nvqs; ++i) {
            /* A NULL name means the driver doesn't want this queue. */
            if (!names[i]) {
                vqs[i] = NULL;
                continue;
            }

            vqs[i] = vm_setup_vq(vdev, queue_idx++, callbacks[i], names[i],
                         ctx ? ctx[i] : false);
            if (IS_ERR(vqs[i])) {
                vm_del_vqs(vdev);
                return PTR_ERR(vqs[i]);
            }
        }

        return 0;
    }
    
    
    /*
     * Allocate DMA-coherent ring memory and build a split-format virtqueue.
     * If a contiguous allocation of the requested size fails and
     * may_reduce_num allows it, the queue size is halved until an allocation
     * succeeds. Returns the new virtqueue or NULL on failure.
     */
    static struct virtqueue *vring_create_virtqueue_split(
        unsigned int index,
        unsigned int num,
        unsigned int vring_align,
        struct virtio_device *vdev,
        bool weak_barriers,
        bool may_reduce_num,
        bool context,
        bool (*notify)(struct virtqueue *),
        void (*callback)(struct virtqueue *),
        const char *name)
    {
        struct virtqueue *vq;
        void *queue = NULL;
        dma_addr_t dma_addr;
        size_t queue_size_in_bytes;
        struct vring vring;

        /* TODO: allocate each queue chunk individually */
        for (; num && vring_size(num, vring_align) > PAGE_SIZE; num /= 2) {
            queue = vring_alloc_queue(vdev, vring_size(num, vring_align),
                          &dma_addr,
                          GFP_KERNEL|__GFP_NOWARN|__GFP_ZERO);
            if (queue)
                break;
            if (!may_reduce_num)
                return NULL;
        }

        /* num was halved to zero without any allocation succeeding. */
        if (!num)
            return NULL;

        if (!queue) {
            /* Ring fits in one page; allocate it now (failure is reported,
             * no __GFP_NOWARN). */
            queue = vring_alloc_queue(vdev, vring_size(num, vring_align),
                          &dma_addr, GFP_KERNEL|__GFP_ZERO);
        }
        if (!queue)
            return NULL;

        queue_size_in_bytes = vring_size(num, vring_align);
        vring_init(&vring, num, queue, vring_align);

        vq = __vring_new_virtqueue(index, vring, vdev, weak_barriers, context,
                       notify, callback, name);
        if (!vq) {
            /* Don't leak the ring memory on failure. */
            vring_free_queue(vdev, queue_size_in_bytes, queue, dma_addr);
            return NULL;
        }

        /* Record ownership: virtqueue_get_desc_addr() and queue teardown
         * rely on these fields being set for rings we allocated here. */
        to_vvq(vq)->split.queue_dma_addr = dma_addr;
        to_vvq(vq)->split.queue_size_in_bytes = queue_size_in_bytes;
        to_vvq(vq)->we_own_ring = true;

        return vq;
    }
    /* Only available for split ring */
    /*
     * Wrap already-allocated ring memory (struct vring) in a fully
     * initialized vring_virtqueue and link it onto vdev->vqs.
     *
     * The caller owns the ring memory: we_own_ring is set to false and the
     * split.queue_dma_addr/queue_size_in_bytes fields are zeroed here
     * (presumably the allocating caller fills them in afterwards — see
     * vring_create_virtqueue_split; confirm against upstream).
     * Returns the embedded struct virtqueue, or NULL on allocation failure
     * or if the device negotiated the packed ring layout.
     */
    struct virtqueue *__vring_new_virtqueue(unsigned int index,
                        struct vring vring,
                        struct virtio_device *vdev,
                        bool weak_barriers,
                        bool context,
                        bool (*notify)(struct virtqueue *),
                        void (*callback)(struct virtqueue *),
                        const char *name)
    {
        unsigned int i;
        struct vring_virtqueue *vq;

        /* This helper builds split rings only; packed rings go elsewhere. */
        if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED))
            return NULL;

        vq = kmalloc(sizeof(*vq), GFP_KERNEL);
        if (!vq)
            return NULL;

        vq->packed_ring = false;
        vq->vq.callback = callback;
        vq->vq.vdev = vdev;
        vq->vq.name = name;
        vq->vq.num_free = vring.num;
        vq->vq.index = index;
        vq->we_own_ring = false;
        vq->notify = notify;
        vq->weak_barriers = weak_barriers;
        vq->broken = false;
        vq->last_used_idx = 0;
        vq->num_added = 0;
        vq->use_dma_api = vring_use_dma_api(vdev);
    #ifdef DEBUG
        vq->in_use = false;
        vq->last_add_time_valid = false;
    #endif

        /* Indirect descriptors are only used when negotiated AND the caller
         * did not request per-buffer context (the two share storage). */
        vq->indirect = virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC) &&
            !context;
        vq->event = virtio_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX);

        if (virtio_has_feature(vdev, VIRTIO_F_ORDER_PLATFORM))
            vq->weak_barriers = false;

        vq->split.queue_dma_addr = 0;
        vq->split.queue_size_in_bytes = 0;

        vq->split.vring = vring;
        vq->split.avail_flags_shadow = 0;
        vq->split.avail_idx_shadow = 0;

        /* No callback?  Tell other side not to bother us. */
        if (!callback) {
            vq->split.avail_flags_shadow |= VRING_AVAIL_F_NO_INTERRUPT;
            if (!vq->event)
                vq->split.vring.avail->flags = cpu_to_virtio16(vdev,
                        vq->split.avail_flags_shadow);
        }

        /* Per-descriptor driver state (token + indirect table pointer). */
        vq->split.desc_state = kmalloc_array(vring.num,
                sizeof(struct vring_desc_state_split), GFP_KERNEL);
        if (!vq->split.desc_state) {
            kfree(vq);
            return NULL;
        }

        /* Put everything in free lists. */
        /* Chain all descriptors into one free list via their next fields;
         * the last one keeps next == 0 from the zeroed ring memory. */
        vq->free_head = 0;
        for (i = 0; i < vring.num-1; i++)
            vq->split.vring.desc[i].next = cpu_to_virtio16(vdev, i + 1);
        memset(vq->split.desc_state, 0, vring.num *
                sizeof(struct vring_desc_state_split));

        list_add_tail(&vq->vq.list, &vdev->vqs);
        return &vq->vq;
    }
    
    /*
     * Add a scatter-gather buffer chain to a split virtqueue: out_sgs
     * device-readable segments followed by in_sgs device-writable ones.
     * Long chains may be placed in an indirect descriptor table so they
     * consume only one slot of the main ring. data is the driver token
     * returned later by virtqueue_get_buf(); ctx is optional per-buffer
     * context (mutually exclusive with indirect descriptors).
     *
     * Returns 0 on success, -ENOSPC if the ring is full, -ENOMEM if a DMA
     * mapping failed (everything mapped so far is unwound).
     */
    static inline int virtqueue_add_split(struct virtqueue *_vq,
                          struct scatterlist *sgs[],
                          unsigned int total_sg,
                          unsigned int out_sgs,
                          unsigned int in_sgs,
                          void *data,
                          void *ctx,
                          gfp_t gfp)
    {
        struct vring_virtqueue *vq = to_vvq(_vq);
        struct scatterlist *sg;
        struct vring_desc *desc;
        unsigned int i, n, avail, descs_used, prev, err_idx;
        int head;
        bool indirect;

        START_USE(vq);  /* paired with the END_USE calls on every exit path */

        BUG_ON(data == NULL);

        head = vq->free_head;

        /* Prefer an indirect table when the chain is long enough: it costs
         * one extra allocation but only one slot of the main ring. */
        if (virtqueue_use_indirect(_vq, total_sg))
            desc = alloc_indirect_split(_vq, total_sg, gfp);
        else {
            desc = NULL;
            WARN_ON_ONCE(total_sg > vq->split.vring.num && !vq->indirect);
        }

        if (desc) {
            /* Use a single buffer which doesn't continue */
            indirect = true;
            /* Set up rest to use this indirect table. */
            i = 0;
            descs_used = 1;
        } else {
            indirect = false;
            desc = vq->split.vring.desc;
            i = head;
            descs_used = total_sg;
        }

        if (vq->vq.num_free < descs_used) {
            pr_debug("Can't add buf len %i - avail = %i\n",
                 descs_used, vq->vq.num_free);
            /* FIXME: for historical reasons, we force a notify here if
             * there are outgoing parts to the buffer.  Presumably the
             * host should service the ring ASAP. */
            if (out_sgs)
                vq->notify(&vq->vq);
            if (indirect)
                kfree(desc);
            END_USE(vq);
            return -ENOSPC;
        }

        /* Device-readable segments first, chained via VRING_DESC_F_NEXT. */
        for (n = 0; n < out_sgs; n++) {
            for (sg = sgs[n]; sg; sg = sg_next(sg)) {
                dma_addr_t addr = vring_map_one_sg(vq, sg, DMA_TO_DEVICE);
                if (vring_mapping_error(vq, addr))
                    goto unmap_release;

                desc[i].flags = cpu_to_virtio16(_vq->vdev, VRING_DESC_F_NEXT);
                desc[i].addr = cpu_to_virtio64(_vq->vdev, addr);
                desc[i].len = cpu_to_virtio32(_vq->vdev, sg->length);
                prev = i;
                i = virtio16_to_cpu(_vq->vdev, desc[i].next);
            }
        }
        /* ... then device-writable segments (VRING_DESC_F_WRITE). */
        for (; n < (out_sgs + in_sgs); n++) {
            for (sg = sgs[n]; sg; sg = sg_next(sg)) {
                dma_addr_t addr = vring_map_one_sg(vq, sg, DMA_FROM_DEVICE);
                if (vring_mapping_error(vq, addr))
                    goto unmap_release;

                desc[i].flags = cpu_to_virtio16(_vq->vdev, VRING_DESC_F_NEXT | VRING_DESC_F_WRITE);
                desc[i].addr = cpu_to_virtio64(_vq->vdev, addr);
                desc[i].len = cpu_to_virtio32(_vq->vdev, sg->length);
                prev = i;
                i = virtio16_to_cpu(_vq->vdev, desc[i].next);
            }
        }
        /* Last one doesn't continue. */
        desc[prev].flags &= cpu_to_virtio16(_vq->vdev, ~VRING_DESC_F_NEXT);

        if (indirect) {
            /* Now that the indirect table is filled in, map it. */
            dma_addr_t addr = vring_map_single(
                vq, desc, total_sg * sizeof(struct vring_desc),
                DMA_TO_DEVICE);
            if (vring_mapping_error(vq, addr))
                goto unmap_release;

            vq->split.vring.desc[head].flags = cpu_to_virtio16(_vq->vdev,
                    VRING_DESC_F_INDIRECT);
            vq->split.vring.desc[head].addr = cpu_to_virtio64(_vq->vdev,
                    addr);

            vq->split.vring.desc[head].len = cpu_to_virtio32(_vq->vdev,
                    total_sg * sizeof(struct vring_desc));
        }

        /* We're using some buffers from the free list. */
        vq->vq.num_free -= descs_used;

        /* Update free pointer */
        if (indirect)
            vq->free_head = virtio16_to_cpu(_vq->vdev,
                        vq->split.vring.desc[head].next);
        else
            vq->free_head = i;

        /* Store token and indirect buffer state. */
        vq->split.desc_state[head].data = data;
        if (indirect)
            vq->split.desc_state[head].indir_desc = desc;
        else
            vq->split.desc_state[head].indir_desc = ctx;

        /* Put entry in available array (but don't update avail->idx until they
         * do sync). */
        avail = vq->split.avail_idx_shadow & (vq->split.vring.num - 1);
        vq->split.vring.avail->ring[avail] = cpu_to_virtio16(_vq->vdev, head);

        /* Descriptors and available array need to be set before we expose the
         * new available array entries. */
        virtio_wmb(vq->weak_barriers);
        vq->split.avail_idx_shadow++;
        vq->split.vring.avail->idx = cpu_to_virtio16(_vq->vdev,
                            vq->split.avail_idx_shadow);
        vq->num_added++;

        pr_debug("Added buffer head %i to %p\n", head, vq);
        END_USE(vq);

        /* This is very unlikely, but theoretically possible.  Kick
         * just in case. */
        if (unlikely(vq->num_added == (1 << 16) - 1))
            virtqueue_kick(_vq);

        return 0;

    unmap_release:
        /* A DMA mapping failed part-way through: walk the chain again and
         * unmap everything mapped before err_idx, then drop the (possibly
         * indirect) descriptor table. */
        err_idx = i;

        if (indirect)
            i = 0;
        else
            i = head;

        for (n = 0; n < total_sg; n++) {
            if (i == err_idx)
                break;
            vring_unmap_one_split(vq, &desc[i]);
            i = virtio16_to_cpu(_vq->vdev, desc[i].next);
        }

        if (indirect)
            kfree(desc);

        END_USE(vq);
        return -ENOMEM;
    }

    The host side lives in QEMU (hw/virtio/virtio.c), which receives the ring address the guest programmed above:

    /*
     * QEMU side: record the guest-programmed descriptor-table address for
     * virtqueue n, then recompute the dependent ring layout. A queue whose
     * vring.num is 0 has not been configured, so the write is ignored.
     */
    void virtio_queue_set_addr(VirtIODevice *vdev, int n, hwaddr addr)
    {
        if (!vdev->vq[n].vring.num) {
            return;
        }

        vdev->vq[n].vring.desc = addr;
        virtio_queue_update_rings(vdev, n);
    }
    
     

     https://blog.csdn.net/qq_33588730/article/details/105397879

  • 相关阅读:
    (转载)C++ string中find() ,rfind() 等函数 用法总结及示例
    UVA 230 Borrowers (STL 行读入的处理 重载小于号)
    UVA 12100 打印队列(STL deque)
    uva 12096 The SetStack Computer(STL set的各种库函数 交集 并集 插入迭代器)
    uva 1592 Database (STL)
    HDU 1087 Super Jumping! Jumping! Jumping!
    hdu 1176 免费馅饼
    HDU 1003 Max Sum
    转战HDU
    hust 1227 Join Together
  • 原文地址:https://www.cnblogs.com/dream397/p/14386024.html
Copyright © 2011-2022 走看看