zoukankan      html  css  js  c++  java
  • dpdk中kni模块

    一,什么是kni,为什么要有kni

         Kni(Kernel NIC Interface)内核网卡接口,是DPDK允许用户态和内核态交换报文的解决方案,模拟了一个虚拟的网口,提供dpdk的应用程序和linux内核之间通讯。kni接口允许报文从用户态接收后转发到linux协议栈去。

         为什么要弄一个kni接口?虽然dpdk的高速转发性能很出色,但是也有自己的一些缺点,比如没有协议栈就是其中一项缺陷。当然也可能是当初设计时就没有打算将协议栈考虑进去,毕竟报文若要先经过协议栈处理再转发,可能会使处理报文的能力大大降低。

       直接上图:

       

      上图是kni的mbuf使用流程图,也可以看出报文的流向,因为报文在代码中其实就是一个个内存指针。其中rx_q右边是用户态,左边是内核态。最后通过调用netif_rx()将报文送入linux协议栈,这其中需要将dpdk的mbuf转换成内核的sk_buff(skb)。

       当linux向kni端口发送报文时,调用回调函数kni_net_tx(),然后报文经过转换之后发送到端口上。

       二:主要代码分析:

         1,和igb uio模块一样,kni模块分成内核以及用户态代码,内核模块在编译出来之后为rte_kni.ko,首先插入内核,dpdk提供了一个用户态的例子。首先看下kni内核模块代码:

           在kni_misc.c中,ko代码入口为

           

    module_init(kni_init);

         可以看到函数从kni_init进入:

         

     1 static int __init
     2 kni_init(void)
     3 {
     4     int rc;
     5 
     6     KNI_PRINT("######## DPDK kni module loading ########
    ");
     7 
     8     if (kni_parse_kthread_mode() < 0) {    //kni的线程模式、单线程还是多线程
     9         KNI_ERR("Invalid parameter for kthread_mode
    ");
    10         return -EINVAL;
    11     }
    12 
    13 #ifdef HAVE_SIMPLIFIED_PERNET_OPERATIONS
    14     rc = register_pernet_subsys(&kni_net_ops);
    15 #else
    16     rc = register_pernet_gen_subsys(&kni_net_id, &kni_net_ops);
    17 #endif
    18     if (rc)
    19         return -EPERM;
    20 
    21     rc = misc_register(&kni_misc);
    22     if (rc != 0) {
    23         KNI_ERR("Misc registration failed
    ");
    24         goto out;
    25     }
    26 
    27     /* Configure the lo mode according to the input parameter */
    28     kni_net_config_lo_mode(lo_mode);
    29 
    30     KNI_PRINT("######## DPDK kni module loaded  ########
    ");
    31 
    32     return 0;
    33 
    34 out:
    35 #ifdef HAVE_SIMPLIFIED_PERNET_OPERATIONS
    36     unregister_pernet_subsys(&kni_net_ops);
    37 #else
    38     register_pernet_gen_subsys(&kni_net_id, &kni_net_ops);
    39 #endif
    40     return rc;
    41 }

         代码比较简单,首先选择kni的线程模式,分为单线程和多线程:所谓单线程是指所有kni端口的收发都由同一个内核线程负责,多线程则是每一个kni端口分别由一个独立的内核线程负责。具体使用哪种模式,是在插入模块时通过模块参数选择的。

         接着调用注册函数misc_register,将kni注册为一个杂项设备(misc device)。其中kni_misc结构体里面定义了该杂项设备的一些操作

        

    1 static struct miscdevice kni_misc = {
    2     .minor = MISC_DYNAMIC_MINOR,
    3     .name = KNI_DEVICE,
    4     .fops = &kni_fops,
    5 };

        这里主要看.fops里面的结构体

    1 static struct file_operations kni_fops = {
    2     .owner = THIS_MODULE,
    3     .open = kni_open,
    4     .release = kni_release,
    5     .unlocked_ioctl = (void *)kni_ioctl,
    6     .compat_ioctl = (void *)kni_compat_ioctl,
    7 };

       这里涉及的主要操作有kni_open,kni_release,以及kni_ioctl,分别对应几个函数

     1 static int
     2 kni_open(struct inode *inode, struct file *file)
     3 {
     4     struct net *net = current->nsproxy->net_ns;
     5     struct kni_net *knet = net_generic(net, kni_net_id);
     6 
     7     /* kni device can be opened by one user only per netns */
     8     if (test_and_set_bit(KNI_DEV_IN_USE_BIT_NUM, &knet->device_in_use))
     9         return -EBUSY;
    10 
    11     /* Create kernel thread for single mode */
    12     if (multiple_kthread_on == 0) {
    13         KNI_PRINT("Single kernel thread for all KNI devices
    ");
    14         /* Create kernel thread for RX */
    15         knet->kni_kthread = kthread_run(kni_thread_single, (void *)knet,
    16                         "kni_single");
    17         if (IS_ERR(knet->kni_kthread)) {
    18             KNI_ERR("Unable to create kernel threaed
    ");
    19             return PTR_ERR(knet->kni_kthread);
    20         }
    21     } else
    22         KNI_PRINT("Multiple kernel thread mode enabled
    ");
    23 
    24     file->private_data = get_net(net);
    25     KNI_PRINT("/dev/kni opened
    ");
    26 
    27     return 0;
    28 }

         kni_open时如果是单线程模式则会创建一个内核线程,并完成对/dev/kni的打开。需要注意/dev/kni是一个设备文件(不是文件夹),它在模块插入、misc_register注册成功之后就可以在host的/dev下看到。

     1 static int
     2 kni_ioctl(struct inode *inode,
     3     unsigned int ioctl_num,
     4     unsigned long ioctl_param)
     5 {
     6     int ret = -EINVAL;
     7     struct net *net = current->nsproxy->net_ns;
     8 
     9     KNI_DBG("IOCTL num=0x%0x param=0x%0lx
    ", ioctl_num, ioctl_param);
    10 
    11     /*
    12      * Switch according to the ioctl called
    13      */
    14     switch (_IOC_NR(ioctl_num)) {
    15     case _IOC_NR(RTE_KNI_IOCTL_TEST):
    16         /* For test only, not used */
    17         break;
    18     case _IOC_NR(RTE_KNI_IOCTL_CREATE):
    19         ret = kni_ioctl_create(net, ioctl_num, ioctl_param);
    20         break;
    21     case _IOC_NR(RTE_KNI_IOCTL_RELEASE):
    22         ret = kni_ioctl_release(net, ioctl_num, ioctl_param);
    23         break;
    24     default:
    25         KNI_DBG("IOCTL default
    ");
    26         break;
    27     }
    28 
    29     return ret;
    30 }

    kni_ioctl函数是与用户态通信的一个接口,其中最主要的是kni_ioctl_create函数:

      1 static int
      2 kni_ioctl_create(struct net *net,
      3         unsigned int ioctl_num, unsigned long ioctl_param)
      4 {
      5     struct kni_net *knet = net_generic(net, kni_net_id);
      6     int ret;
      7     struct rte_kni_device_info dev_info;
      8     struct pci_dev *pci = NULL;
      9     struct pci_dev *found_pci = NULL;
     10     struct net_device *net_dev = NULL;
     11     struct net_device *lad_dev = NULL;
     12     struct kni_dev *kni, *dev, *n;
     13 
     14     printk(KERN_INFO "KNI: Creating kni...
    ");
     15     /* Check the buffer size, to avoid warning */
     16     if (_IOC_SIZE(ioctl_num) > sizeof(dev_info))
     17         return -EINVAL;
     18 
     19     /* Copy kni info from user space */
     20     ret = copy_from_user(&dev_info, (void *)ioctl_param, sizeof(dev_info));
     21     if (ret) {
     22         KNI_ERR("copy_from_user in kni_ioctl_create");
     23         return -EIO;
     24     }
     25 
     26     /**
     27      * Check if the cpu core id is valid for binding,
     28      * for multiple kernel thread mode.
     29      */
     30     if (multiple_kthread_on && dev_info.force_bind &&
     31                 !cpu_online(dev_info.core_id)) {
     32         KNI_ERR("cpu %u is not online
    ", dev_info.core_id);
     33         return -EINVAL;
     34     }
     35 
     36     /* Check if it has been created */
     37     down_read(&knet->kni_list_lock);
     38     list_for_each_entry_safe(dev, n, &knet->kni_list_head, list) {
     39         if (kni_check_param(dev, &dev_info) < 0) {
     40             up_read(&knet->kni_list_lock);
     41             return -EINVAL;
     42         }
     43     }
     44     up_read(&knet->kni_list_lock);
     45 
     46     net_dev = alloc_netdev(sizeof(struct kni_dev), dev_info.name,
     47 #ifdef NET_NAME_UNKNOWN
     48                             NET_NAME_UNKNOWN,
     49 #endif
     50                             kni_net_init);
     51     if (net_dev == NULL) {
     52         KNI_ERR("error allocating device "%s"
    ", dev_info.name);
     53         return -EBUSY;
     54     }
     55 
     56     dev_net_set(net_dev, net);
     57 
     58     kni = netdev_priv(net_dev);
     59 
     60     kni->net_dev = net_dev;
     61     kni->group_id = dev_info.group_id;
     62     kni->core_id = dev_info.core_id;
     63     strncpy(kni->name, dev_info.name, RTE_KNI_NAMESIZE);
     64 
     65     /* Translate user space info into kernel space info */
     66     kni->tx_q = phys_to_virt(dev_info.tx_phys);
     67     kni->rx_q = phys_to_virt(dev_info.rx_phys);
     68     kni->alloc_q = phys_to_virt(dev_info.alloc_phys);
     69     kni->free_q = phys_to_virt(dev_info.free_phys);
     70 
     71     kni->req_q = phys_to_virt(dev_info.req_phys);
     72     kni->resp_q = phys_to_virt(dev_info.resp_phys);
     73     kni->sync_va = dev_info.sync_va;
     74     kni->sync_kva = phys_to_virt(dev_info.sync_phys);
     75 
     76     kni->mbuf_kva = phys_to_virt(dev_info.mbuf_phys);
     77     kni->mbuf_va = dev_info.mbuf_va;
     78 
     79 #ifdef RTE_KNI_VHOST
     80     kni->vhost_queue = NULL;
     81     kni->vq_status = BE_STOP;
     82 #endif
     83     kni->mbuf_size = dev_info.mbuf_size;
     84 
     85     KNI_PRINT("tx_phys:      0x%016llx, tx_q addr:      0x%p
    ",
     86         (unsigned long long) dev_info.tx_phys, kni->tx_q);
     87     KNI_PRINT("rx_phys:      0x%016llx, rx_q addr:      0x%p
    ",
     88         (unsigned long long) dev_info.rx_phys, kni->rx_q);
     89     KNI_PRINT("alloc_phys:   0x%016llx, alloc_q addr:   0x%p
    ",
     90         (unsigned long long) dev_info.alloc_phys, kni->alloc_q);
     91     KNI_PRINT("free_phys:    0x%016llx, free_q addr:    0x%p
    ",
     92         (unsigned long long) dev_info.free_phys, kni->free_q);
     93     KNI_PRINT("req_phys:     0x%016llx, req_q addr:     0x%p
    ",
     94         (unsigned long long) dev_info.req_phys, kni->req_q);
     95     KNI_PRINT("resp_phys:    0x%016llx, resp_q addr:    0x%p
    ",
     96         (unsigned long long) dev_info.resp_phys, kni->resp_q);
     97     KNI_PRINT("mbuf_phys:    0x%016llx, mbuf_kva:       0x%p
    ",
     98         (unsigned long long) dev_info.mbuf_phys, kni->mbuf_kva);
     99     KNI_PRINT("mbuf_va:      0x%p
    ", dev_info.mbuf_va);
    100     KNI_PRINT("mbuf_size:    %u
    ", kni->mbuf_size);
    101 
    102     KNI_DBG("PCI: %02x:%02x.%02x %04x:%04x
    ",
    103                     dev_info.bus,
    104                     dev_info.devid,
    105                     dev_info.function,
    106                     dev_info.vendor_id,
    107                     dev_info.device_id);
    108 
    109     pci = pci_get_device(dev_info.vendor_id, dev_info.device_id, NULL);
    110 
    111     /* Support Ethtool */
    112     while (pci) {
    113         KNI_PRINT("pci_bus: %02x:%02x:%02x 
    ",
    114                     pci->bus->number,
    115                     PCI_SLOT(pci->devfn),
    116                     PCI_FUNC(pci->devfn));
    117 
    118         if ((pci->bus->number == dev_info.bus) &&
    119             (PCI_SLOT(pci->devfn) == dev_info.devid) &&
    120             (PCI_FUNC(pci->devfn) == dev_info.function)) {
    121             found_pci = pci;
    122             switch (dev_info.device_id) {
    123             #define RTE_PCI_DEV_ID_DECL_IGB(vend, dev) case (dev):
    124             #include <rte_pci_dev_ids.h>
    125                 ret = igb_kni_probe(found_pci, &lad_dev);
    126                 break;
    127             #define RTE_PCI_DEV_ID_DECL_IXGBE(vend, dev) 
    128                             case (dev):
    129             #include <rte_pci_dev_ids.h>
    130                 ret = ixgbe_kni_probe(found_pci, &lad_dev);
    131                 break;
    132             default:
    133                 ret = -1;
    134                 break;
    135             }
    136 
    137             KNI_DBG("PCI found: pci=0x%p, lad_dev=0x%p
    ",
    138                             pci, lad_dev);
    139             if (ret == 0) {
    140                 kni->lad_dev = lad_dev;
    141                 kni_set_ethtool_ops(kni->net_dev);
    142             } else {
    143                 KNI_ERR("Device not supported by ethtool");
    144                 kni->lad_dev = NULL;
    145             }
    146 
    147             kni->pci_dev = found_pci;
    148             kni->device_id = dev_info.device_id;
    149             break;
    150         }
    151         pci = pci_get_device(dev_info.vendor_id,
    152                 dev_info.device_id, pci);
    153     }
    154     if (pci)
    155         pci_dev_put(pci);
    156 
    157     if (kni->lad_dev)
    158         memcpy(net_dev->dev_addr, kni->lad_dev->dev_addr, ETH_ALEN);
    159     else
    160         /*
    161          * Generate random mac address. eth_random_addr() is the newer
    162          * version of generating mac address in linux kernel.
    163          */
    164         //random_ether_addr(net_dev->dev_addr);
    165         memcpy(net_dev->dev_addr, &dev_info.kni_mac,ETH_ALEN);
    166     
    167 
    168     ret = register_netdev(net_dev);
    169     if (ret) {
    170         KNI_ERR("error %i registering device "%s"
    ",
    171                     ret, dev_info.name);
    172         kni_dev_remove(kni);
    173         return -ENODEV;
    174     }
    175 
    176 #ifdef RTE_KNI_VHOST
    177     kni_vhost_init(kni);
    178 #endif
    179 
    180     /**
    181      * Create a new kernel thread for multiple mode, set its core affinity,
    182      * and finally wake it up.
    183      */
    184     if (multiple_kthread_on) {
    185         kni->pthread = kthread_create(kni_thread_multiple,
    186                           (void *)kni,
    187                           "kni_%s", kni->name);
    188         if (IS_ERR(kni->pthread)) {
    189             kni_dev_remove(kni);
    190             return -ECANCELED;
    191         }
    192         if (dev_info.force_bind)
    193             kthread_bind(kni->pthread, kni->core_id);
    194         wake_up_process(kni->pthread);
    195     }
    196 
    197     down_write(&knet->kni_list_lock);
    198     list_add(&kni->list, &knet->kni_list_head);
    199     up_write(&knet->kni_list_lock);
    200 
    201     return 0;
    202 }

    ret = copy_from_user(&dev_info, (void *)ioctl_param, sizeof(dev_info));这条语句会拷贝从用户态传过来的消息,dev_info主要存放了虚拟kni网口的相关参数,接下来就会根据dev_info中的参数注册一个kni网口ret = register_netdev(net_dev);

    这个函数完成创建,这样就虚拟出一个网口出来。其中第165行(即 memcpy(net_dev->dev_addr, &dev_info.kni_mac, ETH_ALEN); 这一句)是我自己修改的,因为我发现按照文档提供的方法根本不能ping通,我将生成的kni网口的mac地址修改成dpdk接管的网口的mac之后即可ping通。原生代码是随机生成一个mac。

    2,用户态代码主要分析dpdk提供的example,

     1 int
     2 main(int argc, char** argv)
     3 {
     4     int ret;
     5     uint8_t nb_sys_ports, port;
     6     unsigned i;
     7 
     8     /* Associate signal_hanlder function with USR signals */
     9     signal(SIGUSR1, signal_handler);
    10     signal(SIGUSR2, signal_handler);
    11     signal(SIGRTMIN, signal_handler);
    12     signal(SIGINT, signal_handler);
    13 
    14     /* Initialise EAL */
    15     ret = rte_eal_init(argc, argv);
    16     if (ret < 0)
    17         rte_exit(EXIT_FAILURE, "Could not initialise EAL (%d)
    ", ret);
    18     argc -= ret;
    19     argv += ret;
    20 
    21     /* Parse application arguments (after the EAL ones) */
    22     ret = parse_args(argc, argv);
    23     if (ret < 0)
    24         rte_exit(EXIT_FAILURE, "Could not parse input parameters
    ");
    25 
    26     /* Create the mbuf pool */
    27     pktmbuf_pool = rte_pktmbuf_pool_create("mbuf_pool", NB_MBUF,
    28         MEMPOOL_CACHE_SZ, 0, MBUF_DATA_SZ, rte_socket_id());
    29     if (pktmbuf_pool == NULL) {
    30         rte_exit(EXIT_FAILURE, "Could not initialise mbuf pool
    ");
    31         return -1;
    32     }
    33 
    34     /* Get number of ports found in scan */
    35     nb_sys_ports = rte_eth_dev_count();
    36     if (nb_sys_ports == 0)
    37         rte_exit(EXIT_FAILURE, "No supported Ethernet device found
    ");
    38 
    39     /* Check if the configured port ID is valid */
    40     for (i = 0; i < RTE_MAX_ETHPORTS; i++)
    41         if (kni_port_params_array[i] && i >= nb_sys_ports)
    42             rte_exit(EXIT_FAILURE, "Configured invalid "
    43                         "port ID %u
    ", i);
    44 
    45     /* Initialize KNI subsystem */
    46     init_kni();
    47 
    48     /* Initialise each port */
    49     for (port = 0; port < nb_sys_ports; port++) {
    50         /* Skip ports that are not enabled */
    51         if (!(ports_mask & (1 << port)))
    52             continue;
    53         init_port(port);
    54 
    55         if (port >= RTE_MAX_ETHPORTS)
    56             rte_exit(EXIT_FAILURE, "Can not use more than "
    57                 "%d ports for kni
    ", RTE_MAX_ETHPORTS);
    58 
    59         kni_alloc(port);
    60     }
    61     check_all_ports_link_status(nb_sys_ports, ports_mask);
    62 
    63     /* Launch per-lcore function on every lcore */
    64     rte_eal_mp_remote_launch(main_loop, NULL, CALL_MASTER);
    65     RTE_LCORE_FOREACH_SLAVE(i) {
    66         if (rte_eal_wait_lcore(i) < 0)
    67             return -1;
    68     }
    69 
    70     /* Release resources */
    71     for (port = 0; port < nb_sys_ports; port++) {
    72         if (!(ports_mask & (1 << port)))
    73             continue;
    74         kni_free_kni(port);
    75     }
    76 #ifdef RTE_LIBRTE_XEN_DOM0
    77     rte_kni_close();
    78 #endif
    79     for (i = 0; i < RTE_MAX_ETHPORTS; i++)
    80         if (kni_port_params_array[i]) {
    81             rte_free(kni_port_params_array[i]);
    82             kni_port_params_array[i] = NULL;
    83         }
    84 
    85     return 0;
    86 }

          main函数进来进行一些eal的初始化,随后创建一个pktmbuf_pool,重点看一下init_kni();以及kni_alloc(port);rte_eal_mp_remote_launch(main_loop, NULL, CALL_MASTER);函数。其中init_kni()函数是初始化kni子系统

     1 static void
     2 init_kni(void)
     3 {
     4     unsigned int num_of_kni_ports = 0, i;
     5     struct kni_port_params **params = kni_port_params_array;
     6 
     7     /* Calculate the maximum number of KNI interfaces that will be used */
     8     for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
     9         if (kni_port_params_array[i]) {
    10             num_of_kni_ports += (params[i]->nb_lcore_k ?
    11                 params[i]->nb_lcore_k : 1);
    12         }
    13     }
    14 
    15     /* Invoke rte KNI init to preallocate the ports */
    16     rte_kni_init(num_of_kni_ports);
    17 }

    主要代码在rte_kni_init里面

      1 void
      2 rte_kni_init(unsigned int max_kni_ifaces)
      3 {
      4     uint32_t i;
      5     struct rte_kni_memzone_slot *it;
      6     const struct rte_memzone *mz;
      7 #define OBJNAMSIZ 32
      8     char obj_name[OBJNAMSIZ];
      9     char mz_name[RTE_MEMZONE_NAMESIZE];
     10 
     11     /* Immediately return if KNI is already initialized */
     12     if (kni_memzone_pool.initialized) {
     13         RTE_LOG(WARNING, KNI, "Double call to rte_kni_init()");
     14         return;
     15     }
     16 
     17     if (max_kni_ifaces == 0) {
     18         RTE_LOG(ERR, KNI, "Invalid number of max_kni_ifaces %d
    ",
     19                             max_kni_ifaces);
     20         rte_panic("Unable to initialize KNI
    ");
     21     }
     22 
     23     /* Check FD and open */
     24     if (kni_fd < 0) {
     25         kni_fd = open("/dev/" KNI_DEVICE, O_RDWR);
     26         if (kni_fd < 0)
     27             rte_panic("Can not open /dev/%s
    ", KNI_DEVICE);
     28     }
     29 
     30     /* Allocate slot objects */
     31     kni_memzone_pool.slots = (struct rte_kni_memzone_slot *)
     32                     rte_malloc(NULL,
     33                     sizeof(struct rte_kni_memzone_slot) *
     34                     max_kni_ifaces,
     35                     0);
     36     KNI_MEM_CHECK(kni_memzone_pool.slots == NULL);
     37 
     38     /* Initialize general pool variables */
     39     kni_memzone_pool.initialized = 1;
     40     kni_memzone_pool.max_ifaces = max_kni_ifaces;
     41     kni_memzone_pool.free = &kni_memzone_pool.slots[0];
     42     rte_spinlock_init(&kni_memzone_pool.mutex);
     43 
     44     /* Pre-allocate all memzones of all the slots; panic on error */
     45     for (i = 0; i < max_kni_ifaces; i++) {
     46 
     47         /* Recover current slot */
     48         it = &kni_memzone_pool.slots[i];
     49         it->id = i;
     50 
     51         /* Allocate KNI context */
     52         snprintf(mz_name, RTE_MEMZONE_NAMESIZE, "KNI_INFO_%d", i);
     53         mz = kni_memzone_reserve(mz_name, sizeof(struct rte_kni),
     54                     SOCKET_ID_ANY, 0);
     55         KNI_MEM_CHECK(mz == NULL);
     56         it->m_ctx = mz;
     57 
     58         /* TX RING */
     59         snprintf(obj_name, OBJNAMSIZ, "kni_tx_%d", i);
     60         mz = kni_memzone_reserve(obj_name, KNI_FIFO_SIZE,
     61                             SOCKET_ID_ANY, 0);
     62         KNI_MEM_CHECK(mz == NULL);
     63         it->m_tx_q = mz;
     64 
     65         /* RX RING */
     66         snprintf(obj_name, OBJNAMSIZ, "kni_rx_%d", i);
     67         mz = kni_memzone_reserve(obj_name, KNI_FIFO_SIZE,
     68                             SOCKET_ID_ANY, 0);
     69         KNI_MEM_CHECK(mz == NULL);
     70         it->m_rx_q = mz;
     71 
     72         /* ALLOC RING */
     73         snprintf(obj_name, OBJNAMSIZ, "kni_alloc_%d", i);
     74         mz = kni_memzone_reserve(obj_name, KNI_FIFO_SIZE,
     75                             SOCKET_ID_ANY, 0);
     76         KNI_MEM_CHECK(mz == NULL);
     77         it->m_alloc_q = mz;
     78 
     79         /* FREE RING */
     80         snprintf(obj_name, OBJNAMSIZ, "kni_free_%d", i);
     81         mz = kni_memzone_reserve(obj_name, KNI_FIFO_SIZE,
     82                             SOCKET_ID_ANY, 0);
     83         KNI_MEM_CHECK(mz == NULL);
     84         it->m_free_q = mz;
     85 
     86         /* Request RING */
     87         snprintf(obj_name, OBJNAMSIZ, "kni_req_%d", i);
     88         mz = kni_memzone_reserve(obj_name, KNI_FIFO_SIZE,
     89                             SOCKET_ID_ANY, 0);
     90         KNI_MEM_CHECK(mz == NULL);
     91         it->m_req_q = mz;
     92 
     93         /* Response RING */
     94         snprintf(obj_name, OBJNAMSIZ, "kni_resp_%d", i);
     95         mz = kni_memzone_reserve(obj_name, KNI_FIFO_SIZE,
     96                             SOCKET_ID_ANY, 0);
     97         KNI_MEM_CHECK(mz == NULL);
     98         it->m_resp_q = mz;
     99 
    100         /* Req/Resp sync mem area */
    101         snprintf(obj_name, OBJNAMSIZ, "kni_sync_%d", i);
    102         mz = kni_memzone_reserve(obj_name, KNI_FIFO_SIZE,
    103                             SOCKET_ID_ANY, 0);
    104         KNI_MEM_CHECK(mz == NULL);
    105         it->m_sync_addr = mz;
    106 
    107         if ((i+1) == max_kni_ifaces) {
    108             it->next = NULL;
    109             kni_memzone_pool.free_tail = it;
    110         } else
    111             it->next = &kni_memzone_pool.slots[i+1];
    112     }
    113 
    114     return;
    115 
    116 kni_fail:
    117     rte_panic("Unable to allocate memory for max_kni_ifaces:%d. Increase the amount of hugepages memory
    ",
    118              max_kni_ifaces);
    119 }

    对上图中所有的fifo分配内存。

     1 static int
     2 kni_alloc(uint8_t port_id)
     3 {
     4     uint8_t i;
     5     struct rte_kni *kni;
     6     struct rte_kni_conf conf;
     7     struct kni_port_params **params = kni_port_params_array;
     8 
     9     if (port_id >= RTE_MAX_ETHPORTS || !params[port_id])
    10         return -1;
    11 
    12     params[port_id]->nb_kni = params[port_id]->nb_lcore_k ?
    13                 params[port_id]->nb_lcore_k : 1;
    14 
    15     for (i = 0; i < params[port_id]->nb_kni; i++) {
    16         /* Clear conf at first */
    17         memset(&conf, 0, sizeof(conf));
    18         if (params[port_id]->nb_lcore_k) {
    19             snprintf(conf.name, RTE_KNI_NAMESIZE,
    20                     "vEth%u_%u", port_id, i);
    21             conf.core_id = params[port_id]->lcore_k[i];
    22             conf.force_bind = 1;
    23         } else
    24             snprintf(conf.name, RTE_KNI_NAMESIZE,
    25                         "vEth%u", port_id);
    26         conf.group_id = (uint16_t)port_id;
    27         conf.mbuf_size = MAX_PACKET_SZ;
    28         rte_eth_macaddr_get(port_id, (struct ether_addr *)&conf.kni_mac);
    29         /*
    30          * The first KNI device associated to a port
    31          * is the master, for multiple kernel thread
    32          * environment.
    33          */
    34         if (i == 0) {
    35             struct rte_kni_ops ops;
    36             struct rte_eth_dev_info dev_info;
    37 
    38             memset(&dev_info, 0, sizeof(dev_info));
    39             rte_eth_dev_info_get(port_id, &dev_info);
    40             conf.addr = dev_info.pci_dev->addr;
    41             conf.id = dev_info.pci_dev->id;
    42 
    43             memset(&ops, 0, sizeof(ops));
    44             ops.port_id = port_id;
    45             ops.change_mtu = kni_change_mtu;
    46             ops.config_network_if = kni_config_network_interface;
    47 
    48             kni = rte_kni_alloc(pktmbuf_pool, &conf, &ops);
    49         } else
    50             kni = rte_kni_alloc(pktmbuf_pool, &conf, NULL);
    51 
    52         if (!kni)
    53             rte_exit(EXIT_FAILURE, "Fail to create kni for "
    54                         "port: %d
    ", port_id);
    55         params[port_id]->kni[i] = kni;
    56     }
    57 
    58     return 0;
    59 }
      1 struct rte_kni *
      2 rte_kni_alloc(struct rte_mempool *pktmbuf_pool,
      3           const struct rte_kni_conf *conf,
      4           struct rte_kni_ops *ops)
      5 {
      6     int ret;
      7     struct rte_kni_device_info dev_info;
      8     struct rte_kni *ctx;
      9     char intf_name[RTE_KNI_NAMESIZE];
     10     char mz_name[RTE_MEMZONE_NAMESIZE];
     11     const struct rte_memzone *mz;
     12     const struct rte_mempool *mp;
     13     struct rte_kni_memzone_slot *slot = NULL;
     14 
     15     if (!pktmbuf_pool || !conf || !conf->name[0])
     16         return NULL;
     17 
     18     /* Check if KNI subsystem has been initialized */
     19     if (kni_memzone_pool.initialized != 1) {
     20         RTE_LOG(ERR, KNI, "KNI subsystem has not been initialized. Invoke rte_kni_init() first
    ");
     21         return NULL;
     22     }
     23 
     24     /* Get an available slot from the pool */
     25     slot = kni_memzone_pool_alloc();
     26     if (!slot) {
     27         RTE_LOG(ERR, KNI, "Cannot allocate more KNI interfaces; increase the number of max_kni_ifaces(current %d) or release unusued ones.
    ",
     28             kni_memzone_pool.max_ifaces);
     29         return NULL;
     30     }
     31 
     32     /* Recover ctx */
     33     ctx = slot->m_ctx->addr;
     34     snprintf(intf_name, RTE_KNI_NAMESIZE, "%s", conf->name);
     35 
     36     if (ctx->in_use) {
     37         RTE_LOG(ERR, KNI, "KNI %s is in use
    ", ctx->name);
     38         return NULL;
     39     }
     40     memset(ctx, 0, sizeof(struct rte_kni));
     41     if (ops)
     42         memcpy(&ctx->ops, ops, sizeof(struct rte_kni_ops));
     43 
     44     memset(&dev_info, 0, sizeof(dev_info));
     45     dev_info.bus = conf->addr.bus;
     46     dev_info.devid = conf->addr.devid;
     47     dev_info.function = conf->addr.function;
     48     dev_info.vendor_id = conf->id.vendor_id;
     49     dev_info.device_id = conf->id.device_id;
     50     dev_info.core_id = conf->core_id;
     51     dev_info.force_bind = conf->force_bind;
     52     dev_info.group_id = conf->group_id;
     53     dev_info.mbuf_size = conf->mbuf_size;
     54 
     55     snprintf(ctx->name, RTE_KNI_NAMESIZE, "%s", intf_name);
     56     snprintf(dev_info.name, RTE_KNI_NAMESIZE, "%s", intf_name);
     57 
     58     RTE_LOG(INFO, KNI, "pci: %02x:%02x:%02x 	 %02x:%02x
    ",
     59         dev_info.bus, dev_info.devid, dev_info.function,
     60             dev_info.vendor_id, dev_info.device_id);
     61     /* TX RING */
     62     mz = slot->m_tx_q;
     63     ctx->tx_q = mz->addr;
     64     kni_fifo_init(ctx->tx_q, KNI_FIFO_COUNT_MAX);
     65     dev_info.tx_phys = mz->phys_addr;
     66 
     67     /* RX RING */
     68     mz = slot->m_rx_q;
     69     ctx->rx_q = mz->addr;
     70     kni_fifo_init(ctx->rx_q, KNI_FIFO_COUNT_MAX);
     71     dev_info.rx_phys = mz->phys_addr;
     72 
     73     /* ALLOC RING */
     74     mz = slot->m_alloc_q;
     75     ctx->alloc_q = mz->addr;
     76     kni_fifo_init(ctx->alloc_q, KNI_FIFO_COUNT_MAX);
     77     dev_info.alloc_phys = mz->phys_addr;
     78 
     79     /* FREE RING */
     80     mz = slot->m_free_q;
     81     ctx->free_q = mz->addr;
     82     kni_fifo_init(ctx->free_q, KNI_FIFO_COUNT_MAX);
     83     dev_info.free_phys = mz->phys_addr;
     84 
     85     /* Request RING */
     86     mz = slot->m_req_q;
     87     ctx->req_q = mz->addr;
     88     kni_fifo_init(ctx->req_q, KNI_FIFO_COUNT_MAX);
     89     dev_info.req_phys = mz->phys_addr;
     90 
     91     /* Response RING */
     92     mz = slot->m_resp_q;
     93     ctx->resp_q = mz->addr;
     94     kni_fifo_init(ctx->resp_q, KNI_FIFO_COUNT_MAX);
     95     dev_info.resp_phys = mz->phys_addr;
     96 
     97     /* Req/Resp sync mem area */
     98     mz = slot->m_sync_addr;
     99     ctx->sync_addr = mz->addr;
    100     dev_info.sync_va = mz->addr;
    101     dev_info.sync_phys = mz->phys_addr;
    102 
    103 
    104     /* MBUF mempool */
    105     snprintf(mz_name, sizeof(mz_name), RTE_MEMPOOL_MZ_FORMAT,
    106         pktmbuf_pool->name);
    107     mz = rte_memzone_lookup(mz_name);
    108     KNI_MEM_CHECK(mz == NULL);
    109     mp = (struct rte_mempool *)mz->addr;
    110     /* KNI currently requires to have only one memory chunk */
    111     if (mp->nb_mem_chunks != 1)
    112         goto kni_fail;
    113 
    114     dev_info.mbuf_va = STAILQ_FIRST(&mp->mem_list)->addr;
    115     dev_info.mbuf_phys = STAILQ_FIRST(&mp->mem_list)->phys_addr;
    116     ctx->pktmbuf_pool = pktmbuf_pool;
    117     ctx->group_id = conf->group_id;
    118     ctx->slot_id = slot->id;
    119     ctx->mbuf_size = conf->mbuf_size;
    120 
    121     dev_info.kni_mac = conf->kni_mac;
    122 
    123     ret = ioctl(kni_fd, RTE_KNI_IOCTL_CREATE, &dev_info);
    124     KNI_MEM_CHECK(ret < 0);
    125 
    126     ctx->in_use = 1;
    127 
    128     /* Allocate mbufs and then put them into alloc_q */
    129     kni_allocate_mbufs(ctx);
    130 
    131     return ctx;
    132 
    133 kni_fail:
    134     if (slot)
    135         kni_memzone_pool_release(&kni_memzone_pool.slots[slot->id]);
    136 
    137     return NULL;
    138 }

    其中ret = ioctl(kni_fd, RTE_KNI_IOCTL_CREATE, &dev_info);就是将dev_info传入内核。

     1 static int
     2 main_loop(__rte_unused void *arg)
     3 {
     4     uint8_t i, nb_ports = rte_eth_dev_count();
     5     int32_t f_stop;
     6     const unsigned lcore_id = rte_lcore_id();
     7     enum lcore_rxtx {
     8         LCORE_NONE,
     9         LCORE_RX,
    10         LCORE_TX,
    11         LCORE_MAX
    12     };
    13     enum lcore_rxtx flag = LCORE_NONE;
    14 
    15     for (i = 0; i < nb_ports; i++) {
    16         if (!kni_port_params_array[i])
    17             continue;
    18         if (kni_port_params_array[i]->lcore_rx == (uint8_t)lcore_id) {
    19             flag = LCORE_RX;
    20             break;
    21         } else if (kni_port_params_array[i]->lcore_tx ==
    22                         (uint8_t)lcore_id) {
    23             flag = LCORE_TX;
    24             break;
    25         }
    26     }
    27 
    28     if (flag == LCORE_RX) {
    29         RTE_LOG(INFO, APP, "Lcore %u is reading from port %d
    ",
    30                     kni_port_params_array[i]->lcore_rx,
    31                     kni_port_params_array[i]->port_id);
    32         while (1) {
    33             f_stop = rte_atomic32_read(&kni_stop);
    34             if (f_stop)
    35                 break;
    36             kni_ingress(kni_port_params_array[i]);
    37         }
    38     } else if (flag == LCORE_TX) {
    39         RTE_LOG(INFO, APP, "Lcore %u is writing to port %d
    ",
    40                     kni_port_params_array[i]->lcore_tx,
    41                     kni_port_params_array[i]->port_id);
    42         while (1) {
    43             f_stop = rte_atomic32_read(&kni_stop);
    44             if (f_stop)
    45                 break;
    46             kni_egress(kni_port_params_array[i]);
    47         }
    48     } else
    49         RTE_LOG(INFO, APP, "Lcore %u has nothing to do
    ", lcore_id);
    50 
    51     return 0;
    52 }

    进入循环收发包,

     1 static void
     2 kni_ingress(struct kni_port_params *p)
     3 {
     4     uint8_t i, port_id;
     5     unsigned nb_rx, num;
     6     uint32_t nb_kni;
     7     struct rte_mbuf *pkts_burst[PKT_BURST_SZ];
     8 
     9     if (p == NULL)
    10         return;
    11 
    12     nb_kni = p->nb_kni;
    13     port_id = p->port_id;
    14     for (i = 0; i < nb_kni; i++) {
    15         /* Burst rx from eth */
    16         nb_rx = rte_eth_rx_burst(port_id, 0, pkts_burst, PKT_BURST_SZ);
    17         if (unlikely(nb_rx > PKT_BURST_SZ)) {
    18             RTE_LOG(ERR, APP, "Error receiving from eth
    ");
    19             return;
    20         }
    21         /* Burst tx to kni */
    22         num = rte_kni_tx_burst(p->kni[i], pkts_burst, nb_rx);
    23         kni_stats[port_id].rx_packets += num;
    24         //if(kni_stats[port_id].rx_packets != 0 && kni_stats[port_id].rx_packets%20 == 0 && num > 0)
    25          //   printf("recv packet num : %"PRIu64"
    ",kni_stats[port_id].rx_packets);
    26         rte_kni_handle_request(p->kni[i]);
    27         if (unlikely(num < nb_rx)) {
    28             /* Free mbufs not tx to kni interface */
    29             kni_burst_free_mbufs(&pkts_burst[num], nb_rx - num);
    30             kni_stats[port_id].rx_dropped += nb_rx - num;
    31         }
    32     }
    33 }
     1 static void
     2 kni_egress(struct kni_port_params *p)
     3 {
     4     uint8_t i, port_id;
     5     unsigned nb_tx, num;
     6     uint32_t nb_kni;
     7     struct rte_mbuf *pkts_burst[PKT_BURST_SZ];
     8 
     9     if (p == NULL)
    10         return;
    11 
    12     nb_kni = p->nb_kni;
    13     port_id = p->port_id;
    14     for (i = 0; i < nb_kni; i++) {
    15         /* Burst rx from kni */
    16         num = rte_kni_rx_burst(p->kni[i], pkts_burst, PKT_BURST_SZ);
    17         if (unlikely(num > PKT_BURST_SZ)) {
    18             RTE_LOG(ERR, APP, "Error receiving from KNI
    ");
    19             return;
    20         }
    21         /* Burst tx to eth */
    22         nb_tx = rte_eth_tx_burst(port_id, 0, pkts_burst, (uint16_t)num);
    23         kni_stats[port_id].tx_packets += nb_tx;
    24         if (unlikely(nb_tx < num)) {
    25             /* Free mbufs not tx to NIC */
    26             kni_burst_free_mbufs(&pkts_burst[nb_tx], num - nb_tx);
    27             kni_stats[port_id].tx_dropped += num - nb_tx;
    28         }
    29     }
    30 }

    代码就守护在这个kni网口进行收发包。篇幅有限,后面再整理。

  • 相关阅读:
    Turn the corner
    全排列的递归算法
    全排列的递归算法
    二分   三分搜索
    二分   三分搜索
    理解 Linux 的硬链接与软链接
    一个 Linux 上分析死锁的简单方法
    char能表示(-128~127)
    UNIX网络编程——ioctl 函数的用法详解
    UNIX网络编程——原始套接字(dos攻击)
  • 原文地址:https://www.cnblogs.com/kb342/p/6033139.html
Copyright © 2011-2022 走看看