zoukankan      html  css  js  c++  java
  • DPDK 网卡驱动学习

    DPDK版本19.02

    初始化:

    /* Launch threads, called at application init(). */
    int
    rte_eal_init(int argc, char **argv)
    {
        ...
        /* rte_eal_cpu_init() ->
         *     eal_cpu_core_id()
         *     eal_cpu_socket_id()
         * 读取/sys/devices/system/[cpu|node]
         * 设置lcore_config->[core_role|core_id|socket_id] */
        if (rte_eal_cpu_init() < 0) {
            rte_eal_init_alert("Cannot detect lcores.");
            rte_errno = ENOTSUP;
            return -1;
        }
    
        /* eal_parse_args() ->
         *     eal_parse_common_option() ->
         *         eal_parse_coremask()
         *         eal_parse_master_lcore()
         *         eal_parse_lcores()
         *     eal_adjust_config()
         * 解析-c、--master_lcore、--lcores参数
         * 在eal_parse_lcores()中确认可用的logical CPU
         * 在eal_adjust_config()中设置rte_config.master_lcore为0 (设置第一个lcore为MASTER lcore) */
        fctret = eal_parse_args(argc, argv);
        if (fctret < 0) {
            rte_eal_init_alert("Invalid 'command line' arguments.");
            rte_errno = EINVAL;
            rte_atomic32_clear(&run_once);
            return -1;
        }
        ...
        /* 初始化大页信息 */
        if (rte_eal_memory_init() < 0) {
            rte_eal_init_alert("Cannot init memory
    ");
            rte_errno = ENOMEM;
            return -1;
        }
        ...
        /* eal_thread_init_master() ->
         *     eal_thread_set_affinity()
         * 设置当前线程为MASTER lcore
         * 在eal_thread_set_affinity()中绑定MASTER lcore到logical CPU */
        eal_thread_init_master(rte_config.master_lcore);
        ...
        /* rte_bus_scan() ->
         *     rte_pci_scan() ->
         *         pci_scan_one() ->
         *             pci_parse_sysfs_resource()
         *             rte_pci_add_device()
         * 遍历rte_bus_list链表,调用每个bus的scan函数,pci为rte_pci_scan()
         * 遍历/sys/bus/pci/devices目录,为每个DBSF分配struct rte_pci_device
         * 逐行读取并解析每个DBSF的resource,保存到dev->mem_resource[i]
         * 将dev插入rte_pci_bus.device_list链表 */
        if (rte_bus_scan()) {
            rte_eal_init_alert("Cannot scan the buses for devices
    ");
            rte_errno = ENODEV;
            return -1;
        }
    
        /* pthread_create() ->
         *     eal_thread_loop() ->
         *         eal_thread_set_affinity()
         * 为每个SLAVE lcore创建线程,线程函数为eal_thread_loop()
         * 在eal_thread_set_affinity()中绑定SLAVE lcore到logical CPU */
        RTE_LCORE_FOREACH_SLAVE(i) {
    
            /*
             * create communication pipes between master thread
             * and children
             */
            /* MASTER lcore创建pipes用于MASTER和SLAVE lcore间通信(父子线程间通信) */
            if (pipe(lcore_config[i].pipe_master2slave) < 0)
                rte_panic("Cannot create pipe
    ");
            if (pipe(lcore_config[i].pipe_slave2master) < 0)
                rte_panic("Cannot create pipe
    ");
    
            lcore_config[i].state = WAIT; /* 设置SLAVE lcore的状态为WAIT */
    
            /* create a thread for each lcore */
            ret = pthread_create(&lcore_config[i].thread_id, NULL,
                         eal_thread_loop, NULL);
            ...
        }
    
        /*
         * Launch a dummy function on all slave lcores, so that master lcore
         * knows they are all ready when this function returns.
         */
        rte_eal_mp_remote_launch(sync_func, NULL, SKIP_MASTER);
        rte_eal_mp_wait_lcore();
        ...
        /* Probe all the buses and devices/drivers on them */
        /* rte_bus_probe() ->
         *     rte_pci_probe() ->
         *         pci_probe_all_drivers() ->
         *             rte_pci_probe_one_driver() ->
         *                 rte_pci_match()
         *                 rte_pci_map_device() ->
         *                     pci_uio_map_resource()
         *                 eth_ixgbe_pci_probe()
         * 遍历rte_bus_list链表,调用每个bus的probe函数,pci为rte_pci_probe()
         * rte_pci_probe()/pci_probe_all_drivers()分别遍历rte_pci_bus.device_list/driver_list链表,匹配设备和驱动
         * 映射BAR,调用驱动的probe函数,ixgbe为eth_ixgbe_pci_probe() */
        if (rte_bus_probe()) {
            rte_eal_init_alert("Cannot probe devices
    ");
            rte_errno = ENOTSUP;
            return -1;
        }
        ...
    }

    在dpdk 16.11是没有bud这一层抽象的,直接通过rte_eal_initàrte_eal_pci_init调用pci设备的初始化。

    也就是dpdk 16.11只支持pci这一种总线设备。但是到了dpdk17.11引入了bus的概念。在rte_bus_scan 进行bus的初始化

     1 /* Scan all the buses for registered devices */
     2 int
     3 rte_bus_scan(void)
     4 {
     5     int ret;
     6     struct rte_bus *bus = NULL;
     7 
     8     TAILQ_FOREACH(bus, &rte_bus_list, next) {
     9         ret = bus->scan();
    10         if (ret)
    11             RTE_LOG(ERR, EAL, "Scan for (%s) bus failed.
    ",
    12                 bus->name);
    13     }
    14 
    15     return 0;
    16 }

    这个函数会调用rte_bus_list上注册的所有bus的scan函数,这些bus是通过rte_bus_register函数注册上去的,而宏RTE_REGISTER_BUS又是rte_bus_register的封装。

    重要结构体

    rte_bus_list

     1 struct rte_bus {
     2     TAILQ_ENTRY(rte_bus) next;   /**< Next bus object in linked list */
     3     const char *name;            /**< Name of the bus */
     4     rte_bus_scan_t scan;         /**< Scan for devices attached to bus */
     5     rte_bus_probe_t probe;       /**< Probe devices on bus */
     6     rte_bus_find_device_t find_device; /**< Find a device on the bus */
     7     rte_bus_plug_t plug;         /**< Probe single device for drivers */
     8     rte_bus_unplug_t unplug;     /**< Remove single device from driver */
     9     rte_bus_parse_t parse;       /**< Parse a device name */
    10     struct rte_bus_conf conf;    /**< Bus configuration */
    11 };
    12 
    13 TAILQ_HEAD(rte_bus_list, rte_bus);
    14 
    15 #define    TAILQ_HEAD(name, type)                        
    16 struct name {                                
    17     struct type *tqh_first;    /* first element */            
    18     struct type **tqh_last;    /* addr of last next element */        
    19 }
    20 
    21 /* 定义rte_bus_list */
    22 struct rte_bus_list rte_bus_list =
    23     TAILQ_HEAD_INITIALIZER(rte_bus_list);

    注册pci bus

    将rte_pci_bus插入rte_bus_list链表

     1 struct rte_pci_bus {
     2     struct rte_bus bus;               /**< Inherit the generic class */
     3     struct rte_pci_device_list device_list;  /**< List of PCI devices */
     4     struct rte_pci_driver_list driver_list;  /**< List of PCI drivers */
     5 };
     6 
     7 /* 定义rte_pci_bus */
     8 struct rte_pci_bus rte_pci_bus = {
     9     .bus = {
    10         .scan = rte_pci_scan,
    11         .probe = rte_pci_probe,
    12         .find_device = pci_find_device,
    13         .plug = pci_plug,
    14         .unplug = pci_unplug,
    15         .parse = pci_parse,
    16     },
    17     .device_list = TAILQ_HEAD_INITIALIZER(rte_pci_bus.device_list),
    18     .driver_list = TAILQ_HEAD_INITIALIZER(rte_pci_bus.driver_list),
    19 };
    20 
    21 RTE_REGISTER_BUS(pci, rte_pci_bus.bus);
    22 
    23 #define RTE_REGISTER_BUS(nm, bus) 
    24 RTE_INIT_PRIO(businitfn_ ##nm, 101);  /* 声明为gcc构造函数,先于main()执行 */
    25 static void businitfn_ ##nm(void) 
    26 {
    27     (bus).name = RTE_STR(nm);
    28     rte_bus_register(&bus); 
    29 }
    30 
    31 void
    32 rte_bus_register(struct rte_bus *bus)
    33 {
    34     RTE_VERIFY(bus);
    35     RTE_VERIFY(bus->name && strlen(bus->name));
    36     /* A bus should mandatorily have the scan implemented */
    37     RTE_VERIFY(bus->scan);
    38     RTE_VERIFY(bus->probe);
    39     RTE_VERIFY(bus->find_device);
    40     /* Buses supporting driver plug also require unplug. */
    41     RTE_VERIFY(!bus->plug || bus->unplug);
    42 
    43     /* 将rte_pci_bus.bus插入rte_bus_list链表 */
    44     TAILQ_INSERT_TAIL(&rte_bus_list, bus, next);
    45     RTE_LOG(DEBUG, EAL, "Registered [%s] bus.
    ", bus->name);
    46 }

    设备初始化过程:

    pci设备的初始化是通过以下路径完成的:rte_eal_inità rte_bus_scan->rte_pci_scan。而对应驱动的加载则是通过如下调用完成的:rte_eal_init->rte_bus_probe->rte_pci_probe完成的

    注册pci driver

    将rte_ixgbe_pmd插入rte_pci_bus.driver_list链表

     1 struct rte_pci_driver {
     2     TAILQ_ENTRY(rte_pci_driver) next;       /**< Next in list. */
     3     struct rte_driver driver;               /**< Inherit core driver. */
     4     struct rte_pci_bus *bus;                /**< PCI bus reference. */
     5     pci_probe_t *probe;                     /**< Device Probe function. */
     6     pci_remove_t *remove;                   /**< Device Remove function. */
     7     const struct rte_pci_id *id_table;    /**< ID table, NULL terminated. */
     8     uint32_t drv_flags;                     /**< Flags contolling handling of device. */
     9 };
    10 
    11 /* 定义rte_ixgbe_pmd */
    12 static struct rte_pci_driver rte_ixgbe_pmd = {
    13     .id_table = pci_id_ixgbe_map,
    14     .drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_INTR_LSC,
    15     .probe = eth_ixgbe_pci_probe,
    16     .remove = eth_ixgbe_pci_remove,
    17 };
    18 
    19 RTE_PMD_REGISTER_PCI(net_ixgbe, rte_ixgbe_pmd);
    20 
    21 #define RTE_PMD_REGISTER_PCI(nm, pci_drv) 
    22 RTE_INIT(pciinitfn_ ##nm);  /* 声明为gcc构造函数,先于main()执行 */
    23 static void pciinitfn_ ##nm(void) 
    24 {
    25     (pci_drv).driver.name = RTE_STR(nm);
    26     rte_pci_register(&pci_drv); 
    27 } 
    28 RTE_PMD_EXPORT_NAME(nm, __COUNTER__)
    29 
    30 void
    31 rte_pci_register(struct rte_pci_driver *driver)
    32 {
    33     /* 将rte_ixgbe_pmd插入rte_pci_bus.driver_list链表 */
    34     TAILQ_INSERT_TAIL(&rte_pci_bus.driver_list, driver, next);
    35     driver->bus = &rte_pci_bus;
    36 }

    eth_ixgbe_dev_init()

     1 static int
     2 eth_ixgbe_dev_init(struct rte_eth_dev *eth_dev)
     3 {
     4     ...
     5     eth_dev->dev_ops = &ixgbe_eth_dev_ops; /* 注册ixgbe_eth_dev_ops函数表 */
     6     eth_dev->rx_pkt_burst = &ixgbe_recv_pkts; /* burst收包函数 */
     7     eth_dev->tx_pkt_burst = &ixgbe_xmit_pkts; /* burst发包函数 */
     8     eth_dev->tx_pkt_prepare = &ixgbe_prep_pkts;
     9     ...
    10     hw->device_id = pci_dev->id.device_id; /* device_id */
    11     hw->vendor_id = pci_dev->id.vendor_id; /* vendor_id */
    12     hw->hw_addr = (void *)pci_dev->mem_resource[0].addr; /* mmap()得到的BAR的虚拟地址 */
    13     ...
    14     /* ixgbe_init_shared_code() ->
    15      *     ixgbe_set_mac_type()
    16      *     ixgbe_init_ops_82599()
    17      * 在ixgbe_set_mac_type()中根据vendor_id和device_id设置hw->mac.type,82599为ixgbe_mac_82599EB
    18      * 根据hw->mac.type调用对应的函数设置hw->mac.ops,82599为ixgbe_init_ops_82599() */
    19     diag = ixgbe_init_shared_code(hw);
    20     ...
    21     /* ixgbe_init_hw() ->
    22      *     ixgbe_call_func() ->
    23      *         ixgbe_init_hw_generic() ->
    24      *             ixgbe_reset_hw_82599() ->
    25      *                 ixgbe_get_mac_addr_generic()
    26      * 得到网卡的mac地址 */
    27     diag = ixgbe_init_hw(hw);
    28     ...
    29     ether_addr_copy((struct ether_addr *) hw->mac.perm_addr,
    30             &eth_dev->data->mac_addrs[0]); /* 复制网卡的mac地址到eth_dev->data->mac_addrs */
    31     ...
    32 }
    33 
    34 static const struct eth_dev_ops ixgbe_eth_dev_ops = {
    35     .dev_configure        = ixgbe_dev_configure,
    36     .dev_start            = ixgbe_dev_start,
    37     ...
    38     .rx_queue_setup       = ixgbe_dev_rx_queue_setup,
    39     ...
    40     .tx_queue_setup       = ixgbe_dev_tx_queue_setup,
    41     ...
    42 };

    eth_ixgbe_pci_probe()

     1 static int eth_ixgbe_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
     2     struct rte_pci_device *pci_dev)
     3 {
     4     return rte_eth_dev_pci_generic_probe(pci_dev,
     5         sizeof(struct ixgbe_adapter), eth_ixgbe_dev_init);
     6 }
     7 
     8 static inline int
     9 rte_eth_dev_pci_generic_probe(struct rte_pci_device *pci_dev,
    10     size_t private_data_size, eth_dev_pci_callback_t dev_init)
    11 {
    12     ...
    13     eth_dev = rte_eth_dev_pci_allocate(pci_dev, private_data_size);
    14     ...
    15     ret = dev_init(eth_dev); /* ixgbe为eth_ixgbe_dev_init() */
    16     ...
    17 }
    18 
    19 static inline struct rte_eth_dev *
    20 rte_eth_dev_pci_allocate(struct rte_pci_device *dev, size_t private_data_size)
    21 {
    22     ...
    23     /* rte_eth_dev_allocate() ->
    24      *     rte_eth_dev_find_free_port()
    25      *     rte_eth_dev_data_alloc()
    26      *     eth_dev_get() */
    27     eth_dev = rte_eth_dev_allocate(name);
    28     ...
    29     /* 分配private data,ixgbe为struct ixgbe_adapter */
    30     eth_dev->data->dev_private = rte_zmalloc_socket(name,
    31         private_data_size, RTE_CACHE_LINE_SIZE,
    32         dev->device.numa_node);
    33     ...
    34 }
    35 
    36 struct rte_eth_dev *
    37 rte_eth_dev_allocate(const char *name)
    38 {
    39     ...
    40     /* 遍历rte_eth_devices数组,找到一个空闲的设备 */
    41     port_id = rte_eth_dev_find_free_port();
    42     ...
    43     /* 分配rte_eth_dev_data数组 */
    44     rte_eth_dev_data_alloc();
    45     ...
    46     /* 设置port_id对应的设备的state为RTE_ETH_DEV_ATTACHED */
    47     eth_dev = eth_dev_get(port_id);
    48     ...
    49 }

    ixgbe_recv_pkts()

    接收时回写:
    1、网卡使用DMA写Rx FIFO中的Frame到Rx Ring Buffer中的mbuf,设置desc的DD为1
    2、网卡驱动取走mbuf后,设置desc的DD为0,更新RDT

     1 uint16_t
     2 ixgbe_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
     3         uint16_t nb_pkts)
     4 {
     5     ...
     6     nb_rx = 0;
     7     nb_hold = 0;
     8     rxq = rx_queue;
     9     rx_id = rxq->rx_tail; /* 相当于ixgbe的next_to_clean */
    10     rx_ring = rxq->rx_ring;
    11     sw_ring = rxq->sw_ring;
    12     ...
    13     while (nb_rx < nb_pkts) {
    14         ...
    15         /* 得到rx_tail指向的desc的指针 */
    16         rxdp = &rx_ring[rx_id];
    17         /* 若网卡回写的DD为0,跳出循环 */
    18         staterr = rxdp->wb.upper.status_error;
    19         if (!(staterr & rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD)))
    20             break;
    21         /* 得到rx_tail指向的desc */
    22         rxd = *rxdp;
    23         ...
    24         /* 分配新mbuf */
    25         nmb = rte_mbuf_raw_alloc(rxq->mb_pool);
    26         ...
    27         nb_hold++; /* 统计接收的mbuf数 */
    28         rxe = &sw_ring[rx_id]; /* 得到旧mbuf */
    29         rx_id++; /* 得到下一个desc的index,注意是一个环形缓冲区 */
    30         if (rx_id == rxq->nb_rx_desc)
    31             rx_id = 0;
    32         ...
    33         rte_ixgbe_prefetch(sw_ring[rx_id].mbuf); /* 预取下一个mbuf */
    34         ...
    35         if ((rx_id & 0x3) == 0) {
    36             rte_ixgbe_prefetch(&rx_ring[rx_id]);
    37             rte_ixgbe_prefetch(&sw_ring[rx_id]);
    38         }
    39         ...
    40         rxm = rxe->mbuf; /* rxm指向旧mbuf */
    41         rxe->mbuf = nmb; /* rxe->mbuf指向新mbuf */
    42         dma_addr =
    43             rte_cpu_to_le_64(rte_mbuf_data_dma_addr_default(nmb)); /* 得到新mbuf的总线地址 */
    44         rxdp->read.hdr_addr = 0; /* 清零新mbuf对应的desc的DD,后续网卡会读desc */
    45         rxdp->read.pkt_addr = dma_addr; /* 设置新mbuf对应的desc的总线地址,后续网卡会读desc */
    46         ...
    47         pkt_len = (uint16_t) (rte_le_to_cpu_16(rxd.wb.upper.length) -
    48                       rxq->crc_len); /* 包长 */
    49         rxm->data_off = RTE_PKTMBUF_HEADROOM;
    50         rte_packet_prefetch((char *)rxm->buf_addr + rxm->data_off);
    51         rxm->nb_segs = 1;
    52         rxm->next = NULL;
    53         rxm->pkt_len = pkt_len;
    54         rxm->data_len = pkt_len;
    55         rxm->port = rxq->port_id;
    56         ...
    57         if (likely(pkt_flags & PKT_RX_RSS_HASH)) /* RSS */
    58             rxm->hash.rss = rte_le_to_cpu_32(
    59                         rxd.wb.lower.hi_dword.rss);
    60         else if (pkt_flags & PKT_RX_FDIR) { /* FDIR */
    61             rxm->hash.fdir.hash = rte_le_to_cpu_16(
    62                     rxd.wb.lower.hi_dword.csum_ip.csum) &
    63                     IXGBE_ATR_HASH_MASK;
    64             rxm->hash.fdir.id = rte_le_to_cpu_16(
    65                     rxd.wb.lower.hi_dword.csum_ip.ip_id);
    66         }
    67         ...
    68         rx_pkts[nb_rx++] = rxm; /* 将旧mbuf放入rx_pkts数组 */
    69     }
    70     rxq->rx_tail = rx_id; /* rx_tail指向下一个desc */
    71     ...
    72     nb_hold = (uint16_t) (nb_hold + rxq->nb_rx_hold);
    73     /* 若已处理的mbuf数大于上限(默认为32),更新RDT */
    74     if (nb_hold > rxq->rx_free_thresh) {
    75         ...
    76         rx_id = (uint16_t) ((rx_id == 0) ?
    77                      (rxq->nb_rx_desc - 1) : (rx_id - 1));
    78         IXGBE_PCI_REG_WRITE(rxq->rdt_reg_addr, rx_id); /* 将rx_id写入RDT */
    79         nb_hold = 0; /* 清零nb_hold */
    80     }
    81     rxq->nb_rx_hold = nb_hold; /* 更新nb_rx_hold */
    82     return nb_rx;
    83 }

    ixgbe_xmit_pkts()

     1 uint16_t
     2 ixgbe_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
     3         uint16_t nb_pkts)
     4 {
     5     ...
     6     txq = tx_queue;
     7     sw_ring = txq->sw_ring;
     8     txr     = txq->tx_ring;
     9     tx_id   = txq->tx_tail; /* 相当于ixgbe的next_to_use */
    10     txe = &sw_ring[tx_id]; /* 得到tx_tail指向的entry */
    11     txp = NULL;
    12     ...
    13     /* 若空闲的mbuf数小于下限(默认为32),清理空闲的mbuf */
    14     if (txq->nb_tx_free < txq->tx_free_thresh)
    15         ixgbe_xmit_cleanup(txq);
    16     ...
    17     /* TX loop */
    18     for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
    19         ...
    20         tx_pkt = *tx_pkts++; /* 待发送的mbuf */
    21         pkt_len = tx_pkt->pkt_len; /* 待发送的mbuf的长度 */
    22         ...
    23         nb_used = (uint16_t)(tx_pkt->nb_segs + new_ctx); /* 使用的desc数 */
    24         ...
    25         tx_last = (uint16_t) (tx_id + nb_used - 1); /* tx_last指向最后一个desc */
    26         ...
    27         if (tx_last >= txq->nb_tx_desc) /* 注意是一个环形缓冲区 */
    28             tx_last = (uint16_t) (tx_last - txq->nb_tx_desc);
    29         ...
    30         if (nb_used > txq->nb_tx_free) {
    31             ...
    32             if (ixgbe_xmit_cleanup(txq) != 0) {
    33                 /* Could not clean any descriptors */
    34                 if (nb_tx == 0) /* 若是第一个包(未发包),return 0 */
    35                     return 0;
    36                 goto end_of_tx; /* 若非第一个包(已发包),停止发包,更新发送队列参数 */
    37             }
    38             ...
    39         }
    40         ...
    41         /* 每个包可能包含多个分段,m_seg指向第一个分段 */
    42         m_seg = tx_pkt;
    43         do {
    44             txd = &txr[tx_id]; /* desc */
    45             txn = &sw_ring[txe->next_id]; /* 下一个entry */
    46             ...
    47             txe->mbuf = m_seg; /* 将m_seg挂载到txe */
    48             ...
    49             slen = m_seg->data_len; /* m_seg的长度 */
    50             buf_dma_addr = rte_mbuf_data_dma_addr(m_seg); /* m_seg的总线地址 */
    51             txd->read.buffer_addr =
    52                 rte_cpu_to_le_64(buf_dma_addr); /* 总线地址赋给txd->read.buffer_addr */
    53             txd->read.cmd_type_len =
    54                 rte_cpu_to_le_32(cmd_type_len | slen); /* 长度赋给txd->read.cmd_type_len */
    55             ...
    56             txe->last_id = tx_last; /* last_id指向最后一个desc */
    57             tx_id = txe->next_id; /* tx_id指向下一个desc */
    58             txe = txn; /* txe指向下一个entry */
    59             m_seg = m_seg->next; /* m_seg指向下一个分段 */
    60         } while (m_seg != NULL);
    61         ...
    62         /* 最后一个分段 */
    63         cmd_type_len |= IXGBE_TXD_CMD_EOP;
    64         txq->nb_tx_used = (uint16_t)(txq->nb_tx_used + nb_used); /* 更新nb_tx_used */
    65         txq->nb_tx_free = (uint16_t)(txq->nb_tx_free - nb_used); /* 更新nb_tx_free */
    66         ...
    67         if (txq->nb_tx_used >= txq->tx_rs_thresh) { /* 若使用的mbuf数大于上限(默认为32),设置RS */
    68             ...
    69             cmd_type_len |= IXGBE_TXD_CMD_RS;
    70             ...
    71             txp = NULL; /* txp为NULL表示已设置RS */
    72         } else
    73             txp = txd; /* txp非NULL表示未设置RS */
    74         ...
    75         txd->read.cmd_type_len |= rte_cpu_to_le_32(cmd_type_len);
    76     }
    77     ...
    78 end_of_tx:
    79     /* burst发包的最后一个包的最后一个分段 */
    80     ...
    81     if (txp != NULL) /* 若未设置RS,设置RS */
    82         txp->read.cmd_type_len |= rte_cpu_to_le_32(IXGBE_TXD_CMD_RS);
    83     ...
    84     IXGBE_PCI_REG_WRITE_RELAXED(txq->tdt_reg_addr, tx_id); /* 将tx_id写入TDT */
    85     txq->tx_tail = tx_id; /* tx_tail指向下一个desc */
    86     ...
    87     return nb_tx;
    88 }

     

    rte_eth_rx/tx_burst()

     1 static inline uint16_t
     2 rte_eth_rx_burst(uint8_t port_id, uint16_t queue_id,
     3          struct rte_mbuf **rx_pkts, const uint16_t nb_pkts)
     4 {
     5     /* 得到port_id对应的设备 */
     6     struct rte_eth_dev *dev = &rte_eth_devices[port_id];
     7     ...
     8     /* ixgbe为ixgbe_recv_pkts() */
     9     int16_t nb_rx = (*dev->rx_pkt_burst)(dev->data->rx_queues[queue_id],
    10             rx_pkts, nb_pkts);
    11     ...
    12 }
    13 
    14 static inline uint16_t
    15 rte_eth_tx_burst(uint8_t port_id, uint16_t queue_id,
    16          struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
    17 {
    18     /* 得到port_id对应的设备 */
    19     struct rte_eth_dev *dev = &rte_eth_devices[port_id];
    20     ...
    21     /* ixgbe为ixgbe_xmit_pkts */
    22     return (*dev->tx_pkt_burst)(dev->data->tx_queues[queue_id], tx_pkts, nb_pkts);
    23 }

    1、从描述符中获取报文返回给应用层

            首先根据应用层最后一次获取报文的位置,进而从描述符队列找到待被应用层接收的描述符。此时会判断描述符中的status_error是否已经打上了dd标记,有dd标记说明dma控制器已经把报文放到mbuf中了。这里解释下dd标记,当dma控制器将接收到的报文保存到描述符指向的mbuf空间时,由dma控制器打上dd标记,表示dma控制器已经把报文放到mbuf中了。应用层在获取完报文后,需要清除dd标记。

            找到了描述符的位置,也就找到了mbuf空间。此时会根据描述符里面保存的信息,填充mbuf结构。例如填充报文的长度,vlanid, rss等信息。填充完mbuf后,将这个mbuf保存到应用层传进来的结构中,返回给应用层,这样应用层就获取到了这个报文。

     1 uint16_t eth_igb_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
     2 {
     3     while (nb_rx < nb_pkts) 
     4     {
     5         //从描述符队列中找到待被应用层最后一次接收的那个描述符位置
     6         rxdp = &rx_ring[rx_id];
     7         staterr = rxdp->wb.upper.status_error;
     8         //检查状态是否为dd, 不是则说明驱动还没有把报文放到接收队列,直接退出
     9         if (! (staterr & rte_cpu_to_le_32(E1000_RXD_STAT_DD)))
    10         {
    11             break;
    12         };
    13         //找到了描述符的位置,也就从软件队列中找到了mbuf
    14         rxe = &sw_ring[rx_id];
    15         rx_id++;
    16         rxm = rxe->mbuf;        
    17         //填充mbuf
    18         pkt_len = (uint16_t) (rte_le_to_cpu_16(rxd.wb.upper.length) - rxq->crc_len);
    19         rxm->data_off = RTE_PKTMBUF_HEADROOM;
    20         rxm->nb_segs = 1;
    21         rxm->pkt_len = pkt_len;
    22         rxm->data_len = pkt_len;
    23         rxm->port = rxq->port_id;
    24         rxm->hash.rss = rxd.wb.lower.hi_dword.rss;
    25         rxm->vlan_tci = rte_le_to_cpu_16(rxd.wb.upper.vlan);
    26         //保存到应用层
    27         rx_pkts[nb_rx++] = rxm;
    28     }
    29 }

    2、从内存池中获取新的mbuf告诉dma控制器

            当应用层从软件队列中获取到mbuf后, 需要重新从内存池申请一个mbuf空间,并将mbuf地址放到描述符队列中, 相当于告诉dma控制器,后续将收到的报文保存到这个新的mbuf中, 这也是狸猫换太子的过程。描述符是mbuf与dma控制器的中介,那dma控制器怎么知道描述符队列的地址呢?这在上一篇文章中已经介绍过了,将描述符队列的地址写入到了寄存器中,dma控制器通过读取寄存器就知道描述符队列的地址。

            需要注意的是,将mbuf的地址保存到描述符中,此时会将dd标记给清0,这样dma控制器就认为这个mbuf里面的内容已经被应用层接收了,收到新报文后可以重新放到这个mbuf中。

     1 uint16_t eth_igb_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
     2 {
     3     while (nb_rx < nb_pkts) 
     4     {
     5         //申请一个新的mbuf
     6         nmb = rte_rxmbuf_alloc(rxq->mb_pool);
     7         //因为原来的mbuf被应用层取走了。这里替换原来的软件队列mbuf,这样网卡收到报文后可以放到这个新的mbuf
     8         rxe->mbuf = nmb;
     9         dma_addr = rte_cpu_to_le_64(RTE_MBUF_DATA_DMA_ADDR_DEFAULT(nmb));
    10         //将mbuf地址保存到描述符中,相当于高速dma控制器mbuf的地址。
    11         rxdp->read.hdr_addr = dma_addr;            //这里会将dd标记清0
    12         rxdp->read.pkt_addr = dma_addr;            
    13     }
    14 }
  • 相关阅读:
    GCD 信号量使用记录
    使用AFNetWorking 上传文件/图片
    iOS 13 使用LaunchScreen.storyboard设置启动图注意事项
    clipsToBounds和masksToBounds的区别?
    react-native 单页面界面横屏(带导航栏的V5.0不支持,V4.0,V3.0支持)
    react-native 5.0导航栏配置
    使用SSZipArchive 注意事项
    iOS 相册照片heic (实况)
    react-native 集成Code-Push的常用命令
    Java基础知识学习02-运算符、循环语句、break、continue
  • 原文地址:https://www.cnblogs.com/mysky007/p/11219593.html
Copyright © 2011-2022 走看看