zoukankan      html  css  js  c++  java
  • DPDK l2fwd源码学习

    一. 主函数分析

      1 /* Entry point of the l2fwd sample: command-line handling, port setup, launch, teardown.
      2  * Example invocation: ./l2fwd -c 0x3 -n 4 -- -p 3 -q 1
      3  * -c  hexadecimal bitmask of the logical cores to run on (a mask, not a count)
      4  * -n  number of memory channels (decimal); EAL options and application options are separated by --
      5  * -q  number of RX ports (queues) handled by each lcore
      6  * -p  hexadecimal bitmask of the ports to enable (a mask, not a count)
      7  * -t  optional statistics print interval in seconds (10 s by default according to the original note)
      8 */
      9 int main(int argc, char **argv)
     10 {
     11     struct lcore_queue_conf *qconf;
     12     int ret;
     13     uint16_t nb_ports;
     14     uint16_t nb_ports_available = 0;
     15     uint16_t portid, last_port;
     16     unsigned lcore_id, rx_lcore_id;
     17     unsigned nb_ports_in_mask = 0;
     18     unsigned int nb_lcores = 0;
     19     unsigned int nb_mbufs;
     20 
     21     /* init EAL */
     22     /* Initialise the EAL, which parses its own arguments. The original note says the
     23      * getopt and getopt_long parsers stop at the -- marker, which is what separates EAL arguments from application arguments.
     24      */
     25     ret = rte_eal_init(argc, argv);
     26     if (ret < 0)
     27         rte_exit(EXIT_FAILURE, "Invalid EAL arguments
    ");
     28     // ret is the number of arguments consumed by the EAL: shrink argc and advance argv so application parsing starts after them
     29     argc -= ret;
     30     argv += ret;
     31 
     32     force_quit = false;
     33     signal(SIGINT, signal_handler);
     34     signal(SIGTERM, signal_handler);
     35 
     36     /* parse application arguments (after the EAL ones) */
     37     // parse the l2fwd application arguments
     38     ret = l2fwd_parse_args(argc, argv);
     39     if (ret < 0)
     40         rte_exit(EXIT_FAILURE, "Invalid L2FWD arguments
    ");
     41 
     42     printf("MAC updating %s
    ", mac_updating ? "enabled" : "disabled");
     43 
     44     /* convert to number of cycles */
     45     // timer_period (the -t print interval) is multiplied by the timer frequency to turn it into cycles
     46     timer_period *= rte_get_timer_hz();
     47 
     48     nb_ports = rte_eth_dev_count_avail();
     49     if (nb_ports == 0)
     50         rte_exit(EXIT_FAILURE, "No Ethernet ports - bye
    ");
     51 
     52     /* reject a port mask with bits set beyond the nb_ports available ports */
     53     /*
     54      * Background for the mbuf pool created further down (per the original note, in hugepage memory):
     55      * the word socket here means a NUMA socket, not a network socket.
     56      * The original note describes each socket as holding several nodes, each of which holds several cores.
     57      * Each socket has its own memory, which the processors on that socket access fastest;
     58      * accessing the memory of another socket is slower.
     59     */
     60     if (l2fwd_enabled_port_mask & ~((1 << nb_ports) - 1))
     61         rte_exit(EXIT_FAILURE, "Invalid portmask; possible (0x%x)
    ",
     62             (1 << nb_ports) - 1);
     63 
     64     /* reset l2fwd_dst_ports */
     65     // clear the L2 forwarding destination-port table
     66     for (portid = 0; portid < RTE_MAX_ETHPORTS; portid++)
     67         l2fwd_dst_ports[portid] = 0;
     68     // every destination entry is now 0; last_port remembers the first port of a pair not yet completed
     69     last_port = 0;
     70 
     71     /*
     72      * Pair up the enabled ports so that the two ports of each pair forward to each other.
     73      */
     74     RTE_ETH_FOREACH_DEV(portid) {
     75         /* skip ports that are not enabled */
     76         /* l2fwd_enabled_port_mask is the bitmask of enabled ports;
     77          * ports whose bit is clear are skipped.
     78          * Bit n, counted from the least-significant (right-hand) end, is 1 when port n is enabled and 0 when it is not.
     79          * To test bit x, shift 1 left by x to get a value with only bit x set, then AND it with the mask.
     80          * A non-zero result means bit x is set; a zero result means bit x is clear.
     81         */
     82         if ((l2fwd_enabled_port_mask & (1 << portid)) == 0)
     83             continue;
     84         // Resulting table, assuming the enabled ports are 0..n-1. 1 port: dst[0] = 0 (set by the odd-count fix-up after this loop);
     85         // 2 ports: dst[0] = 1, dst[1] = 0;
     86         // 3 ports: dst[0] = 1, dst[1] = 0, dst[2] = 2 (the unpaired port forwards to itself);
     87         // 4 ports: dst[0] = 1, dst[1] = 0, dst[2] = 3, dst[3] = 2.
     88         
     89         if (nb_ports_in_mask % 2) {
     90             l2fwd_dst_ports[portid] = last_port;
     91             l2fwd_dst_ports[last_port] = portid;
     92         }
     93         else
     94             last_port = portid;
     95 
     96         nb_ports_in_mask++;
     97     }
     98     if (nb_ports_in_mask % 2) {
     99         printf("Notice: odd number of ports in portmask.
    ");
    100         l2fwd_dst_ports[last_port] = last_port;
    101     }
    102 
    103     rx_lcore_id = 0;
    104     qconf = NULL;
    105 
    106     /* Initialize the port/queue configuration of each logical core */
    107     RTE_ETH_FOREACH_DEV(portid) {
    108         /* skip ports that are not enabled */
    109         if ((l2fwd_enabled_port_mask & (1 << portid)) == 0)
    110             continue;
    111 
    112         /* get the lcore_id for this port */
    113         // l2fwd_rx_queue_per_lcore is the -q option: move on to the next enabled lcore once the current one already has that many RX ports
    114         while (rte_lcore_is_enabled(rx_lcore_id) == 0 ||
    115                lcore_queue_conf[rx_lcore_id].n_rx_port ==
    116                l2fwd_rx_queue_per_lcore) {
    117             rx_lcore_id++;
    118             if (rx_lcore_id >= RTE_MAX_LCORE)
    119                 rte_exit(EXIT_FAILURE, "Not enough cores
    ");
    120         }
    121 
    122         if (qconf != &lcore_queue_conf[rx_lcore_id]) {
    123             /* Assigned a new logical core in the loop above. */
    124             qconf = &lcore_queue_conf[rx_lcore_id];
    125             nb_lcores++;
    126         }
    127 
    128         qconf->rx_port_list[qconf->n_rx_port] = portid;
    129         qconf->n_rx_port++;
    130         printf("Lcore %u: RX port %u
    ", rx_lcore_id, portid);
    131     }
    132 
    133     nb_mbufs = RTE_MAX(nb_ports * (nb_rxd + nb_txd + MAX_PKT_BURST +
    134         nb_lcores * MEMPOOL_CACHE_SIZE), 8192U);
    135 
    136     /* create the mbuf pool */
    137     l2fwd_pktmbuf_pool = rte_pktmbuf_pool_create("mbuf_pool", nb_mbufs,
    138         MEMPOOL_CACHE_SIZE, 0, RTE_MBUF_DEFAULT_BUF_SIZE,
    139         rte_socket_id());
    140     if (l2fwd_pktmbuf_pool == NULL)
    141         rte_exit(EXIT_FAILURE, "Cannot init mbuf pool
    ");
    142 
    143     /* Initialise each port */
    144     RTE_ETH_FOREACH_DEV(portid) {
    145         struct rte_eth_rxconf rxq_conf;
    146         struct rte_eth_txconf txq_conf;
    147         struct rte_eth_conf local_port_conf = port_conf;
    148         struct rte_eth_dev_info dev_info;
    149 
    150         /* skip ports that are not enabled */
    151         if ((l2fwd_enabled_port_mask & (1 << portid)) == 0) {
    152             printf("Skipping disabled port %u
    ", portid);
    153             continue;
    154         }
    155         nb_ports_available++;
    156 
    157         /* init port */
    158         printf("Initializing port %u... ", portid);
    159         // flush stdout so the progress message above is shown immediately
    160         fflush(stdout);
    161 
    162         // query the device info first; rte_eth_dev_configure below then applies the port configuration with 1 RX queue and 1 TX queue
    163         ret = rte_eth_dev_info_get(portid, &dev_info);
    164         if (ret != 0)
    165             rte_exit(EXIT_FAILURE,
    166                 "Error during getting device (port %u) info: %s
    ",
    167                 portid, strerror(-ret));
    168 
    169         if (dev_info.tx_offload_capa & DEV_TX_OFFLOAD_MBUF_FAST_FREE)
    170             local_port_conf.txmode.offloads |=
    171                 DEV_TX_OFFLOAD_MBUF_FAST_FREE;
    172         ret = rte_eth_dev_configure(portid, 1, 1, &local_port_conf);
    173         if (ret < 0)
    174             rte_exit(EXIT_FAILURE, "Cannot configure device: err=%d, port=%u
    ",
    175                   ret, portid);
    176 
    177         ret = rte_eth_dev_adjust_nb_rx_tx_desc(portid, &nb_rxd,
    178                                &nb_txd);
    179         if (ret < 0)
    180             rte_exit(EXIT_FAILURE,
    181                  "Cannot adjust number of descriptors: err=%d, port=%u
    ",
    182                  ret, portid);
    183 
    184         // fetch the MAC address of this port into l2fwd_ports_eth_addr[]; it is printed further down
    185         ret = rte_eth_macaddr_get(portid,
    186                       &l2fwd_ports_eth_addr[portid]);
    187         if (ret < 0)
    188             rte_exit(EXIT_FAILURE,
    189                  "Cannot get MAC address: err=%d, port=%u
    ",
    190                  ret, portid);
    191 
    192         /* init one RX queue */
    193         // flush stdout again
    194         fflush(stdout);
    195         rxq_conf = dev_info.default_rxconf;
    196         rxq_conf.offloads = local_port_conf.rxmode.offloads;
    197         // set up RX queue 0; nb_rxd is the size of the receive ring, i.e. the maximum number of mbufs it can hold
    198         ret = rte_eth_rx_queue_setup(portid, 0, nb_rxd,
    199                          rte_eth_dev_socket_id(portid),
    200                          &rxq_conf,
    201                          l2fwd_pktmbuf_pool);
    202         if (ret < 0)
    203             rte_exit(EXIT_FAILURE, "rte_eth_rx_queue_setup:err=%d, port=%u
    ",
    204                   ret, portid);
    205 
    206         /* init one TX queue on each port */
    207         fflush(stdout);
    208         txq_conf = dev_info.default_txconf;
    209         txq_conf.offloads = local_port_conf.txmode.offloads;
    210         // set up TX queue 0; nb_txd is the size of the transmit ring, i.e. the maximum number of mbufs it can hold
    211         ret = rte_eth_tx_queue_setup(portid, 0, nb_txd,
    212                 rte_eth_dev_socket_id(portid),
    213                 &txq_conf);
    214         if (ret < 0)
    215             rte_exit(EXIT_FAILURE, "rte_eth_tx_queue_setup:err=%d, port=%u
    ",
    216                 ret, portid);
    217 
    218         /* Initialize TX buffers */
    219         // allocate the TX (send) buffer of this port on the NUMA socket the port is attached to
    220         tx_buffer[portid] = rte_zmalloc_socket("tx_buffer",
    221                 RTE_ETH_TX_BUFFER_SIZE(MAX_PKT_BURST), 0,
    222                 rte_eth_dev_socket_id(portid));
    223         if (tx_buffer[portid] == NULL)
    224             rte_exit(EXIT_FAILURE, "Cannot allocate buffer for tx on port %u
    ",
    225                     portid);
    226 
    227         rte_eth_tx_buffer_init(tx_buffer[portid], MAX_PKT_BURST);
    228 
    229         ret = rte_eth_tx_buffer_set_err_callback(tx_buffer[portid],
    230                 rte_eth_tx_buffer_count_callback,
    231                 &port_statistics[portid].dropped);
    232         if (ret < 0)
    233             rte_exit(EXIT_FAILURE,
    234             "Cannot set error callback for tx buffer on port %u
    ",
    235                  portid);
    236 
    237         ret = rte_eth_dev_set_ptypes(portid, RTE_PTYPE_UNKNOWN, NULL,
    238                          0);
    239         if (ret < 0)
    240             printf("Port %u, Failed to disable Ptype parsing
    ",
    241                     portid);
    242         /* Start device */
    243         // start the port
    244         ret = rte_eth_dev_start(portid);
    245         if (ret < 0)
    246             rte_exit(EXIT_FAILURE, "rte_eth_dev_start:err=%d, port=%u
    ",
    247                   ret, portid);
    248 
    249         printf("done: 
    ");
    250 
    251         ret = rte_eth_promiscuous_enable(portid);
    252         if (ret != 0)
    253             rte_exit(EXIT_FAILURE,
    254                  "rte_eth_promiscuous_enable:err=%s, port=%u
    ",
    255                  rte_strerror(-ret), portid);
    256 
    257         printf("Port %u, MAC address: %02X:%02X:%02X:%02X:%02X:%02X
    
    ",
    258                 portid,
    259                 l2fwd_ports_eth_addr[portid].addr_bytes[0],
    260                 l2fwd_ports_eth_addr[portid].addr_bytes[1],
    261                 l2fwd_ports_eth_addr[portid].addr_bytes[2],
    262                 l2fwd_ports_eth_addr[portid].addr_bytes[3],
    263                 l2fwd_ports_eth_addr[portid].addr_bytes[4],
    264                 l2fwd_ports_eth_addr[portid].addr_bytes[5]);
    265 
    266         /* initialize port stats */
    267         // zero the statistics that are printed later (rx, tx and dropped counters); note this clears the whole port_statistics array on every iteration, not just the entry of this port
    268         memset(&port_statistics, 0, sizeof(port_statistics));
    269     }
    270 
    271     if (!nb_ports_available) {
    272         rte_exit(EXIT_FAILURE,
    273             "All available ports are disabled. Please set portmask.
    ");
    274     }
    275 
    276 
    277     // check the link status of every enabled port
    278     check_all_ports_link_status(l2fwd_enabled_port_mask);
    279 
    280     ret = 0;
    281     /* launch per-lcore init on every lcore */
    282     // run l2fwd_launch_one_lcore (a thin wrapper around l2fwd_main_loop) on every lcore, the master included (CALL_MASTER), then wait for the slave lcores to finish
    283     rte_eal_mp_remote_launch(l2fwd_launch_one_lcore, NULL, CALL_MASTER);
    284     RTE_LCORE_FOREACH_SLAVE(lcore_id) {
    285         if (rte_eal_wait_lcore(lcore_id) < 0) {
    286             ret = -1;
    287             break;
    288         }
    289     }
    290 
    291     RTE_ETH_FOREACH_DEV(portid) {
    292         if ((l2fwd_enabled_port_mask & (1 << portid)) == 0)
    293             continue;
    294         printf("Closing port %d...", portid);
    295         rte_eth_dev_stop(portid);
    296         rte_eth_dev_close(portid);
    297         printf(" Done
    ");
    298     }
    299     printf("Bye...
    ");
    300 
    301     return ret;
    302 }

    程序的主要流程如下

     
     

    二. 二层转发和普通的端口转发区别:

    特点 | L2fwd | basicfwd
    端口数量 | 用端口掩码来指定,支持奇数个(落单的端口转发给自己) | 用端口掩码来指定,只能是偶数个
    lcore数量 | 多个,每个lcore负责至多 -q 个port | 一个lcore,执行类似repeater的程序
    转发逻辑 | 启用的端口按顺序两两配对互相转发,转发时会改写MAC地址(mac_updating 开启时) | 只能是 0<-->1,2<-->3 这样的 pair 互相转发
    Tx_buffer | 有发包缓存队列,收的包会缓存到发包队列里,一段时间后或者队列满后才会转发 | 没有发包缓存,Rx收到包后直接Tx出去

    三. 任务分发

    任务由 rte_eal_mp_remote_launch 分发到各个逻辑核(代码如下);每个逻辑核在任务分发后会执行 l2fwd_main_loop 中的循环,直到退出:

     40 /*
     41  * Check that every slave lcore is in the WAIT state (return -EBUSY if any is not), then call
     42  * rte_eal_remote_launch() for all of them. If call_master is
     43  * CALL_MASTER, also call the function on the master lcore. Returns 0 otherwise.
     44  */
     45 int
     46 rte_eal_mp_remote_launch(int (*f)(void *), void *arg,
     47              enum rte_rmt_call_master_t call_master)
     48 {
     49     int lcore_id;
     50     int master = rte_get_master_lcore();
     51 
     52     /* check state of lcores */
     53     RTE_LCORE_FOREACH_SLAVE(lcore_id) {
     54         if (lcore_config[lcore_id].state != WAIT)
     55             return -EBUSY; /* nothing has been launched at this point */
     56     }
     57 
     58     /* send messages to cores */
     59     RTE_LCORE_FOREACH_SLAVE(lcore_id) {
     60         rte_eal_remote_launch(f, arg, lcore_id); /* return value is not checked here */
     61     }
     62 
     63     if (call_master == CALL_MASTER) {
     64         lcore_config[master].ret = f(arg); /* direct call: f runs on the calling thread and this function only returns once f has returned */
     65         lcore_config[master].state = FINISHED;
     66     }
     67 
     68     return 0;
     69 }

    rte_eal_mp_remote_launch(l2fwd_launch_one_lcore, NULL, CALL_MASTER)

    283 static int /* per-lcore entry point handed to rte_eal_mp_remote_launch; always returns 0 */
    284 l2fwd_launch_one_lcore(__attribute__((unused)) void *dummy) /* dummy is never read */
    285 {
    286     l2fwd_main_loop(); /* the forwarding loop itself */
    287     return 0;
    288 }
      1 /* main processing loop: run on each lcore; periodically flushes the TX buffers and polls the RX ports assigned to this lcore until force_quit is set */
      2 static void
      3 l2fwd_main_loop(void)
      4 {
      5     struct rte_mbuf *pkts_burst[MAX_PKT_BURST];
      6     struct rte_mbuf *m;
      7     int sent;
      8     unsigned lcore_id;
      9     uint64_t prev_tsc, diff_tsc, cur_tsc, timer_tsc;
     10     unsigned i, j, portid, nb_rx;
     11     struct lcore_queue_conf *qconf;
     12     const uint64_t drain_tsc = (rte_get_tsc_hz() + US_PER_S - 1) / US_PER_S *
     13             BURST_TX_DRAIN_US;
     14     struct rte_eth_dev_tx_buffer *buffer;
     15 
     16     prev_tsc = 0;
     17     timer_tsc = 0;
     18 
     19     // look up the id of the current lcore and its per-lcore port configuration
     20     lcore_id = rte_lcore_id();
     21     qconf = &lcore_queue_conf[lcore_id];
     22 
     23     // an lcore that was assigned no RX port has nothing to do and returns immediately
     24     if (qconf->n_rx_port == 0) {
     25         RTE_LOG(INFO, L2FWD, "lcore %u has nothing to do
    ", lcore_id);
     26         return;
     27     }
     28 
     29     // an lcore with at least one RX port goes on to the main loop
     30     RTE_LOG(INFO, L2FWD, "entering main loop on lcore %u
    ", lcore_id);
     31 
     32     for (i = 0; i < qconf->n_rx_port; i++) {
     33 
     34         portid = qconf->rx_port_list[i];
     35         RTE_LOG(INFO, L2FWD, " -- lcoreid=%u portid=%u
    ", lcore_id,
     36             portid);
     37 
     38     }
     39 
     40     // loop until force_quit becomes true (presumably set by the SIGINT/SIGTERM handler, i.e. Ctrl+C or kill)
     41     while (!force_quit) {
     42 
     43         cur_tsc = rte_rdtsc();
     44 
     45         /*
     46          * TX burst queue drain
     47          */
     48         // cycles elapsed since the last TX drain
     49         diff_tsc = cur_tsc - prev_tsc;
     50         // once more than drain_tsc cycles (BURST_TX_DRAIN_US microseconds; 100 us per the original note) have passed, flush the TX buffers
     51         if (unlikely(diff_tsc > drain_tsc)) {
     52 
     53             for (i = 0; i < qconf->n_rx_port; i++) {
     54 
     55                 portid = l2fwd_dst_ports[qconf->rx_port_list[i]];
     56                 buffer = tx_buffer[portid];
     57 
     58                 sent = rte_eth_tx_buffer_flush(portid, 0, buffer);
     59                 if (sent)
     60                     port_statistics[portid].tx += sent;
     61 
     62             }
     63 
     64             /* if timer is enabled */
     65              // statistics timer: accumulate the elapsed cycles and print once timer_period is reached
     66             if (timer_period > 0) {
     67 
     68                 /* advance the timer */
     69                 timer_tsc += diff_tsc;
     70 
     71                 /* if timer has reached its timeout */
     72                 if (unlikely(timer_tsc >= timer_period)) {
     73 
     74                     /* do this only on master core */
     75                     if (lcore_id == rte_get_master_lcore()) {
     76                         // only the master lcore prints the statistics
     77                         print_stats();
     78                         /* reset the timer */
     79                         timer_tsc = 0;
     80                     }
     81                 }
     82             }
     83 
     84             prev_tsc = cur_tsc;
     85         }
     86 
     87         /*
     88          * Read packet from RX queues
     89          */
     90         // RX is polled on every iteration, whether or not a TX drain just happened
     91         for (i = 0; i < qconf->n_rx_port; i++) {
     92 
     93             portid = qconf->rx_port_list[i];
     94             nb_rx = rte_eth_rx_burst(portid, 0,
     95                          pkts_burst, MAX_PKT_BURST);
     96 
     97             // count the received packets
     98             port_statistics[portid].rx += nb_rx;
     99 
    100             for (j = 0; j < nb_rx; j++) {
    101                 m = pkts_burst[j];
    102                 rte_prefetch0(rte_pktmbuf_mtod(m, void *));
    103                 // l2fwd_simple_forward is not shown here; per the original note it updates the MAC addresses and tries to send once the TX buffer of the destination port is full
    104                 l2fwd_simple_forward(m, portid);
    105             }
    106         }
    107     }
    108 }

     流程图:

     

     
     

    四. 测试实验

    ./l2fwd -l 0-3 -n 4 -- -p 0x3
    * 参数输入示例 ./l2fwd -c 0x3 -n 4 -- -p 3 -q 1
    * -c 为十六进制的逻辑核掩码(coremask),每一位对应一个逻辑核,而不是逻辑核的数量;也可以像上面的命令那样用 -l 0-3 直接给出逻辑核列表
    * -n 为十进制的内存通道数量,EAL参数和程序参数用--分开
    * -q 为分配给每个逻辑核的接收端口(队列)数量
    * -p 为十六进制的端口掩码(portmask),每一位对应一个端口,而不是端口的数量
    * -t 为可选的统计信息打印时间间隔参数(单位为秒),默认10s
  • 相关阅读:
    MATLAB 编程风格指南及注意事项
    Redis笔记
    HDU-5706
    【sqli-labs】 less4 GET
    【sqli-labs】 less3 GET
    【sqli-labs】 less2 GET
    【sqli-labs】 less1 GET
    Ubuntu14.04环境下java web运行环境搭建
    Android进度条控件ProgressBar使用
    Android中DatePicker与TimePicker用法讲解(包括DatePickerDialog与TimePickerDialog)
  • 原文地址:https://www.cnblogs.com/mysky007/p/12308305.html
Copyright © 2011-2022 走看看