zoukankan      html  css  js  c++  java
  • mbufs multi segments + offload

    DPDK技术栈在电信云中的最佳实践(三)

     https://kkutysllb.cn/2019/05/01/DPDK%E6%8A%80%E6%9C%AF%E6%A0%88%E5%9C%A8%E7%94%B5%E4%BF%A1%E4%BA%91%E4%B8%AD%E7%9A%84%E6%9C%80%E4%BD%B3%E5%AE%9E%E8%B7%B5%EF%BC%88%E4%B8%89%EF%BC%89/

    ethtool -k enp6s0
    Features for enp6s0:
    rx-checksumming: off [fixed]
    tx-checksumming: off
            tx-checksum-ipv4: off [fixed]
            tx-checksum-ip-generic: off [fixed]
            tx-checksum-ipv6: off [fixed]
            tx-checksum-fcoe-crc: off [fixed]
            tx-checksum-sctp: off [fixed]
    scatter-gather: on
            tx-scatter-gather: on
            tx-scatter-gather-fraglist: off [fixed]
    tcp-segmentation-offload: off
            tx-tcp-segmentation: off [fixed]
            tx-tcp-ecn-segmentation: off [fixed]
            tx-tcp-mangleid-segmentation: off [fixed]
            tx-tcp6-segmentation: off [fixed]
    udp-fragmentation-offload: off
    generic-segmentation-offload: on
    generic-receive-offload: on
    large-receive-offload: off [fixed]
    rx-vlan-offload: off [fixed]
    tx-vlan-offload: off [fixed]
    ntuple-filters: off [fixed]
    receive-hashing: off [fixed]
    highdma: on
    rx-vlan-filter: on [fixed]
    vlan-challenged: off [fixed]
    tx-lockless: off [fixed]
    netns-local: off [fixed]
    tx-gso-robust: off [fixed]
    tx-fcoe-segmentation: off [fixed]
    tx-gre-segmentation: off [fixed]
    tx-gre-csum-segmentation: off [fixed]
    tx-ipxip4-segmentation: off [fixed]
    tx-ipxip6-segmentation: off [fixed]
    tx-udp_tnl-segmentation: off [fixed]
    tx-udp_tnl-csum-segmentation: off [fixed]
    tx-gso-partial: off [fixed]
    tx-sctp-segmentation: off [fixed]
    tx-esp-segmentation: off [fixed]
    fcoe-mtu: off [fixed]
    tx-nocache-copy: off
    loopback: off [fixed]
    rx-fcs: off [fixed]
    rx-all: off [fixed]
    tx-vlan-stag-hw-insert: off [fixed]
    rx-vlan-stag-hw-parse: off [fixed]
    rx-vlan-stag-filter: off [fixed]
    l2-fwd-offload: off [fixed]
    hw-tc-offload: off [fixed]
    esp-hw-offload: off [fixed]
    esp-tx-csum-hw-offload: off [fixed]
    rx-udp_tunnel-port-offload: off [fixed]

    .txmode = {
        .offloads = DEV_TX_OFFLOAD_MULTI_SEGS,
    },

    DMA的实现简述

    在实现DMA传输时,是由DMA控制器直接掌管总线,因此,存在着一个总线控制权转移问题。即DMA传输前,CPU要把总线控制权交给DMA控制器,而在结束DMA传输后,DMA控制器应立即把总线控制权再交回给CPU。一个完整的DMA传输过程必须经过DMA请求、DMA响应、DMA传输、DMA结束 4个步骤。

    scatter-gather DMA 与 block DMA
    传统的block DMA 一次只能传输物理上连续的一个块的数据, 完成传输后发起中断。而scatter-gather DMA允许一次传输多个物理上不连续的块,完成传输后只发起一次中断。 

    传统的block DMA像这样:

    先进的scatter-gather DMA像这样:

    这样做的好处是直观的,大大减少了中断的次数,提高了数据传输的效率。

    scatter-gather DMA的应用

    dpdk在ip分片的实现中,采用了一种称作零拷贝的技术。而这种实现方式的底层,正是由scatter-gather DMA支撑的。dpdk的分片包采用了链式管理,同一个数据包的数据,分散存储在不连续的块中(mbuf结构)。这就要求DMA一次操作,需要从不连续的多个块中搬移数据。附上e1000驱动发包部分代码:

    uint16_t
    eth_em_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
            uint16_t nb_pkts)
    {
        //e1000驱动部分代码
        ...
        m_seg = tx_pkt;
        do {
            txd = &txr[tx_id];
            txn = &sw_ring[txe->next_id];
     
            if (txe->mbuf != NULL)
                rte_pktmbuf_free_seg(txe->mbuf);
            txe->mbuf = m_seg;
     
            /*
            * Set up Transmit Data Descriptor.
            */
            slen = m_seg->data_len;
            buf_dma_addr = rte_mbuf_data_iova(m_seg);
     
            txd->buffer_addr = rte_cpu_to_le_64(buf_dma_addr);
            txd->lower.data = rte_cpu_to_le_32(cmd_type_len | slen);
            txd->upper.data = rte_cpu_to_le_32(popts_spec);
     
            txe->last_id = tx_last;
            tx_id = txe->next_id;
            txe = txn;
            m_seg = m_seg->next;
        } while (m_seg != NULL);
     
        /*
        * The last packet data descriptor needs End Of Packet (EOP)
        */
        cmd_type_len |= E1000_TXD_CMD_EOP;
        txq->nb_tx_used = (uint16_t)(txq->nb_tx_used + nb_used);
        txq->nb_tx_free = (uint16_t)(txq->nb_tx_free - nb_used);
        ...
    }
    DEV_TX_OFFLOAD_IPV4_CKSUM;
      txq_conf.offloads |= DEV_TX_OFFLOAD_IPV4_CKSUM;
        ret = rte_eth_tx_queue_setup(portid, 0, nb_txd,
                                     rte_eth_dev_socket_id(portid),
                                     &txq_conf);
    Ethdev port_id=0 tx_queue_id=0, new added offloads 0x2 must be within per-queue offload capabilities 0x0 in rte_eth_tx_queue_setup()
    EAL: Error - exiting with code: 1
      Cause: rte_eth_tx_queue_setup:err=-22, port=0

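     解决方法:上面的报错说明该网卡的 per-queue offload 能力为 0x0,不能只在队列级别(txq_conf.offloads)新增 DEV_TX_OFFLOAD_IPV4_CKSUM,需要在调用 rte_eth_dev_configure 之前,在端口级别的配置中添加该 offload: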

    local_port_conf.txmode.offloads |= DEV_TX_OFFLOAD_IPV4_CKSUM;
        ret = rte_eth_dev_configure(portid, 1, 1, &local_port_conf);
                            //ip_h->hdr_checksum = ipv4_hdr_cksum(ip_h);
                            ip_h->hdr_checksum = 0;
                            pkt->ol_flags |= PKT_TX_IP_CKSUM;
    Breakpoint 1, hinic_tx_offload_pkt_prepare (m=0x13e82a480, off_info=0xffffbd40cd28)
        at /data1/dpdk-19.11/drivers/net/hinic/hinic_pmd_tx.c:794
    794             u16 eth_type = 0;
    (gdb) bt
    #0  hinic_tx_offload_pkt_prepare (m=0x13e82a480, off_info=0xffffbd40cd28)
        at /data1/dpdk-19.11/drivers/net/hinic/hinic_pmd_tx.c:794
    #1  0x000000000078a5cc in hinic_get_sge_txoff_info (mbuf_pkt=0x13e82a480, sqe_info=0xffffbd40cd38, 
        off_info=0xffffbd40cd28) at /data1/dpdk-19.11/drivers/net/hinic/hinic_pmd_tx.c:991
    #2  0x000000000078a890 in hinic_xmit_pkts (tx_queue=0x13e7e7000, tx_pkts=0xffffbd40ce08, nb_pkts=1)
    (gdb) s
    796             uint64_t ol_flags = m->ol_flags;
    (gdb) list
    791             struct rte_udp_hdr *udp_hdr;
    792             struct rte_ether_hdr *eth_hdr;
    793             struct rte_vlan_hdr *vlan_hdr;
    794             u16 eth_type = 0;
    795             uint64_t inner_l3_offset;
    796             uint64_t ol_flags = m->ol_flags;
    797
    798             /* Check if the packets set available offload flags */
    799             if (!(ol_flags & HINIC_TX_CKSUM_OFFLOAD_MASK))
    800                     return 0;
    (gdb) n
    799             if (!(ol_flags & HINIC_TX_CKSUM_OFFLOAD_MASK))
    (gdb) n
    800                     return 0;
    (gdb) n
    978     }
    (gdb) n
    hinic_get_sge_txoff_info (mbuf_pkt=0x13e82a480, sqe_info=0xffffbd40cd38, off_info=0xffffbd40cd28)
        at /data1/dpdk-19.11/drivers/net/hinic/hinic_pmd_tx.c:992
    992             if (unlikely(ret))
    (gdb) n
    995             sqe_info->cpy_mbuf_cnt = 0;
    (gdb) n
    998             if (likely(!(mbuf_pkt->ol_flags & PKT_TX_TCP_SEG))) {
    (gdb) n
    999                     if (unlikely(mbuf_pkt->pkt_len > MAX_SINGLE_SGE_SIZE)) {
    (gdb) n
    1002                    } else if (unlikely(HINIC_NONTSO_SEG_NUM_INVALID(sge_cnt))) {
    (gdb) n
    1024                    sqe_info->sge_cnt = sge_cnt;
    (gdb) n
    1037            return true;
    (gdb) n
    1038    }
    (gdb) n
    hinic_xmit_pkts (tx_queue=0x13e7e7000, tx_pkts=0xffffbd40ce08, nb_pkts=1)
        at /data1/dpdk-19.11/drivers/net/hinic/hinic_pmd_tx.c:1093
    1093                    wqe_wqebb_cnt = HINIC_SQ_WQEBB_CNT(sqe_info.sge_cnt);
    (gdb) n
    1094                    free_wqebb_cnt = HINIC_GET_SQ_FREE_WQEBBS(txq);
    (gdb) n
    1095                    if (unlikely(wqe_wqebb_cnt > free_wqebb_cnt)) {
    (gdb) n
    1108                    sq_wqe = hinic_get_sq_wqe(txq, wqe_wqebb_cnt, &sqe_info);
    (gdb) n
    1111                    if (unlikely(!hinic_mbuf_dma_map_sge(txq, mbuf_pkt,
    (gdb) n
    1121                    task = &sq_wqe->task;
    (gdb) n
    1124                    hinic_fill_tx_offload_info(mbuf_pkt, task, &queue_info,
    (gdb) n
    1128                    tx_info = &txq->tx_info[sqe_info.pi];
    (gdb) n
    1129                    tx_info->mbuf = mbuf_pkt;
    (gdb) n
    1130                    tx_info->wqebb_cnt = wqe_wqebb_cnt;
    (gdb) n
    1133                    hinic_fill_sq_wqe_header(&sq_wqe->ctrl, queue_info,
    (gdb) n
    1134                                             sqe_info.sge_cnt, sqe_info.owner);
    (gdb) n
    1133                    hinic_fill_sq_wqe_header(&sq_wqe->ctrl, queue_info,
    (gdb) c
    Continuing.
    Breakpoint 1, hinic_tx_offload_pkt_prepare (m=0x13e82ac00, off_info=0xffffc357c558)
        at /data1/dpdk-19.11/drivers/net/hinic/hinic_pmd_tx.c:794
    794             u16 eth_type = 0;
    (gdb) s
    796             uint64_t ol_flags = m->ol_flags;
    (gdb) n
    799             if (!(ol_flags & HINIC_TX_CKSUM_OFFLOAD_MASK))
    (gdb) n
    803             if ((ol_flags & PKT_TX_TUNNEL_MASK) &&
    (gdb) n
    812             if (ol_flags & PKT_TX_TUNNEL_VXLAN) {
    (gdb) n
    847                     inner_l3_offset = m->l2_len;
    (gdb) n
    848                     off_info->inner_l2_len = m->l2_len;
    (gdb) n
    849                     off_info->inner_l3_len = m->l3_len;
    (gdb) n
    850                     off_info->inner_l4_len = m->l4_len;
    (gdb) n
    851                     off_info->tunnel_type = NOT_TUNNEL;
    (gdb) n
    853                     hinic_get_pld_offset(m, off_info,
    (gdb) n
    858             if (unlikely(off_info->payload_offset > MAX_PLD_OFFSET))
    (gdb) n
    862             if ((ol_flags & PKT_TX_TUNNEL_VXLAN) && ((ol_flags & PKT_TX_TCP_SEG) ||
    (gdb) n
    901             } else if (ol_flags & PKT_TX_OUTER_IPV4) {
    (gdb) n
    907             if (ol_flags & PKT_TX_IPV4)
    (gdb) n
    908                     off_info->inner_l3_type = (ol_flags & PKT_TX_IP_CKSUM) ?
    (gdb) n
    915             if ((ol_flags & PKT_TX_L4_MASK) == PKT_TX_UDP_CKSUM) {
    (gdb) n
    942             } else if (((ol_flags & PKT_TX_L4_MASK) == PKT_TX_TCP_CKSUM) ||
    (gdb) n
    943                             (ol_flags & PKT_TX_TCP_SEG)) {
    (gdb) n
    942             } else if (((ol_flags & PKT_TX_L4_MASK) == PKT_TX_TCP_CKSUM) ||
    (gdb) n
    971             } else if ((ol_flags & PKT_TX_L4_MASK) == PKT_TX_SCTP_CKSUM) {
    (gdb) n
    977             return 0;
    (gdb) n
    978     }
    (gdb) n
    hinic_get_sge_txoff_info (mbuf_pkt=0x13e82ac00, sqe_info=0xffffc357c568, off_info=0xffffc357c558)
        at /data1/dpdk-19.11/drivers/net/hinic/hinic_pmd_tx.c:992
    992             if (unlikely(ret))
    (gdb) 

     

  • 相关阅读:
    js 生成32位UUID方法
    win10把控制声音改成和win7一样
    jQuery.inArray()方法
    在eclipse中安装activiti插件
    关于NOIP运输计划一题几种思路和若干种做法的研究
    该博客停止更新
    [CTSC2010]产品销售
    roi 学习轨迹
    「PA 2019」Szprotki i szczupaki
    LOJ576签到游戏
  • 原文地址:https://www.cnblogs.com/dream397/p/13677950.html
Copyright © 2011-2022 走看看