• linux内存管理之DMA


    说起DMA我们并不陌生,但是实际编程中去用的人不多吧,最多就是网卡驱动里的环形buffer,再有就是设备的dma,下面我们就分析分析.
       DMA用来在设备内存和系统内存之间直接进行数据交互,而无需cpu干预。
      
    内核为了方便驱动的开发,已经提供了几个dma 函数接口。
    dma跟硬件架构相关,所以linux关于硬件部分已经给屏蔽了,有兴趣的可以深入跟踪学习.

    按照linux内核对dma层的架构设计,各平台dma缓冲区映射之间的差异由内核定义的一个dma操作集

    include/linux/dma-mapping.h:点击(此处)折叠或打开

    1. struct dma_map_ops {
    2.     void* (*alloc)(struct device *dev, size_t size,
    3.                 dma_addr_t *dma_handle, gfp_t gfp,
    4.                 struct dma_attrs *attrs);
    5.     void (*free)(struct device *dev, size_t size,
    6.              void *vaddr, dma_addr_t dma_handle,
    7.              struct dma_attrs *attrs);
    8.     int (*mmap)(struct device *, struct vm_area_struct *,
    9.              void *, dma_addr_t, size_t, struct dma_attrs *attrs);
    10.     int (*get_sgtable)(struct device *dev, struct sg_table *sgt, void *,
    11.              dma_addr_t, size_t, struct dma_attrs *attrs);
    12.     dma_addr_t (*map_page)(struct device *dev, struct page *page,
    13.              unsigned long offset, size_t size,
    14.              enum dma_data_direction dir,
    15.              struct dma_attrs *attrs);
    16.     void (*unmap_page)(struct device *dev, dma_addr_t dma_handle,
    17.              size_t size, enum dma_data_direction dir,
    18.              struct dma_attrs *attrs);
    19.     int (*map_sg)(struct device *dev, struct scatterlist *sg,
    20.          int nents, enum dma_data_direction dir,
    21.          struct dma_attrs *attrs);
    22.     void (*unmap_sg)(struct device *dev,
    23.              struct scatterlist *sg, int nents,
    24.              enum dma_data_direction dir,
    25.              struct dma_attrs *attrs);
    26.     void (*sync_single_for_cpu)(struct device *dev,
    27.                  dma_addr_t dma_handle, size_t size,
    28.                  enum dma_data_direction dir);
    29.     void (*sync_single_for_device)(struct device *dev,
    30.                  dma_addr_t dma_handle, size_t size,
    31.                  enum dma_data_direction dir);
    32.     void (*sync_sg_for_cpu)(struct device *dev,
    33.                 struct scatterlist *sg, int nents,
    34.                 enum dma_data_direction dir);
    35.     void (*sync_sg_for_device)(struct device *dev,
    36.                  struct scatterlist *sg, int nents,
    37.                  enum dma_data_direction dir);
    38.     int (*mapping_error)(struct device *dev, dma_addr_t dma_addr);
    39.     int (*dma_supported)(struct device *dev, u64 mask);
    40.     int (*set_dma_mask)(struct device *dev, u64 mask);
    41. #ifdef ARCH_HAS_DMA_GET_REQUIRED_MASK
    42.     u64 (*get_required_mask)(struct device *dev);
    43. #endif
    44.     int is_phys;
    45. }

    来统一屏蔽实现的差异. 
    不同平台之间的差异主要来自cache的问题

    Cache与dma同步问题,这里不深入讨论.

    另外一个常用的函数是dma_set_mask,用来通知内核设备能够寻址的范围,因为很多时候设备能够寻址的范围有限。

    Dma映射可以分为三类:

    1.       一致性dma映射 dma_alloc_coherent (问题:驱动使用的buffer不是自身申请的,而是其他模块)

    当驱动模块主动分配一个Dma缓冲区并且dma生存期和模块一样时

    参数说明:

    (1)这个函数的返回值是缓冲的一个内核虚拟地址, 它可被驱动使用

    (2)第三个参数dma_handle:

    与该缓冲区对应的DMA地址(总线地址,供设备使用)通过 dma_handle 返回

    2.       流式dma映射  dma_map_single 
    通常用于把内核一段buffer映射,返回物理地址.

    如果驱动模块需要使用从别的模块传进来的虚拟地址空间作为dma缓冲区,保证地址的线性  cache一致性

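    cache一致性同步的api接口:dma_sync_single_for_cpu / dma_sync_single_for_device(对应dma_map_ops中的sync_single_for_cpu和sync_single_for_device)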

    3.分散/聚集映射(scatter/gather map)  dma_map_sg


    有时候我们还需要

    1. 回弹缓冲区 bounce  buffer:当cpu侧物理地址不适合设备的dma操作的时候

    2.

    DMA内存池:一般dma映射都是单个page的整数倍,如果驱动程序需要更小的一致性映射的dma缓冲区,可以使用DMA内存池,它类似于slab机制。

    dma_pool_create

    下面我们就拿网卡驱动的例子说说dma的具体应用,参考linux kernel e1000网卡
    drivers/net/ethernet/intel/e1000/*

    Ring buffer

    DMA缓冲区一般不能位于高端内存,设备的DMA掩码一般为32位,默认使用低端内存,这是由于设备能够访问的地址范围有限。

    设备使用物理地址,而代码使用虚拟地址。

    就看看如何发送数据包:e1000_main.c:
    e1000_xmit_frame: 关于帧的发送流程这里不多说.

    点击(此处)折叠或打开

    1. static netdev_tx_t e1000_xmit_frame(struct sk_buff *skb,
    2.                  struct net_device *netdev)
    3. {
    4.     struct e1000_adapter *adapter = netdev_priv(netdev);
    5.     struct e1000_hw *hw = &adapter->hw;
    6.     struct e1000_tx_ring *tx_ring;
    7.     unsigned int first, max_per_txd = E1000_MAX_DATA_PER_TXD;
    8.     unsigned int max_txd_pwr = E1000_MAX_TXD_PWR;
    9.     unsigned int tx_flags = 0;
    10.     unsigned int len = skb_headlen(skb);
    11.     unsigned int nr_frags;
    12.     unsigned int mss;
    13.     int count = 0;
    14.     int tso;
    15.     unsigned int f;
    16.     /* This goes back to the question of how to logically map a tx queue
    17.      * to a flow. Right now, performance is impacted slightly negatively
    18.      * if using multiple tx queues. If the stack breaks away from a
    19.      * single qdisc implementation, we can look at this again. */
    20.     tx_ring = adapter->tx_ring;
    21.     if (unlikely(skb->len <= 0)) {
    22.         dev_kfree_skb_any(skb);
    23.         return NETDEV_TX_OK;
    24.     }
    25.     /* On PCI/PCI-X HW, if packet size is less than ETH_ZLEN,
    26.      * packets may get corrupted during padding by HW.
    27.      * To WA this issue, pad all small packets manually.
    28.      */
    29.     if (skb->len < ETH_ZLEN) {
    30.         if (skb_pad(skb, ETH_ZLEN - skb->len))
    31.             return NETDEV_TX_OK;
    32.         skb->len = ETH_ZLEN;
    33.         skb_set_tail_pointer(skb, ETH_ZLEN);
    34.     }
    35.     mss = skb_shinfo(skb)->gso_size;
    36.     /* The controller does a simple calculation to
    37.      * make sure there is enough room in the FIFO before
    38.      * initiating the DMA for each buffer. The calc is:
    39.      * 4 = ceil(buffer len/mss). To make sure we don't
    40.      * overrun the FIFO, adjust the max buffer len if mss
    41.      * drops. */
    42.     if (mss) {
    43.         u8 hdr_len;
    44.         max_per_txd = min(mss << 2, max_per_txd);
    45.         max_txd_pwr = fls(max_per_txd) - 1;
    46.         hdr_len = skb_transport_offset(skb) + tcp_hdrlen(skb);
    47.         if (skb->data_len && hdr_len == len) {
    48.             switch (hw->mac_type) {
    49.                 unsigned int pull_size;
    50.             case e1000_82544:
    51.                 /* Make sure we have room to chop off 4 bytes,
    52.                  * and that the end alignment will work out to
    53.                  * this hardware's requirements
    54.                  * NOTE: this is a TSO only workaround
    55.                  * if end byte alignment not correct move us
    56.                  * into the next dword */
    57.                 if ((unsigned long)(skb_tail_pointer(skb) - 1) & 4)
    58.                     break;
    59.                 /* fall through */
    60.                 pull_size = min((unsigned int)4, skb->data_len);
    61.                 if (!__pskb_pull_tail(skb, pull_size)) {
    62.                     e_err(drv, "__pskb_pull_tail "
    63.                      "failed. ");
    64.                     dev_kfree_skb_any(skb);
    65.                     return NETDEV_TX_OK;
    66.                 }
    67.                 len = skb_headlen(skb);
    68.                 break;
    69.             default:
    70.                 /* do nothing */
    71.                 break;
    72.             }
    73.         }
    74.     }
    75.     /* reserve a descriptor for the offload context */
    76.     if ((mss) || (skb->ip_summed == CHECKSUM_PARTIAL))
    77.         count++;
    78.     count++;
    79.     /* Controller Erratum workaround */
    80.     if (!skb->data_len && tx_ring->last_tx_tso && !skb_is_gso(skb))
    81.         count++;
    82.     count += TXD_USE_COUNT(len, max_txd_pwr);
    83.     if (adapter->pcix_82544)
    84.         count++;
    85.     /* work-around for errata 10 and it applies to all controllers
    86.      * in PCI-X mode, so add one more descriptor to the count
    87.      */
    88.     if (unlikely((hw->bus_type == e1000_bus_type_pcix) &&
    89.             (len > 2015)))
    90.         count++;
    91.     nr_frags = skb_shinfo(skb)->nr_frags;
    92.     for (f = 0; f < nr_frags; f++)
    93.         count += TXD_USE_COUNT(skb_frag_size(&skb_shinfo(skb)->frags[f]),
    94.                  max_txd_pwr);
    95.     if (adapter->pcix_82544)
    96.         count += nr_frags;
    97.     /* need: count + 2 desc gap to keep tail from touching
    98.      * head, otherwise try next time */
    99.     if (unlikely(e1000_maybe_stop_tx(netdev, tx_ring, count + 2)))
    100.         return NETDEV_TX_BUSY;
    101.     if (unlikely((hw->mac_type == e1000_82547) &&
    102.          (e1000_82547_fifo_workaround(adapter, skb)))) {
    103.         netif_stop_queue(netdev);
    104.         if (!test_bit(__E1000_DOWN, &adapter->flags))
    105.             schedule_delayed_work(&adapter->fifo_stall_task, 1);
    106.         return NETDEV_TX_BUSY;
    107.     }
    108.     if (vlan_tx_tag_present(skb)) {
    109.         tx_flags |= E1000_TX_FLAGS_VLAN;
    110.         tx_flags |= (vlan_tx_tag_get(skb) << E1000_TX_FLAGS_VLAN_SHIFT);
    111.     }
    112.     first = tx_ring->next_to_use;
    113.     tso = e1000_tso(adapter, tx_ring, skb);
    114.     if (tso < 0) {
    115.         dev_kfree_skb_any(skb);
    116.         return NETDEV_TX_OK;
    117.     }
    118.     if (likely(tso)) {
    119.         if (likely(hw->mac_type != e1000_82544))
    120.             tx_ring->last_tx_tso = true;
    121.         tx_flags |= E1000_TX_FLAGS_TSO;
    122.     } else if (likely(e1000_tx_csum(adapter, tx_ring, skb)))
    123.         tx_flags |= E1000_TX_FLAGS_CSUM;
    124.     if (likely(skb->protocol == htons(ETH_P_IP)))
    125.         tx_flags |= E1000_TX_FLAGS_IPV4;
    126.     if (unlikely(skb->no_fcs))
    127.         tx_flags |= E1000_TX_FLAGS_NO_FCS;
    128.     count = e1000_tx_map(adapter, tx_ring, skb, first, max_per_txd,
    129.      nr_frags, mss);
    130.     if (count) {
    131.         netdev_sent_queue(netdev, skb->len);
    132.         skb_tx_timestamp(skb);
    133.         e1000_tx_queue(adapter, tx_ring, tx_flags, count);
    134.         /* Make sure there is space in the ring for the next send. */
    135.         e1000_maybe_stop_tx(netdev, tx_ring, MAX_SKB_FRAGS + 2);
    136.     } else {
    137.         dev_kfree_skb_any(skb);
    138.         tx_ring->buffer_info[first].time_stamp = 0;
    139.         tx_ring->next_to_use = first;
    140.     }
    141.     return NETDEV_TX_OK;
    142. }

    经过上次,邻居子系统后,数据帧已经到达驱动,数据放在skb指定的内存里. 
    看代码
    tx_ring = adapter->tx_ring;  //  获取发送的ring buffer
    接着我们看关键代码:
    count = e1000_tx_map(adapter, tx_ring, skb, first, max_per_txd,    nr_frags, mss);
    它做了什么呢?

    点击(此处)折叠或打开

    1. static int e1000_tx_map(struct e1000_adapter *adapter,
    2.             struct e1000_tx_ring *tx_ring,
    3.             struct sk_buff *skb, unsigned int first,
    4.             unsigned int max_per_txd, unsigned int nr_frags,
    5.             unsigned int mss)
    6. {
    7.     struct e1000_hw *hw = &adapter->hw;
    8.     struct pci_dev *pdev = adapter->pdev;
    9.     struct e1000_buffer *buffer_info;
    10.     unsigned int len = skb_headlen(skb);
    11.     unsigned int offset = 0, size, count = 0, i;
    12.     unsigned int f, bytecount, segs;
    13.     i = tx_ring->next_to_use;
    14.     while (len) {
    15.         buffer_info = &tx_ring->buffer_info[i];
    16.         size = min(len, max_per_txd);
    17.         /* Workaround for Controller erratum --
    18.          * descriptor for non-tso packet in a linear SKB that follows a
    19.          * tso gets written back prematurely before the data is fully
    20.          * DMA'd to the controller */
    21.         if (!skb->data_len && tx_ring->last_tx_tso &&
    22.          !skb_is_gso(skb)) {
    23.             tx_ring->last_tx_tso = false;
    24.             size -= 4;
    25.         }
    26.         /* Workaround for premature desc write-backs
    27.          * in TSO mode. Append 4-byte sentinel desc */
    28.         if (unlikely(mss && !nr_frags && size == len && size > 8))
    29.             size -= 4;
    30.         /* work-around for errata 10 and it applies
    31.          * to all controllers in PCI-X mode
    32.          * The fix is to make sure that the first descriptor of a
    33.          * packet is smaller than 2048 - 16 - 16 (or 2016) bytes
    34.          */
    35.         if (unlikely((hw->bus_type == e1000_bus_type_pcix) &&
    36.          (size > 2015) && count == 0))
    37.          size = 2015;
    38.         /* Workaround for potential 82544 hang in PCI-X. Avoid
    39.          * terminating buffers within evenly-aligned dwords. */
    40.         if (unlikely(adapter->pcix_82544 &&
    41.          !((unsigned long)(skb->data + offset + size - 1) & 4) &&
    42.          size > 4))
    43.             size -= 4;
    44.         buffer_info->length = size;
    45.         /* set time_stamp *before* dma to help avoid a possible race */
    46.         buffer_info->time_stamp = jiffies;
    47.         buffer_info->mapped_as_page = false;
    48.         buffer_info->dma = dma_map_single(&pdev->dev,
    49.                          skb->data + offset,
    50.                          size,    DMA_TO_DEVICE);
    51.         if (dma_mapping_error(&pdev->dev, buffer_info->dma))
    52.             goto dma_error;
    53.         buffer_info->next_to_watch = i;
    54.         len -= size;
    55.         offset += size;
    56.         count++;
    57.         if (len) {
    58.             i++;
    59.             if (unlikely(i == tx_ring->count))
    60.                 i = 0;
    61.         }
    62.     }
    63.     for (f = 0; f < nr_frags; f++) {
    64.         const struct skb_frag_struct *frag;
    65.         frag = &skb_shinfo(skb)->frags[f];
    66.         len = skb_frag_size(frag);
    67.         offset = 0;
    68.         while (len) {
    69.             unsigned long bufend;
    70.             i++;
    71.             if (unlikely(i == tx_ring->count))
    72.                 i = 0;
    73.             buffer_info = &tx_ring->buffer_info[i];
    74.             size = min(len, max_per_txd);
    75.             /* Workaround for premature desc write-backs
    76.              * in TSO mode. Append 4-byte sentinel desc */
    77.             if (unlikely(mss && f == (nr_frags-1) && size == len && size > 8))
    78.                 size -= 4;
    79.             /* Workaround for potential 82544 hang in PCI-X.
    80.              * Avoid terminating buffers within evenly-aligned
    81.              * dwords. */
    82.             bufend = (unsigned long)
    83.                 page_to_phys(skb_frag_page(frag));
    84.             bufend += offset + size - 1;
    85.             if (unlikely(adapter->pcix_82544 &&
    86.                  !(bufend & 4) &&
    87.                  size > 4))
    88.                 size -= 4;
    89.             buffer_info->length = size;
    90.             buffer_info->time_stamp = jiffies;
    91.             buffer_info->mapped_as_page = true;
    92.             buffer_info->dma = skb_frag_dma_map(&pdev->dev, frag,
    93.                         offset, size, DMA_TO_DEVICE);
    94.             if (dma_mapping_error(&pdev->dev, buffer_info->dma))
    95.                 goto dma_error;
    96.             buffer_info->next_to_watch = i;
    97.             len -= size;
    98.             offset += size;
    99.             count++;
    100.         }
    101.     }
    102.     segs = skb_shinfo(skb)->gso_segs ?: 1;
    103.     /* multiply data chunks by size of headers */
    104.     bytecount = ((segs - 1) * skb_headlen(skb)) + skb->len;
    105.     tx_ring->buffer_info[i].skb = skb;
    106.     tx_ring->buffer_info[i].segs = segs;
    107.     tx_ring->buffer_info[i].bytecount = bytecount;
    108.     tx_ring->buffer_info[first].next_to_watch = i;
    109.     return count;
    110. dma_error:
    111.     dev_err(&pdev->dev, "TX DMA map failed ");
    112.     buffer_info->dma = 0;
    113.     if (count)
    114.         count--;
    115.     while (count--) {
    116.         if (i==0)
    117.             i += tx_ring->count;
    118.         i--;
    119.         buffer_info = &tx_ring->buffer_info[i];
    120.         e1000_unmap_and_free_tx_resource(adapter, buffer_info);
    121.     }
    122.     return 0;
    123. }

    默认数据报文没有分片或者碎片什么的。
    那么进入第一个while(len)
    获取buffer_info = &tx_ring->buffer_info[i];
    然后:调用dma_map_single进行流式映射. 即把skb->data(虚拟地址) 和buffer_info->dma(物理地址)对应起来.操作两个地址等于操作同一片区域。

    点击(此处)折叠或打开

    1. buffer_info->length = size;
    2.         /* set time_stamp *before* dma to help avoid a possible race */
    3.         buffer_info->time_stamp = jiffies;
    4.         buffer_info->mapped_as_page = false;
    5.         buffer_info->dma = dma_map_single(&pdev->dev,
    6.                          skb->data + offset,
    7.                          size,    DMA_TO_DEVICE);

    回到主发送函数:

    点击(此处)折叠或打开

    1. if (count) {
    2.         netdev_sent_queue(netdev, skb->len);
    3.         skb_tx_timestamp(skb);
    4.         e1000_tx_queue(adapter, tx_ring, tx_flags, count);
    5.         /* Make sure there is space in the ring for the next send. */
    6.         e1000_maybe_stop_tx(netdev, tx_ring, MAX_SKB_FRAGS + 2);
    7.     }

    调用e1000_tx_queue把数据发送出去:

    点击(此处)折叠或打开

    1. static void e1000_tx_queue(struct e1000_adapter *adapter,
    2.              struct e1000_tx_ring *tx_ring, int tx_flags,
    3.              int count)
    4. {
    5.     struct e1000_hw *hw = &adapter->hw;
    6.     struct e1000_tx_desc *tx_desc = NULL;
    7.     struct e1000_buffer *buffer_info;
    8.     u32 txd_upper = 0, txd_lower = E1000_TXD_CMD_IFCS;
    9.     unsigned int i;
    10.     
    11.     ...
    12.     i = tx_ring->next_to_use;
    13.     while (count--) {
    14.         buffer_info = &tx_ring->buffer_info[i];
    15.         tx_desc = E1000_TX_DESC(*tx_ring, i);
    16.         tx_desc->buffer_addr = cpu_to_le64(buffer_info->dma);
    17.         tx_desc->lower.data =
    18.             cpu_to_le32(txd_lower | buffer_info->length);
    19.         tx_desc->upper.data = cpu_to_le32(txd_upper);
    20.         if (unlikely(++i == tx_ring->count)) i = 0;
    21.     }
    22.     tx_desc->lower.data |= cpu_to_le32(adapter->txd_cmd);
    23.     /* txd_cmd re-enables FCS, so we'll re-disable it here as desired. */
    24.     if (unlikely(tx_flags & E1000_TX_FLAGS_NO_FCS))
    25.         tx_desc->lower.data &= ~(cpu_to_le32(E1000_TXD_CMD_IFCS));
    26.     /* Force memory writes to complete before letting h/w
    27.      * know there are new descriptors to fetch. (Only
    28.      * applicable for weak-ordered memory model archs,
    29.      * such as IA-64). */
    30.     wmb();
    31.     tx_ring->next_to_use = i;
    32.     writel(i, hw->hw_addr + tx_ring->tdt);
    33.     /* we need this if more than one processor can write to our tail
    34.      * at a time, it syncronizes IO on IA64/Altix systems */
    35.     mmiowb();
    36. }

    我们看到它把刚才dma_map_single返回的映射地址写入了发送描述符:
    tx_desc->buffer_addr = cpu_to_le64(buffer_info->dma);
    说明发送的时候是根据发送描述符来发送的。
    然后操作寄存器:
    writel(i, hw->hw_addr + tx_ring->tdt);
    那么网卡就会自动读取tx desc 然后把数据发送出去。
    总结下流程:
    1. linux os会调用网卡的start_xmit()函数。在e1000里,对应的函数是 e1000_xmit_frame,
    2.   e1000_xmit_frame又会调用e1000_tx_queue(adapter, tx_ring, tx_flags, count)。
    这里的tx_queue指的是发送Descriptor的queue。
    3. e1000_tx_queue 在检查了一些参数后,最终调用 writel(i, hw->hw_addr + tx_ring->tdt)。
    这里的tx_ring->tdt中的tdt全写为 tx_descriptor_tail。从网卡的开发手册中可以查到,如果写了descriptor tail,那么网卡就会自动读取 descriptor,然后把包发送出去。
    descriptor的主要内容是addr pointer和length。前者是要发送的包的起始物理地址,后者是包的长度。有了这些,硬件就可以通过dma来读取包并发出去了。其他网卡也基本会用descriptor的结构。

    虽然流程明白了,但是还有几个点,
    1. tx_ring在哪初始化?
    2. 网卡到底是如何操作映射的dma地址的,把数据发送出去的?

    tx ring 在e1000_open 的时候:
    调用:

    点击(此处)折叠或打开

    1. /**
    2.  * e1000_setup_all_tx_resources - wrapper to allocate Tx resources
    3.  *                  (Descriptors) for all queues
    4.  * @adapter: board private structure
    5.  *
    6.  * Return 0 on success, negative on failure
    7.  **/
    8. int e1000_setup_all_tx_resources(struct e1000_adapter *adapter)
    9. {
    10.     int i, err = 0;
    11.     for (i = 0; i < adapter->num_tx_queues; i++) {
    12.         err = e1000_setup_tx_resources(adapter, &adapter->tx_ring[i]);
    13.         if (err) {
    14.             e_err(probe, "Allocation for Tx Queue %u failed ", i);
    15.             for (i-- ; i >= 0; i--)
    16.                 e1000_free_tx_resources(adapter,
    17.                             &adapter->tx_ring[i]);
    18.             break;
    19.         }
    20.     }
    21.     return err;
    22. }

    点击(此处)折叠或打开

    1. /**
    2.  * e1000_setup_tx_resources - allocate Tx resources (Descriptors)
    3.  * @adapter: board private structure
    4.  * @txdr: tx descriptor ring (for a specific queue) to setup
    5.  *
    6.  * Return 0 on success, negative on failure
    7.  **/
    8. static int e1000_setup_tx_resources(struct e1000_adapter *adapter,
    9.                  struct e1000_tx_ring *txdr)
    10. {
    11.     struct pci_dev *pdev = adapter->pdev;
    12.     int size;
    13.     size = sizeof(struct e1000_buffer) * txdr->count;
    14.     txdr->buffer_info = vzalloc(size);
    15.     if (!txdr->buffer_info) {
    16.         e_err(probe, "Unable to allocate memory for the Tx descriptor "
    17.          "ring ");
    18.         return -ENOMEM;
    19.     }
    20.     /* round up to nearest 4K */
    21.     txdr->size = txdr->count * sizeof(struct e1000_tx_desc);
    22.     txdr->size = ALIGN(txdr->size, 4096);
    23.     txdr->desc = dma_alloc_coherent(&pdev->dev, txdr->size, &txdr->dma,
    24.                     GFP_KERNEL);
    25.     if (!txdr->desc) {
    26. setup_tx_desc_die:
    27.         vfree(txdr->buffer_info);
    28.         e_err(probe, "Unable to allocate memory for the Tx descriptor "
    29.          "ring ");
    30.         return -ENOMEM;
    31.     }
    32.     /* Fix for errata 23, can't cross 64kB boundary */
    33.     if (!e1000_check_64k_bound(adapter, txdr->desc, txdr->size)) {
    34.         void *olddesc = txdr->desc;
    35.         dma_addr_t olddma = txdr->dma;
    36.         e_err(tx_err, "txdr align check failed: %u bytes at %p ",
    37.          txdr->size, txdr->desc);
    38.         /* Try again, without freeing the previous */
    39.         txdr->desc = dma_alloc_coherent(&pdev->dev, txdr->size,
    40.                         &txdr->dma, GFP_KERNEL);
    41.         /* Failed allocation, critical failure */
    42.         if (!txdr->desc) {
    43.             dma_free_coherent(&pdev->dev, txdr->size, olddesc,
    44.                      olddma);
    45.             goto setup_tx_desc_die;
    46.         }
    47.         if (!e1000_check_64k_bound(adapter, txdr->desc, txdr->size)) {
    48.             /* give up */
    49.             dma_free_coherent(&pdev->dev, txdr->size, txdr->desc,
    50.                      txdr->dma);
    51.             dma_free_coherent(&pdev->dev, txdr->size, olddesc,
    52.                      olddma);
    53.             e_err(probe, "Unable to allocate aligned memory "
    54.              "for the transmit descriptor ring ");
    55.             vfree(txdr->buffer_info);
    56.             return -ENOMEM;
    57.         } else {
    58.             /* Free old allocation, new allocation was successful */
    59.             dma_free_coherent(&pdev->dev, txdr->size, olddesc,
    60.                      olddma);
    61.         }
    62.     }
    63.     memset(txdr->desc, 0, txdr->size);
    64.     txdr->next_to_use = 0;
    65.     txdr->next_to_clean = 0;
    66.     return 0;
    67. }

    我们看:它建立了一致性dma映射.

    1.         txdr->desc = dma_alloc_coherent(&pdev->dev, txdr->size,
    2.                         &txdr->dma, GFP_KERNEL);

    desc是结构指针:它的结构跟网卡寄存器结构有关,e1000_hw.h

    点击(此处)折叠或打开

    1. /* Transmit Descriptor */
    2. struct e1000_tx_desc {
    3.     __le64 buffer_addr;    /* Address of the descriptor's data buffer */
    4.     union {
    5.         __le32 data;
    6.         struct {
    7.             __le16 length;    /* Data buffer length */
    8.             u8 cso;    /* Checksum offset */
    9.             u8 cmd;    /* Descriptor control */
    10.         } flags;
    11.     } lower;
    12.     union {
    13.         __le32 data;
    14.         struct {
    15.             u8 status;    /* Descriptor status */
    16.             u8 css;    /* Checksum start */
    17.             __le16 special;
    18.         } fields;
    19.     } upper;
    20. }


    我们稍微捋一下,
    1. skb->data  --- ring->buffer_info->dma
    2.ring->dma  ---  ring->desc
    3. ring->desc->buffer_addr ---ring->buffer_info->dma
    那么网卡又是如何和dma地址关联的呢?

    点击(此处)折叠或打开

    1. /**
    2.  * e1000_configure_tx - Configure 8254x Transmit Unit after Reset
    3.  * @adapter: board private structure
    4.  *
    5.  * Configure the Tx unit of the MAC after a reset.
    6.  **/
    7. static void e1000_configure_tx(struct e1000_adapter *adapter)
    8. {
    9.     u64 tdba;
    10.     struct e1000_hw *hw = &adapter->hw;
    11.     u32 tdlen, tctl, tipg;
    12.     u32 ipgr1, ipgr2;
    13.     /* Setup the HW Tx Head and Tail descriptor pointers */
    14.     switch (adapter->num_tx_queues) {
    15.     case 1:
    16.     default:
    17.         tdba = adapter->tx_ring[0].dma;
    18.         tdlen = adapter->tx_ring[0].count *
    19.             sizeof(struct e1000_tx_desc);
    20.         ew32(TDLEN, tdlen);
    21.         ew32(TDBAH, (tdba >> 32));
    22.         ew32(TDBAL, (tdba & 0x00000000ffffffffULL));
    23.         ew32(TDT, 0);
    24.         ew32(TDH, 0);
    25.         adapter->tx_ring[0].tdh = ((hw->mac_type >= e1000_82543) ? E1000_TDH : E1000_82542_TDH);
    26.         adapter->tx_ring[0].tdt = ((hw->mac_type >= e1000_82543) ? E1000_TDT : E1000_82542_TDT);
    27.         break;
    28.     }

    很明显它把dma地址写入了网卡dma寄存器。所以dma还需要网卡硬件的支持才行.

    当然e1000这个网卡驱动还是相当的复杂,不过它把一致性映射和流式映射都用上了。

  • 相关阅读:
    火炬之光模型导出(Unity载入火炬之光的模型)
    树的左旋与右旋
    javaEE开发之导出excel工具类
    STL algorithm算法is_permutation(27)
    学做衣服论坛 -服装DIY教程,缤纷服装网,裁剪教程,家用缝纫机,买布料
    傲娇_百度百科
    《失败不是成功之母》阅读理解
    失败是不是成功之母
    正则表达式多语种的web版本
    date tod = boost::gregorian::day_clock::local_day(); //当前日期
  • 原文地址:https://www.cnblogs.com/ilinuxer/p/4559174.html
走看看 - 开发者的网上家园