zoukankan      html  css  js  c++  java
  • dpdk rte_memzone_reserve

    [root@localhost dpdk-19.11]# cat   /sys/devices/system/node/node*/hugepages/hugepages-2048kB/nr_hugepages 
    0
    0
    0
    0
    [root@localhost dpdk-19.11]# cat   /sys/devices/system/node/node*/hugepages/hugepages-524288kB/nr_hugepages 
    64
    64
    64
    64
    [root@localhost dpdk-19.11]# 
    Breakpoint 1, main (argc=4, argv=0xfffffffff518) at /data1/dpdk-19.11/demo/memzone/main.c:45
    45                   mz = rte_memzone_reserve("memzone", sizeof(int)*2,
    (gdb) s
    rte_memzone_reserve (name=0xba6358 "memzone", len=8, socket_id=-1, flags=0)
        at /data1/dpdk-19.11/lib/librte_eal/common/eal_common_memzone.c:240
    240             return rte_memzone_reserve_thread_safe(name, len, socket_id,
    (gdb) s
    rte_memzone_reserve_thread_safe (name=0xba6358 "memzone", len=8, socket_id=-1, flags=0, align=128, bound=0)
        at /data1/dpdk-19.11/lib/librte_eal/common/eal_common_memzone.c:192
    192             const struct rte_memzone *mz = NULL;
    (gdb) list
    187     static const struct rte_memzone *
    188     rte_memzone_reserve_thread_safe(const char *name, size_t len, int socket_id,
    189                     unsigned int flags, unsigned int align, unsigned int bound)
    190     {
    191             struct rte_mem_config *mcfg;
    192             const struct rte_memzone *mz = NULL;
    193
    194             /* get pointer to global configuration */
    195             mcfg = rte_eal_get_configuration()->mem_config;
    196
    (gdb) n
    195             mcfg = rte_eal_get_configuration()->mem_config;
    (gdb) p *mcfg
    Cannot access memory at address 0xfffffffff380
    (gdb) n
    197             rte_rwlock_write_lock(&mcfg->mlock);
    (gdb) p *mcfg
    $1 = {magic = 19820526, version = 319488099, nchannel = 0, nrank = 0, mlock = {cnt = 0}, qlock = {cnt = 0}, 
      mplock = {cnt = 0}, tlock = {locked = 0}, memory_hotplug_lock = {cnt = 0}, memzones = {
        name = "memzone", '00' <repeats 56 times>, count = 143, len = 2560, elt_sz = 72, data = 0x100010000, 
        rwlock = {cnt = 0}}, memsegs = {{{base_va = 0x120000000, addr_64 = 4831838208}, page_sz = 536870912, 
          socket_id = 0, version = 1, len = 34359738368, external = 0, heap = 1, memseg_arr = {
            name = "memseg-524288k-0-0", '00' <repeats 45 times>, count = 1, len = 64, elt_sz = 48, 
            data = 0x100040000, rwlock = {cnt = 0}}}, {{base_va = 0x940000000, addr_64 = 39728447488}, 
          page_sz = 536870912, socket_id = 0, version = 0, len = 34359738368, external = 0, heap = 1, memseg_arr = {
            name = "memseg-524288k-0-1", '00' <repeats 45 times>, count = 0, len = 64, elt_sz = 48, 
            data = 0x920000000, rwlock = {cnt = 0}}}, {{base_va = 0x1160000000, addr_64 = 74625056768}, 
          page_sz = 536870912, socket_id = 0, version = 0, len = 34359738368, external = 0, heap = 1, memseg_arr = {
            name = "memseg-524288k-0-2", '00' <repeats 45 times>, count = 0, len = 64, elt_sz = 48, 
            data = 0x1140000000, rwlock = {cnt = 0}}}, {{base_va = 0x1980000000, addr_64 = 109521666048}, 
          page_sz = 536870912, socket_id = 0, version = 0, len = 34359738368, external = 0, heap = 1, memseg_arr = {
            name = "memseg-524288k-0-3", '00' <repeats 45 times>, count = 0, len = 64, elt_sz = 48, 
            data = 0x1960000000, rwlock = {cnt = 0}}}, {{base_va = 0x21a0000000, addr_64 = 144418275328}, 
          page_sz = 536870912, socket_id = 1, version = 0, len = 34359738368, external = 0, heap = 1, memseg_arr = {
            name = "memseg-524288k-1-0", '00' <repeats 45 times>, count = 0, len = 64, elt_sz = 48, 
            data = 0x2180000000, rwlock = {cnt = 0}}}, {{base_va = 0x29c0000000, addr_64 = 179314884608}, 
          page_sz = 536870912, socket_id = 1, version = 0, len = 34359738368, external = 0, heap = 1, memseg_arr = {
            name = "memseg-524288k-1-1", '00' <repeats 45 times>, count = 0, len = 64, elt_sz = 48, 
            data = 0x29a0000000, rwlock = {cnt = 0}}}, {{base_va = 0x31e0000000, addr_64 = 214211493888}, 
          page_sz = 536870912, socket_id = 1, version = 0, len = 34359738368, external = 0, heap = 1, memseg_arr = {
            name = "memseg-524288k-1-2", '00' <repeats 45 times>, count = 0, len = 64, elt_sz = 48, 
            data = 0x31c0000000, rwlock = {cnt = 0}}}, {{base_va = 0x3a00000000, addr_64 = 249108103168}, 
          page_sz = 536870912, socket_id = 1, version = 0, len = 34359738368, external = 0, heap = 1, memseg_arr = {
            name = "memseg-524288k-1-3", '00' <repeats 45 times>, count = 0, len = 64, elt_sz = 48, 
            data = 0x39e0000000, rwlock = {cnt = 0}}}, {{base_va = 0x4220000000, addr_64 = 284004712448}, 
          page_sz = 536870912, socket_id = 2, version = 0, len = 34359738368, external = 0, heap = 1, memseg_arr = {
            name = "memseg-524288k-2-0", '00' <repeats 45 times>, count = 0, len = 64, elt_sz = 48, 
            data = 0x4200000000, rwlock = {cnt = 0}}}, {{base_va = 0x4a40000000, addr_64 = 318901321728}, 
    ---Type <return> to continue, or q <return> to quit---
          page_sz = 536870912, socket_id = 2, version = 0, len = 34359738368, external = 0, heap = 1, memseg_arr = {
            name = "memseg-524288k-2-1", '00' <repeats 45 times>, count = 0, len = 64, elt_sz = 48, 
            data = 0x4a20000000, rwlock = {cnt = 0}}}, {{base_va = 0x5260000000, addr_64 = 353797931008}, 
          page_sz = 536870912, socket_id = 2, version = 0, len = 34359738368, external = 0, heap = 1, memseg_arr = {
            name = "memseg-524288k-2-2", '00' <repeats 45 times>, count = 0, len = 64, elt_sz = 48, 
            data = 0x5240000000, rwlock = {cnt = 0}}}, {{base_va = 0x5a80000000, addr_64 = 388694540288}, 
          page_sz = 536870912, socket_id = 2, version = 0, len = 34359738368, external = 0, heap = 1, memseg_arr = {
            name = "memseg-524288k-2-3", '00' <repeats 45 times>, count = 0, len = 64, elt_sz = 48, 
            data = 0x5a60000000, rwlock = {cnt = 0}}}, {{base_va = 0x62a0000000, addr_64 = 423591149568}, 
          page_sz = 536870912, socket_id = 3, version = 0, len = 34359738368, external = 0, heap = 1, memseg_arr = {
            name = "memseg-524288k-3-0", '00' <repeats 45 times>, count = 0, len = 64, elt_sz = 48, 
            data = 0x6280000000, rwlock = {cnt = 0}}}, {{base_va = 0x6ac0000000, addr_64 = 458487758848}, 
          page_sz = 536870912, socket_id = 3, version = 0, len = 34359738368, external = 0, heap = 1, memseg_arr = {
            name = "memseg-524288k-3-1", '00' <repeats 45 times>, count = 0, len = 64, elt_sz = 48, 
            data = 0x6aa0000000, rwlock = {cnt = 0}}}, {{base_va = 0x72e0000000, addr_64 = 493384368128}, 
          page_sz = 536870912, socket_id = 3, version = 0, len = 34359738368, external = 0, heap = 1, memseg_arr = {
            name = "memseg-524288k-3-2", '00' <repeats 45 times>, count = 0, len = 64, elt_sz = 48, 
            data = 0x72c0000000, rwlock = {cnt = 0}}}, {{base_va = 0x7b00000000, addr_64 = 528280977408}, 
          page_sz = 536870912, socket_id = 3, version = 0, len = 34359738368, external = 0, heap = 1, memseg_arr = {
            name = "memseg-524288k-3-3", '00' <repeats 45 times>, count = 0, len = 64, elt_sz = 48, 
            data = 0x7ae0000000, rwlock = {cnt = 0}}}, {{base_va = 0x0, addr_64 = 0}, page_sz = 0, socket_id = 0, 
          version = 0, len = 0, external = 0, heap = 0, memseg_arr = {name = '00' <repeats 63 times>, count = 0, 
            len = 0, elt_sz = 0, data = 0x0, rwlock = {cnt = 0}}} <repeats 48 times>}, tailq_head = {{tailq_head = {
            tqh_first = 0x0, tqh_last = 0x100002288}, name = "RTE_LPM", '00' <repeats 24 times>}, {tailq_head = {
            tqh_first = 0x0, tqh_last = 0x1000022b8}, name = "RTE_LPM6", '00' <repeats 23 times>}, {tailq_head = {
            tqh_first = 0x0, tqh_last = 0x1000022e8}, name = "RTE_ACL", '00' <repeats 24 times>}, {tailq_head = {
            tqh_first = 0x13ff79c00, tqh_last = 0x13ff79c00}, name = "RTE_HASH", '00' <repeats 23 times>}, {
          tailq_head = {tqh_first = 0x0, tqh_last = 0x100002348}, name = "RTE_FBK_HASH", '00' <repeats 19 times>}, 
        {tailq_head = {tqh_first = 0x0, tqh_last = 0x100002378}, name = "RTE_MEMBER", '00' <repeats 21 times>}, {
          tailq_head = {tqh_first = 0x0, tqh_last = 0x1000023a8}, 
          name = "RTE_MBUF_DYNFIELD", '00' <repeats 14 times>}, {tailq_head = {tqh_first = 0x0, 
    ---Type <return> to continue, or q <return> to quit---
            tqh_last = 0x1000023d8}, name = "RTE_MBUF_DYNFLAG", '00' <repeats 15 times>}, {tailq_head = {
            tqh_first = 0x0, tqh_last = 0x100002408}, name = "RTE_EVENT_RING", '00' <repeats 17 times>}, {
          tailq_head = {tqh_first = 0x13febd800, tqh_last = 0x13febd800}, 
          name = "RTE_MEMPOOL", '00' <repeats 20 times>}, {tailq_head = {tqh_first = 0x0, tqh_last = 0x100002468}, 
          name = "RTE_STACK", '00' <repeats 22 times>}, {tailq_head = {tqh_first = 0x13ff9a080, 
            tqh_last = 0x13febd500}, name = "RTE_RING", '00' <repeats 23 times>}, {tailq_head = {tqh_first = 0x0, 
            tqh_last = 0x1000024c8}, name = "RTE_REORDER", '00' <repeats 20 times>}, {tailq_head = {
            tqh_first = 0x0, tqh_last = 0x1000024f8}, name = "RTE_KNI", '00' <repeats 24 times>}, {tailq_head = {
            tqh_first = 0x13ffd4d80, tqh_last = 0x13ffd4d80}, 
          name = "VFIO_RESOURCE_LIST", '00' <repeats 13 times>}, {tailq_head = {tqh_first = 0x0, 
            tqh_last = 0x100002558}, name = "UIO_RESOURCE_LIST", '00' <repeats 14 times>}, {tailq_head = {
            tqh_first = 0x0, tqh_last = 0x100002588}, name = "VMBUS_RESOURCE_LIST", '00' <repeats 12 times>}, {
          tailq_head = {tqh_first = 0x0, tqh_last = 0x0}, name = '00' <repeats 31 times>} <repeats 15 times>}, 
      malloc_heaps = {{lock = {locked = 0}, free_head = {{lh_first = 0x0}, {lh_first = 0x0}, {lh_first = 0x0}, {
              lh_first = 0x0}, {lh_first = 0x0}, {lh_first = 0x13fe81000}, {lh_first = 0x0}, {lh_first = 0x0}, {
              lh_first = 0x0}, {lh_first = 0x0}, {lh_first = 0x0}, {lh_first = 0x120000000}, {lh_first = 0x0}}, 
          first = 0x120000000, last = 0x13fffdf80, alloc_count = 259, socket_id = 0, total_size = 536870912, 
          name = "socket_0", '00' <repeats 23 times>}, {lock = {locked = 0}, free_head = {{
              lh_first = 0x0} <repeats 13 times>}, first = 0x0, last = 0x0, alloc_count = 0, socket_id = 1, 
          total_size = 0, name = "socket_1", '00' <repeats 23 times>}, {lock = {locked = 0}, free_head = {{
              lh_first = 0x0} <repeats 13 times>}, first = 0x0, last = 0x0, alloc_count = 0, socket_id = 2, 
          total_size = 0, name = "socket_2", '00' <repeats 23 times>}, {lock = {locked = 0}, free_head = {{
              lh_first = 0x0} <repeats 13 times>}, first = 0x0, last = 0x0, alloc_count = 0, socket_id = 3, 
          total_size = 0, name = "socket_3", '00' <repeats 23 times>}, {lock = {locked = 0}, free_head = {{
              lh_first = 0x0} <repeats 13 times>}, first = 0x0, last = 0x0, alloc_count = 0, socket_id = 0, 
          total_size = 0, name = '00' <repeats 31 times>} <repeats 28 times>}, next_socket_id = 256, 
      mem_cfg_addr = 4294967296, legacy_mem = 0, single_file_segments = 0, tsc_hz = 100000000, 
      dma_maskbits = 0 '00'}
    (gdb) 
    (gdb) n
    199             mz = memzone_reserve_aligned_thread_unsafe(
    (gdb) s
    memzone_reserve_aligned_thread_unsafe (name=0xba6358 "memzone", len=8, socket_id=-1, flags=0, align=128, bound=0)
        at /data1/dpdk-19.11/lib/librte_eal/common/eal_common_memzone.c:69
    69              mcfg = rte_eal_get_configuration()->mem_config;
    (gdb) n
    70              arr = &mcfg->memzones;
    (gdb) n
    73              if (arr->count >= arr->len) {
    (gdb) p *arr
    $2 = {name = "memzone", '00' <repeats 56 times>, count = 143, len = 2560, elt_sz = 72, data = 0x100010000, 
      rwlock = {cnt = 0}}
    (gdb) n
    79              if (strlen(name) > sizeof(mz->name) - 1) {
    (gdb) n
    87              if ((memzone_lookup_thread_unsafe(name)) != NULL) {
    (gdb) n
    95              if (align && !rte_is_power_of_2(align)) {
    (gdb) n
    103             if (align < RTE_CACHE_LINE_SIZE)
    (gdb) n
    107             if (len > SIZE_MAX - RTE_CACHE_LINE_MASK) {
    (gdb) n
    112             len = RTE_ALIGN_CEIL(len, RTE_CACHE_LINE_SIZE);
    (gdb) n
    115             requested_len = RTE_MAX((size_t)RTE_CACHE_LINE_SIZE,  len);
    (gdb) n
    118             if (bound != 0 && (requested_len > bound || !rte_is_power_of_2(bound))) {
    (gdb) n
    123             if ((socket_id != SOCKET_ID_ANY) && socket_id < 0) {
    (gdb) n
    131             if (!rte_eal_has_hugepages() && socket_id < RTE_MAX_NUMA_NODES)
    (gdb) n
    134             contig = (flags & RTE_MEMZONE_IOVA_CONTIG) != 0;
    (gdb) n
    136             flags &= ~RTE_MEMZONE_IOVA_CONTIG;
    (gdb) n
    138             if (len == 0 && bound == 0) {
    (gdb) n
    144                     if (len == 0)
    (gdb) n
    147                     mz_addr = malloc_heap_alloc(NULL, requested_len, socket_id,            ---------------------mz_addr从heap分配
    (gdb) n
    150             if (mz_addr == NULL) {
    (gdb) n
    155             struct malloc_elem *elem = malloc_elem_from_data(mz_addr);
    (gdb) n
    158             mz_idx = rte_fbarray_find_next_free(arr, 0);
    (gdb) n
    160             if (mz_idx < 0) {
    (gdb) n
    163                     rte_fbarray_set_used(arr, mz_idx);
    (gdb) n
    164                     mz = rte_fbarray_get(arr, mz_idx);
    (gdb) p *mz
    $3 = {name = "356o.01c00v230000000000000000377377377377", '00' <repeats 11 times>, 
      {phys_addr = 0, iova = 0}, {addr = 0x656e6f7a6d656d, addr_64 = 28550397722191213}, len = 0, hugepage_sz = 0, 
      socket_id = 0, flags = 0}
    (gdb) n
    167             if (mz == NULL) {
    (gdb) n
    174             strlcpy(mz->name, name, sizeof(mz->name));
    (gdb) n
    175             mz->iova = rte_malloc_virt2iova(mz_addr);
    (gdb) n
    176             mz->addr = mz_addr;
    (gdb) n
    178                             elem->size - elem->pad - MALLOC_ELEM_OVERHEAD :
    (gdb) n
    177             mz->len = requested_len == 0 ?
    (gdb) n
    180             mz->hugepage_sz = elem->msl->page_sz;
    (gdb) n
    181             mz->socket_id = elem->msl->socket_id;
    (gdb) p *elem
    $4 = {heap = 0x100002900, prev = 0x13fe81000, next = 0x13febc800, free_list = {le_next = 0x0, le_prev = 0x0}, 
      msl = 0x100000088, state = ELEM_BUSY, pad = 0, size = 256, orig_elem = 0x120000000, orig_size = 536870912}
    (gdb) n
    182             mz->flags = 0;
    (gdb) n
    184             return mz;
    (gdb) p *mz
    $5 = {name = "memzone", '00' <repeats 24 times>, {phys_addr = 261454808960, iova = 261454808960}, {
        addr = 0x13febc780, addr_64 = 5367383936}, len = 128, hugepage_sz = 536870912, socket_id = 0, flags = 0}
    (gdb) c
    static const struct rte_memzone *
    memzone_reserve_aligned_thread_unsafe(const char *name, size_t len,
            int socket_id, unsigned flags, unsigned align, unsigned bound)
    {
        struct rte_memzone *mz;
        struct rte_mem_config *mcfg;
        size_t requested_len;
        int socket, i;
    
        /* 获取全局变量rte_mem_config结构的指针 */
        mcfg = rte_eal_get_configuration()->mem_config;
    
        /* no more room in config */
        /*如果分配的memzone数量已经超过了最大值,则返错(数组大小是有限的)*/
        if (mcfg->memzone_cnt >= RTE_MAX_MEMZONE) {
            RTE_LOG(ERR, EAL, "%s(): No more room in config
    ", __func__);
            rte_errno = ENOSPC;
            return NULL;
        }
        /*检查memzone的名字长度是否超过了限制*/
        if (strlen(name) > sizeof(mz->name) - 1) {
            RTE_LOG(DEBUG, EAL, "%s(): memzone <%s>: name too long
    ",
                __func__, name);
            rte_errno = ENAMETOOLONG;
            return NULL;
        }
    
        /* 在mcfg->memzone[]中查找是否已有同名的memzone,如果有表示已存在,返回创建出错*/
        if ((memzone_lookup_thread_unsafe(name)) != NULL) {
            RTE_LOG(DEBUG, EAL, "%s(): memzone <%s> already exists
    ",
                __func__, name);
            rte_errno = EEXIST;
            return NULL;
        }
    
        /* 检查对齐内存大小是否是2的幂大小 */
        if (align && !rte_is_power_of_2(align)) {
            RTE_LOG(ERR, EAL, "%s(): Invalid alignment: %u
    ", __func__,
                    align);
            rte_errno = EINVAL;
            return NULL;
        }
    
        /* alignment less than cache size is not allowed */
        if (align < RTE_CACHE_LINE_SIZE)/*对齐大小不能小于cache_line大小*/
            align = RTE_CACHE_LINE_SIZE;
    
        /* align length on cache boundary. Check for overflow before doing so */
        if (len > SIZE_MAX - RTE_CACHE_LINE_MASK) {
            rte_errno = EINVAL; /* requested size too big */
            return NULL;
        }
    
        len += RTE_CACHE_LINE_MASK;
        len &= ~((size_t) RTE_CACHE_LINE_MASK); /*申请内存大小进行内存对齐计算*/
    
        /* save minimal requested length */
        /*当申请的内存大小小于RTE_CACHE_LINE_SIZE时,则至少要分配RTE_CACHE_LINE_SIZE大小的内存*/
        requested_len = RTE_MAX((size_t)RTE_CACHE_LINE_SIZE, len);
    
        /* check that boundary condition is valid */
        if (bound != 0 && (requested_len > bound || !rte_is_power_of_2(bound))) {
            rte_errno = EINVAL;
            return NULL;
        }
        /*检查socket_id的合法性*/
        if ((socket_id != SOCKET_ID_ANY) && (socket_id >= RTE_MAX_NUMA_NODES)) {
            rte_errno = EINVAL;
            return NULL;
        }
        /*如果不使用hugepage,memzone的内存分配就不会考虑socke_id,而直接设置为SOCKET_ID_ANY*/
        if (!rte_eal_has_hugepages())
            socket_id = SOCKET_ID_ANY;
    
        if (len == 0) { /*申请内存大小等于0的情况,则申请申请最大的连续内存空间*/
            if (bound != 0)
                requested_len = bound;
            else {
                requested_len = find_heap_max_free_elem(&socket_id, align);
                if (requested_len == 0) {
                    rte_errno = ENOMEM;
                    return NULL;
                }
            }
        }
        /*如果socket_id为SOCKET_ID_ANY,则先在当前cpu所在的socket上分配内存*/
        if (socket_id == SOCKET_ID_ANY)
            socket = malloc_get_numa_socket();
        else
            socket = socket_id;
    
        /* 尝试在当前socket对应的malloc_heap上分配内存 */
        void *mz_addr = malloc_heap_alloc(&mcfg->malloc_heaps[socket], NULL,
                requested_len, flags, align, bound);
        /*如果socket_id为SOCKET_ID_ANY,且在当前socket上分配失败,就尝试在其他cpu分配*/
        if ((mz_addr == NULL) && (socket_id == SOCKET_ID_ANY)) {
            /* try other heaps */
            for (i = 0; i < RTE_MAX_NUMA_NODES; i++) {
                if (socket == i)
                    continue;
    
                mz_addr = malloc_heap_alloc(&mcfg->malloc_heaps[i],
                        NULL, requested_len, flags, align, bound);
                if (mz_addr != NULL)
                    break;
            }
        }
    
        if (mz_addr == NULL) {
            rte_errno = ENOMEM;
            return NULL;
        }
        /*获取对应内存的malloc_elem结构*/
        const struct malloc_elem *elem = malloc_elem_from_data(mz_addr);
    
        /* 从mcfg->memzone[]中找到一个还为使用的memzone结构 */
        mz = get_next_free_memzone();
    
        if (mz == NULL) {
            RTE_LOG(ERR, EAL, "%s(): Cannot find free memzone but there is room "
                    "in config!
    ", __func__);
            rte_errno = ENOSPC;
            return NULL;
        }
        /*增加mcfg的memzone计数*/
        mcfg->memzone_cnt++;
        snprintf(mz->name, sizeof(mz->name), "%s", name);
        mz->phys_addr = rte_malloc_virt2phy(mz_addr);
        mz->addr = mz_addr;
        mz->len = (requested_len == 0 ? elem->size : requested_len);
        mz->hugepage_sz = elem->ms->hugepage_sz;/*memzone对应的socketid和hupagesize即为对应malloc_elem的值*/
        mz->socket_id = elem->ms->socket_id;
        mz->flags = 0;
        mz->memseg_id = elem->ms - rte_eal_get_configuration()->mem_config->memseg;
    
        return mz;
    }
        mz = get_next_free_memzone();

    19.11版本

    mz = rte_fbarray_get(arr, mz_idx);
    if (len == 0 && bound == 0) {
                    /* no size constraints were placed, so use malloc elem len */
                    requested_len = 0;
                    mz_addr = malloc_heap_alloc_biggest(NULL, socket_id, flags,
                                    align, contig);
            } else {
                    if (len == 0)
                            requested_len = bound;
                    /* allocate memory on heap */
                    mz_addr = malloc_heap_alloc(NULL, requested_len, socket_id,
                                    flags, align, bound, contig);
            }
            if (mz_addr == NULL) {
                    rte_errno = ENOMEM;
                    return NULL;
            }
    struct malloc_elem *elem = malloc_elem_from_data(mz_addr);
    
            /* fill the zone in config */
            mz_idx = rte_fbarray_find_next_free(arr, 0);
    
            if (mz_idx < 0) {
                    mz = NULL;
            } else {
                    rte_fbarray_set_used(arr, mz_idx);
                    mz = rte_fbarray_get(arr, mz_idx);
            }
    
            if (mz == NULL) {
                    RTE_LOG(ERR, EAL, "%s(): Cannot find free memzone\n", __func__);
                    malloc_heap_free(elem);
                    rte_errno = ENOSPC;
                    return NULL;
            }
    
            strlcpy(mz->name, name, sizeof(mz->name));
            mz->iova = rte_malloc_virt2iova(mz_addr);
            mz->addr = mz_addr;
            mz->len = requested_len == 0 ?
                            elem->size - elem->pad - MALLOC_ELEM_OVERHEAD :
                            requested_len;
            mz->hugepage_sz = elem->msl->page_sz;
            mz->socket_id = elem->msl->socket_id;
            mz->flags = 0;
    
            return mz;

     

    看一下memzone的结构体, 包含了zone的name、起始IO addr、virt addr、长度、对应的大页大小等。

    /**
     * A structure describing a memzone, which is a contiguous portion of
     * physical memory identified by a name.
     *
     * The structure is declared packed, so members are laid out back to back
     * in declaration order.
     */
    struct rte_memzone {
    
    #define RTE_MEMZONE_NAMESIZE 32       /**< Size in bytes of the name buffer below. */
    	char name[RTE_MEMZONE_NAMESIZE];  /**< Name of the memory zone. */
    
    	/* phys_addr and iova are two names for the same storage. */
    	RTE_STD_C11
    	union {
    		phys_addr_t phys_addr;        /**< deprecated - Start physical address. */
    		rte_iova_t iova;              /**< Start IO address. */
    	};
    	/* addr_64 overlays addr so that this slot is at least 64 bits wide. */
    	RTE_STD_C11
    	union {
    		void *addr;                   /**< Start virtual address. */
    		uint64_t addr_64;             /**< Makes sure addr is always 64-bits */
    	};
    	size_t len;                       /**< Length of the memzone. */
    
    	uint64_t hugepage_sz;             /**< The page size of underlying memory */
    
    	int32_t socket_id;                /**< NUMA socket ID. */
    
    	uint32_t flags;                   /**< Characteristics of this memzone. */
    	uint32_t memseg_id;               /**< Memseg it belongs. */
    } __attribute__((__packed__));

    接下来,我们从rte_memzone_reserve()开始看起。用户程序调用该函数申请memzone时不需要指定align和bound(内部默认传入align=RTE_CACHE_LINE_SIZE、bound=0)。DPDK为提高内存读写效率,到处运用了内存对齐技术,但暴露给用户的接口不必像它的底层实现那样处处留意对齐细节。从这一段就可以大概看出DPDK的封装确实很好,只暴露有必要暴露的部分。

    /*
     * Reserve a memzone using the default constraints: the alignment is one
     * cache line and no boundary is imposed. All other arguments are passed
     * unchanged to rte_memzone_reserve_thread_safe(), whose result is returned.
     */
    const struct rte_memzone *
    rte_memzone_reserve(const char *name, size_t len, int socket_id,
    		    unsigned flags)
    {
    	const unsigned default_align = RTE_CACHE_LINE_SIZE;
    	const unsigned no_bound = 0;
    	const struct rte_memzone *zone;

    	zone = rte_memzone_reserve_thread_safe(name, len, socket_id, flags,
    					       default_align, no_bound);
    	return zone;
    }

    这里继续封装一层,上了一把锁,因此 memzone_reserve_aligned_thread_unsafe这个函数的实现将不会再考虑线程安全的问题了。

    static const struct rte_memzone *
    rte_memzone_reserve_thread_safe(const char *name, size_t len,
    				int socket_id, unsigned flags, unsigned align,
    				unsigned bound)
    {
    	rte_rwlock_write_lock(&mcfg->mlock);
    	mz = memzone_reserve_aligned_thread_unsafe(
    		name, len, socket_id, flags, align, bound);
    	rte_rwlock_write_unlock(&mcfg->mlock);
    	return mz;
    }

    继续分析 memzone_reserve_aligned_thread_unsafe()。首先检查memzone数量,这个最大值是用户编译DPDK前通过配置文件指定的,因此这里也可以看到,并不是DPDK绑定的所有大页内存都拿来做memzone了,还有其他的内存模块会使用到。

    	/* no more room in config */
    	if (mcfg->memzone_cnt >= RTE_MAX_MEMZONE) {
    		RTE_LOG(ERR, EAL, "%s(): No more room in config\n", __func__);
    		rte_errno = ENOSPC;
    		return NULL;
    	}

    检查用户申请的name是否已经存在。这个函数里面的实现很简单,在memzone数组中一个一个memzone地找过去,一个一个比较这个name是否已经存在。这里就可以看到memzone的申请确实效率很低,不适合大数量多次数地申请,只适合对申请效率要求不高的程序,或者预先规划好在程序初始化过程中一次性把需要的memzone全部申请完。

    	/* zone already exist */
    	if ((memzone_lookup_thread_unsafe(name)) != NULL) {
    		RTE_LOG(DEBUG, EAL, "%s(): memzone <%s> already exists\n",
    			__func__, name);
    		rte_errno = EEXIST;
    		return NULL;
    	}

    如果用户不指定要求alloc的memzone的内存长度(len为0且未指定bound),DPDK会在所有heap中找出最大的空闲malloc_elem给用户。find_heap_max_free_elem()这个函数效率更低,要把每一个heap的每一个queue的每一个elem都遍历一遍,全部遍历完之后才能知道哪个空闲elem的长度最大。

    			requested_len = find_heap_max_free_elem(&socket_id, align);
    			if (requested_len == 0) {
    				rte_errno = ENOMEM;
    				return NULL;
    			}

    如果用户指定了len,就以用户指定为准(按cache line对齐后);如果没指定(即len=0),指定了bound则按bound申请,否则以找到的最大长度来申请elem。

    	/* allocate memory on heap */
    	void *mz_addr = malloc_heap_alloc(&mcfg->malloc_heaps[socket], NULL,
    			requested_len, flags, align, bound);

    如果用户没有指定socket id的话,就到其他的heap中去申请一下内存,但这样存在一个问题,会出现跨socket访问内存的问题,这个对效率影响非常大,程序性能甚至会降到30%左右,直接打了3折。

    	if ((mz_addr == NULL) && (socket_id == SOCKET_ID_ANY)) {
    		/* try other heaps */
    		for (i = 0; i < RTE_MAX_NUMA_NODES; i++) {
    			if (socket == i)
    				continue;
    			mz_addr = malloc_heap_alloc(&mcfg->malloc_heaps[i],
    					NULL, requested_len, flags, align, bound);
    			if (mz_addr != NULL)
    				break;
    		}
    	}

    最后根据alloc到的elem和相关信息填写一下新的memzone,返回给用户

    	struct malloc_elem *elem = malloc_elem_from_data(mz_addr);
    
    	/* fill the zone in config */
    	mz = get_next_free_memzone();
    	mcfg->memzone_cnt++;
    	snprintf(mz->name, sizeof(mz->name), "%s", name);
    	mz->iova = rte_malloc_virt2iova(mz_addr);
    	mz->addr = mz_addr;
    	mz->len = (requested_len == 0 ? elem->size : requested_len);
    	mz->hugepage_sz = elem->ms->hugepage_sz;
    	mz->socket_id = elem->ms->socket_id;
    	mz->flags = 0;
    	mz->memseg_id = elem->ms - rte_eal_get_configuration()->mem_config->memseg;

    接下来看看memzone的释放流程。先用memset清空memzone数组中对应的表项,最后调用rte_free释放内存。我们在下一篇文章再来分析这个rte_free的实现。

    int
    rte_memzone_free(const struct rte_memzone *mz)
    {
    	rte_rwlock_write_lock(&mcfg->mlock);
    
    	idx = ((uintptr_t)mz - (uintptr_t)mcfg->memzone);
    	idx = idx / sizeof(struct rte_memzone);
    
    	addr = mcfg->memzone[idx].addr;
    	if (addr == NULL)
    		ret = -EINVAL;
    	else if (mcfg->memzone_cnt == 0) {
    		rte_panic("%s(): memzone address not NULL but memzone_cnt is 0!
    ",
    				__func__);
    	} else {
    		memset(&mcfg->memzone[idx], 0, sizeof(mcfg->memzone[idx]));
    		mcfg->memzone_cnt--;
    	}
    
    	rte_rwlock_write_unlock(&mcfg->mlock);
    
    	rte_free(addr);
    
    	return ret;
    }
    
  • 相关阅读:
    springmvc log4j 配置
    intellij idea maven springmvc 环境搭建
    spring,property not found on type
    intellij idea maven 工程生成可执行的jar
    device eth0 does not seem to be present, delaying initialization
    macos ssh host配置及免密登陆
    centos7 搭建 docker 环境
    通过rest接口获取自增id (twitter snowflake算法)
    微信小程序开发体验
    gitbook 制作 beego 参考手册
  • 原文地址:https://www.cnblogs.com/dream397/p/13601025.html
Copyright © 2011-2022 走看看