dpdk 使用 Linux 提供的获取大页、页表、numa 节点表、mmap 功能重新组织内存。
- 大页: 最多可以同时存在 3 种大页,一般只使用 2M 的大页。linux 中大页的相关信息是通过访问并解析 /sys/kernel/mm/hugepages、/proc/meminfo、/proc/mounts 等内容得到的。
- 页表: linux 中每个进程的页表对应的文件是 /proc/pid/pagemap,本进程的页表是 /proc/self/pagemap。
- numa 节点表: linux 中每个物理页所在的 numa socket 表对应的文件是 /proc/pid/numa_maps,本进程的 numa socket 表是 /proc/self/numa_maps。
- 大页内存:Hugepages详解
[root@localhost simple_mp]# ./build/simple_mp -l 126-127 --proc-type=primary EAL: Detected 128 lcore(s) EAL: Detected 4 NUMA nodes EAL: Multi-process socket /var/run/dpdk/rte/mp_socket EAL: Selected IOVA mode 'PA' EAL: Probing VFIO support... EAL: VFIO support initialized EAL: PCI device 0000:05:00.0 on NUMA socket 0 EAL: probe driver: 19e5:200 net_hinic
[root@localhost simple_mp]# ps -elf | grep simple 0 R root 15154 124128 96 80 0 - 8389397 - 05:27 pts/1 00:00:09 ./build/simple_mp -l 126-127 --proc-type=primary 0 S root 15168 7504 0 80 0 - 1729 pipe_w 05:27 pts/2 00:00:00 grep --color=auto simple [root@localhost simple_mp]# cat /proc/15154/numa_maps 00400000 default file=/data1/dpdk-19.11/examples/multi_process/simple_mp/build/simple_mp mapped=44 N1=44 kernelpagesize_kB=64 00ad0000 default file=/data1/dpdk-19.11/examples/multi_process/simple_mp/build/simple_mp anon=1 dirty=1 mapped=2 N0=1 N1=1 kernelpagesize_kB=64 00af0000 default file=/data1/dpdk-19.11/examples/multi_process/simple_mp/build/simple_mp anon=4 dirty=4 N3=4 kernelpagesize_kB=64 00b30000 default anon=21 dirty=21 N0=15 N3=6 kernelpagesize_kB=64 0ead0000 default heap anon=42 dirty=42 N0=15 N3=27 kernelpagesize_kB=64 100000000 default file=/run/dpdk/rte/config dirty=1 N0=1 kernelpagesize_kB=64 100010000 default file=/run/dpdk/rte/fbarray_memzone dirty=3 N0=3 kernelpagesize_kB=64 100040000 default file=/run/dpdk/rte/fbarray_memseg-524288k-0-0 dirty=1 N0=1 kernelpagesize_kB=64 120000000 default 920000000 default file=/run/dpdk/rte/fbarray_memseg-524288k-0-1 dirty=1 N0=1 kernelpagesize_kB=64 940000000 default 1140000000 default file=/run/dpdk/rte/fbarray_memseg-524288k-1-0 dirty=1 N0=1 kernelpagesize_kB=64 1160000000 default 1960000000 default file=/run/dpdk/rte/fbarray_memseg-524288k-1-1 dirty=1 N0=1 kernelpagesize_kB=64 1980000000 default 2180000000 default file=/run/dpdk/rte/fbarray_memseg-524288k-2-0 dirty=1 N0=1 kernelpagesize_kB=64 21a0000000 default 29a0000000 default file=/run/dpdk/rte/fbarray_memseg-524288k-2-1 dirty=1 N0=1 kernelpagesize_kB=64 29c0000000 default 31c0000000 default file=/run/dpdk/rte/fbarray_memseg-524288k-3-0 dirty=1 N0=1 kernelpagesize_kB=64 31e0000000 default 39e0000000 default file=/run/dpdk/rte/fbarray_memseg-524288k-3-1 dirty=1 N0=1 kernelpagesize_kB=64 3a00000000 default 4200000000 default 
file=/run/dpdk/rte/fbarray_memseg-2048k-0-0 dirty=7 N0=7 kernelpagesize_kB=64 4200200000 default 4600200000 default file=/run/dpdk/rte/fbarray_memseg-2048k-0-1 dirty=7 N0=7 kernelpagesize_kB=64 4600400000 default 4a00400000 default file=/run/dpdk/rte/fbarray_memseg-2048k-0-2 dirty=7 N0=7 kernelpagesize_kB=64 4a00600000 default 4e00600000 default file=/run/dpdk/rte/fbarray_memseg-2048k-0-3 dirty=7 N0=7 kernelpagesize_kB=64 4e00800000 default 5200800000 default file=/run/dpdk/rte/fbarray_memseg-2048k-1-0 dirty=7 N0=7 kernelpagesize_kB=64 5200a00000 default 5600a00000 default file=/run/dpdk/rte/fbarray_memseg-2048k-1-1 dirty=7 N0=7 kernelpagesize_kB=64 5600c00000 default 5a00c00000 default file=/run/dpdk/rte/fbarray_memseg-2048k-1-2 dirty=7 N0=7 kernelpagesize_kB=64 5a00e00000 default 5e00e00000 default file=/run/dpdk/rte/fbarray_memseg-2048k-1-3 dirty=7 N0=7 kernelpagesize_kB=64 5e01000000 default 6201000000 default file=/run/dpdk/rte/fbarray_memseg-2048k-2-0 dirty=7 N0=7 kernelpagesize_kB=64 6201200000 default 6601200000 default file=/run/dpdk/rte/fbarray_memseg-2048k-2-1 dirty=7 N0=7 kernelpagesize_kB=64 6601400000 default 6a01400000 default file=/run/dpdk/rte/fbarray_memseg-2048k-2-2 dirty=7 N0=7 kernelpagesize_kB=64 6a01600000 default 6e01600000 default file=/run/dpdk/rte/fbarray_memseg-2048k-2-3 dirty=7 N0=7 kernelpagesize_kB=64 6e01800000 default 7201800000 default file=/run/dpdk/rte/fbarray_memseg-2048k-3-0 dirty=7 N0=7 kernelpagesize_kB=64 7201a00000 default file=/mnt/huge/rtemap_163840 huge dirty=1 N3=1 kernelpagesize_kB=2048 7201c00000 default file=/mnt/huge/rtemap_163841 huge dirty=1 N3=1 kernelpagesize_kB=2048 7201e00000 default file=/mnt/huge/rtemap_163842 huge dirty=1 N3=1 kernelpagesize_kB=2048 7202000000 default 7601a00000 default file=/run/dpdk/rte/fbarray_memseg-2048k-3-1 dirty=7 N0=7 kernelpagesize_kB=64 7601c00000 default 7a01c00000 default file=/run/dpdk/rte/fbarray_memseg-2048k-3-2 dirty=7 N0=7 kernelpagesize_kB=64 7a01e00000 default 7e01e00000 
default file=/run/dpdk/rte/fbarray_memseg-2048k-3-3 dirty=7 N0=7 kernelpagesize_kB=64 7e02000000 default ffffb8cc0000 default ffffb8cd0000 default anon=2 dirty=2 N0=1 N3=1 kernelpagesize_kB=64 ffffb94e0000 default ffffb94f0000 default anon=1 dirty=1 N0=1 kernelpagesize_kB=64 ffffb9cf0000 default ffffb9d00000 default anon=1 dirty=1 N0=1 kernelpagesize_kB=64 ffffba500000 default file=/usr/lib64/libgcc_s-4.8.5-20150702.so.1 mapped=1 mapmax=20 N2=1 kernelpagesize_kB=64 ffffba520000 default file=/usr/lib64/libgcc_s-4.8.5-20150702.so.1 anon=1 dirty=1 N0=1 kernelpagesize_kB=64 ffffba530000 default file=/usr/lib64/libgcc_s-4.8.5-20150702.so.1 anon=1 dirty=1 N0=1 kernelpagesize_kB=64 ffffba540000 default file=/usr/lib64/libc-2.17.so mapped=19 mapmax=31 N2=19 kernelpagesize_kB=64 ffffba6b0000 default file=/usr/lib64/libc-2.17.so anon=1 dirty=1 N0=1 kernelpagesize_kB=64 ffffba6c0000 default file=/usr/lib64/libc-2.17.so anon=1 dirty=1 N0=1 kernelpagesize_kB=64 ffffba6d0000 default file=/usr/lib64/libpthread-2.17.so mapped=2 mapmax=21 N2=2 kernelpagesize_kB=64 ffffba6f0000 default file=/usr/lib64/libpthread-2.17.so anon=1 dirty=1 N0=1 kernelpagesize_kB=64 ffffba700000 default file=/usr/lib64/libpthread-2.17.so anon=1 dirty=1 N0=1 kernelpagesize_kB=64 ffffba710000 default file=/usr/lib64/libdl-2.17.so mapped=1 mapmax=27 N2=1 kernelpagesize_kB=64 ffffba720000 default file=/usr/lib64/libdl-2.17.so anon=1 dirty=1 N0=1 kernelpagesize_kB=64 ffffba730000 default file=/usr/lib64/libdl-2.17.so anon=1 dirty=1 N0=1 kernelpagesize_kB=64 ffffba740000 default file=/usr/lib64/libnuma.so.1.0.0 mapped=1 mapmax=2 N0=1 kernelpagesize_kB=64 ffffba750000 default file=/usr/lib64/libnuma.so.1.0.0 anon=1 dirty=1 N0=1 kernelpagesize_kB=64 ffffba760000 default file=/usr/lib64/libnuma.so.1.0.0 anon=1 dirty=1 N0=1 kernelpagesize_kB=64 ffffba770000 default file=/usr/lib64/libm-2.17.so mapped=1 mapmax=17 N0=1 kernelpagesize_kB=64 ffffba810000 default file=/usr/lib64/libm-2.17.so anon=1 dirty=1 N0=1 
kernelpagesize_kB=64 ffffba820000 default file=/usr/lib64/libm-2.17.so anon=1 dirty=1 N0=1 kernelpagesize_kB=64 ffffba830000 default file=/usr/lib64/librt-2.17.so mapped=1 mapmax=20 N2=1 kernelpagesize_kB=64 ffffba840000 default file=/usr/lib64/librt-2.17.so anon=1 dirty=1 N0=1 kernelpagesize_kB=64 ffffba850000 default file=/usr/lib64/librt-2.17.so anon=1 dirty=1 N0=1 kernelpagesize_kB=64 ffffba860000 default anon=1 dirty=1 N0=1 kernelpagesize_kB=64 ffffba870000 default ffffba880000 default ffffba890000 default file=/usr/lib64/ld-2.17.so mapped=2 mapmax=31 N2=2 kernelpagesize_kB=64 ffffba8b0000 default file=/usr/lib64/ld-2.17.so anon=1 dirty=1 N0=1 kernelpagesize_kB=64 ffffba8c0000 default file=/usr/lib64/ld-2.17.so anon=1 dirty=1 N0=1 kernelpagesize_kB=64 fffffa700000 default stack anon=1 dirty=1 N0=1 kernelpagesize_kB=64 [root@localhost simple_mp]
[root@localhost simple_mp]# cat /sys/devices/system/node/node0/meminfo Node 0 MemTotal: 133176640 kB Node 0 MemFree: 96906880 kB Node 0 MemUsed: 36269760 kB Node 0 Active: 1257024 kB Node 0 Inactive: 387328 kB Node 0 Active(anon): 73152 kB Node 0 Inactive(anon): 448 kB Node 0 Active(file): 1183872 kB Node 0 Inactive(file): 386880 kB Node 0 Unevictable: 0 kB Node 0 Mlocked: 0 kB Node 0 Dirty: 0 kB Node 0 Writeback: 0 kB Node 0 FilePages: 1580160 kB Node 0 Mapped: 11712 kB Node 0 AnonPages: 65536 kB Node 0 Shmem: 9408 kB Node 0 KernelStack: 27328 kB Node 0 PageTables: 4032 kB Node 0 NFS_Unstable: 0 kB Node 0 Bounce: 0 kB Node 0 WritebackTmp: 0 kB Node 0 Slab: 367936 kB Node 0 SReclaimable: 110208 kB Node 0 SUnreclaim: 257728 kB Node 0 AnonHugePages: 0 kB Node 0 ShmemHugePages: 0 kB Node 0 ShmemPmdMapped: 0 kB Node 0 HugePages_Total: 64 Node 0 HugePages_Free: 64 Node 0 HugePages_Surp: 0 [root@localhost simple_mp]#
[root@localhost simple_mp]# ls -tlr /mnt/huge/ total 6144 -rw-------. 1 root root 2097152 Aug 28 05:27 rtemap_163840 -rw-------. 1 root root 2097152 Aug 28 05:27 rtemap_163842 -rw-------. 1 root root 2097152 Aug 28 05:27 rtemap_163841 [root@localhost simple_mp]#
rte_eal_hugepage_init()主要是在/mnt/huge目录下创建hugetlbfs配置的内存页数(在本文中就是64)的rtemap_xx文件,并为每个rtemap_xx文件做mmap映射,保证mmap后的虚拟地址与实际的物理地址是一样的。
[root@localhost lib]# ps -elf | grep simple 0 S root 15154 124128 7 80 0 - 8389378 wait_w 05:27 pts/1 00:01:36 ./build/simple_mp -l 126-127 --proc-type=primary 0 S root 16221 57486 0 80 0 - 1729 pipe_w 05:48 pts/0 00:00:00 grep --color=auto simple [root@localhost lib]# kill -9 15154 [root@localhost lib]# ls -ltr /mnt/huge/ // kill 了还存在 total 6144 -rw-------. 1 root root 2097152 Aug 28 05:27 rtemap_163840 -rw-------. 1 root root 2097152 Aug 28 05:27 rtemap_163842 -rw-------. 1 root root 2097152 Aug 28 05:27 rtemap_163841
[root@localhost dpdk-19.11]# ./usertools/dpdk-devbind.py -u 0000:05:00.0 [root@localhost dpdk-19.11]# ./usertools/dpdk-devbind.py --bind=vfio-pci 0000:05:00.0 [root@localhost dpdk-19.11]# ./examples/kni/build/app/kni -c 0xF -n 4 -- -P -p 0x1 --config="(0,3,1)" EAL: Detected 128 lcore(s) EAL: Detected 4 NUMA nodes EAL: Multi-process socket /var/run/dpdk/rte/mp_socket EAL: Selected IOVA mode 'PA' EAL: Probing VFIO support... EAL: VFIO support initialized EAL: PCI device 0000:05:00.0 on NUMA socket 0 EAL: probe driver: 19e5:200 net_hinic EAL: using IOMMU type 1 (Type 1) net_hinic: Initializing pf hinic-0000:05:00.0 in primary process
[root@localhost simple_mp]# ls -tlr /mnt/huge total 12288 -rw-------. 1 root root 2097152 Aug 28 05:59 rtemap_65536 -rw-------. 1 root root 2097152 Aug 28 05:59 rtemap_65537 -rw-------. 1 root root 2097152 Aug 28 05:59 rtemap_65541 -rw-------. 1 root root 2097152 Aug 28 05:59 rtemap_65540 -rw-------. 1 root root 2097152 Aug 28 05:59 rtemap_65539 -rw-------. 1 root root 2097152 Aug 28 05:59 rtemap_65538
[root@localhost simple_mp]# ls /var/run/dpdk/rte/ -lrt total 8000 srwxr-xr-x. 1 root root 0 Aug 28 05:59 mp_socket -rw-------. 1 root root 18816 Aug 28 05:59 config -rw-------. 1 root root 16576 Aug 28 05:59 hugepage_info -rw-------. 1 root root 196608 Aug 28 05:59 fbarray_memzone -rw-------. 1 root root 65536 Aug 28 05:59 fbarray_memseg-524288k-3-1 -rw-------. 1 root root 65536 Aug 28 05:59 fbarray_memseg-524288k-3-0 -rw-------. 1 root root 65536 Aug 28 05:59 fbarray_memseg-524288k-2-1 -rw-------. 1 root root 65536 Aug 28 05:59 fbarray_memseg-524288k-2-0 -rw-------. 1 root root 65536 Aug 28 05:59 fbarray_memseg-524288k-1-1 -rw-------. 1 root root 65536 Aug 28 05:59 fbarray_memseg-524288k-1-0 -rw-------. 1 root root 65536 Aug 28 05:59 fbarray_memseg-524288k-0-1 -rw-------. 1 root root 65536 Aug 28 05:59 fbarray_memseg-524288k-0-0 -rw-------. 1 root root 458752 Aug 28 05:59 fbarray_memseg-2048k-3-3 -rw-------. 1 root root 458752 Aug 28 05:59 fbarray_memseg-2048k-3-2 -rw-------. 1 root root 458752 Aug 28 05:59 fbarray_memseg-2048k-3-1 -rw-------. 1 root root 458752 Aug 28 05:59 fbarray_memseg-2048k-3-0 -rw-------. 1 root root 458752 Aug 28 05:59 fbarray_memseg-2048k-2-3 -rw-------. 1 root root 458752 Aug 28 05:59 fbarray_memseg-2048k-2-2 -rw-------. 1 root root 458752 Aug 28 05:59 fbarray_memseg-2048k-2-1 -rw-------. 1 root root 458752 Aug 28 05:59 fbarray_memseg-2048k-2-0 -rw-------. 1 root root 458752 Aug 28 05:59 fbarray_memseg-2048k-1-3 -rw-------. 1 root root 458752 Aug 28 05:59 fbarray_memseg-2048k-1-2 -rw-------. 1 root root 458752 Aug 28 05:59 fbarray_memseg-2048k-1-1 -rw-------. 1 root root 458752 Aug 28 05:59 fbarray_memseg-2048k-1-0 -rw-------. 1 root root 458752 Aug 28 05:59 fbarray_memseg-2048k-0-3 -rw-------. 1 root root 458752 Aug 28 05:59 fbarray_memseg-2048k-0-2 -rw-------. 1 root root 458752 Aug 28 05:59 fbarray_memseg-2048k-0-1 -rw-------. 1 root root 458752 Aug 28 05:59 fbarray_memseg-2048k-0-0 [root@localhost simple_mp]#
[root@localhost simple_mp]# cat /proc/meminfo MemTotal: 535414912 kB MemFree: 395457472 kB MemAvailable: 356217664 kB Buffers: 13696 kB Cached: 2478208 kB SwapCached: 0 kB Active: 1958720 kB Inactive: 718464 kB Active(anon): 201216 kB Inactive(anon): 51648 kB Active(file): 1757504 kB Inactive(file): 666816 kB Unevictable: 0 kB Mlocked: 0 kB SwapTotal: 0 kB SwapFree: 0 kB Dirty: 256 kB Writeback: 0 kB AnonPages: 187520 kB Mapped: 60992 kB Shmem: 67520 kB Slab: 1339008 kB SReclaimable: 264896 kB SUnreclaim: 1074112 kB KernelStack: 85312 kB PageTables: 9728 kB NFS_Unstable: 0 kB Bounce: 0 kB WritebackTmp: 0 kB CommitLimit: 200336448 kB Committed_AS: 549184 kB VmallocTotal: 133009637312 kB VmallocUsed: 0 kB VmallocChunk: 0 kB HardwareCorrupted: 0 kB AnonHugePages: 0 kB ShmemHugePages: 0 kB ShmemPmdMapped: 0 kB HugePages_Total: 256 HugePages_Free: 255 HugePages_Rsvd: 0 HugePages_Surp: 0 Hugepagesize: 524288 kB
[root@localhost simple_mp]# cat /sys/devices/system/node/node3/meminfo | grep HugePages_ Node 3 HugePages_Total: 64 Node 3 HugePages_Free: 64 Node 3 HugePages_Surp: 0 [root@localhost simple_mp]# cat /sys/devices/system/node/node2/meminfo | grep HugePages_ Node 2 HugePages_Total: 64 Node 2 HugePages_Free: 64 Node 2 HugePages_Surp: 0 [root@localhost simple_mp]# cat /sys/devices/system/node/node1/meminfo | grep HugePages_ Node 1 HugePages_Total: 64 Node 1 HugePages_Free: 64 Node 1 HugePages_Surp: 0 [root@localhost simple_mp]# cat /sys/devices/system/node/node0/meminfo | grep HugePages_ Node 0 HugePages_Total: 64 Node 0 HugePages_Free: 63 Node 0 HugePages_Surp: 0 [root@localhost simple_mp]#
[root@localhost simple_mp]# cat /proc/17051/m map_files/ maps mem mountinfo mounts mountstats [root@localhost simple_mp]# cat /proc/17051/maps 00400000-00ac0000 r-xp 00000000 08:10 22155424 /data1/dpdk-19.11/examples/kni/build/app/kni 00ad0000-00af0000 r--p 006c0000 08:10 22155424 /data1/dpdk-19.11/examples/kni/build/app/kni 00af0000-00b30000 rw-p 006e0000 08:10 22155424 /data1/dpdk-19.11/examples/kni/build/app/kni 00b30000-00d90000 rw-p 00000000 00:00 0 37d60000-38020000 rw-p 00000000 00:00 0 [heap] 100000000-100010000 rw-s 00000000 00:17 128179 /run/dpdk/rte/config 100010000-100040000 rw-s 00000000 00:17 133227 /run/dpdk/rte/fbarray_memzone 100040000-100050000 rw-s 00000000 00:17 224017 /run/dpdk/rte/fbarray_memseg-524288k-0-0 120000000-140000000 rw-s 00000000 00:29 333018 /dev/hugepages/rtemap_0 140000000-920000000 r--p 00000000 00:00 0 920000000-920010000 rw-s 00000000 00:17 224018 /run/dpdk/rte/fbarray_memseg-524288k-0-1 940000000-1140000000 r--p 00000000 00:00 0 1140000000-1140010000 rw-s 00000000 00:17 224019 /run/dpdk/rte/fbarray_memseg-524288k-1-0 1160000000-1960000000 r--p 00000000 00:00 0 1960000000-1960010000 rw-s 00000000 00:17 224020 /run/dpdk/rte/fbarray_memseg-524288k-1-1 1980000000-2180000000 r--p 00000000 00:00 0 2180000000-2180010000 rw-s 00000000 00:17 224021 /run/dpdk/rte/fbarray_memseg-524288k-2-0 21a0000000-29a0000000 r--p 00000000 00:00 0 29a0000000-29a0010000 rw-s 00000000 00:17 224022 /run/dpdk/rte/fbarray_memseg-524288k-2-1 29c0000000-31c0000000 r--p 00000000 00:00 0 31c0000000-31c0010000 rw-s 00000000 00:17 224023 /run/dpdk/rte/fbarray_memseg-524288k-3-0 31e0000000-39e0000000 r--p 00000000 00:00 0 39e0000000-39e0010000 rw-s 00000000 00:17 224024 /run/dpdk/rte/fbarray_memseg-524288k-3-1 3a00000000-4200000000 r--p 00000000 00:00 0 4200000000-4200070000 rw-s 00000000 00:17 224025 /run/dpdk/rte/fbarray_memseg-2048k-0-0 4200200000-4200400000 rw-s 00000000 00:2b 332953 /mnt/huge/rtemap_65536 4200400000-4200600000 rw-s 00000000 00:2b 332954 
/mnt/huge/rtemap_65537 4200600000-4200800000 rw-s 00000000 00:2b 332955 /mnt/huge/rtemap_65538 4200800000-4200a00000 rw-s 00000000 00:2b 332956 /mnt/huge/rtemap_65539 4200a00000-4200c00000 rw-s 00000000 00:2b 332957 /mnt/huge/rtemap_65540 4200c00000-4200e00000 rw-s 00000000 00:2b 332958 /mnt/huge/rtemap_65541 4200e00000-4600200000 r--p 00000000 00:00 0 4600200000-4600270000 rw-s 00000000 00:17 224026 /run/dpdk/rte/fbarray_memseg-2048k-0-1 4600400000-4a00400000 r--p 00000000 00:00 0 4a00400000-4a00470000 rw-s 00000000 00:17 224027 /run/dpdk/rte/fbarray_memseg-2048k-0-2 4a00600000-4e00600000 r--p 00000000 00:00 0 4e00600000-4e00670000 rw-s 00000000 00:17 224028 /run/dpdk/rte/fbarray_memseg-2048k-0-3 4e00800000-5200800000 r--p 00000000 00:00 0 5200800000-5200870000 rw-s 00000000 00:17 224029 /run/dpdk/rte/fbarray_memseg-2048k-1-0 5200a00000-5600a00000 r--p 00000000 00:00 0 5600a00000-5600a70000 rw-s 00000000 00:17 224030 /run/dpdk/rte/fbarray_memseg-2048k-1-1 5600c00000-5a00c00000 r--p 00000000 00:00 0 5a00c00000-5a00c70000 rw-s 00000000 00:17 224031 /run/dpdk/rte/fbarray_memseg-2048k-1-2 5a00e00000-5e00e00000 r--p 00000000 00:00 0 5e00e00000-5e00e70000 rw-s 00000000 00:17 224032 /run/dpdk/rte/fbarray_memseg-2048k-1-3 5e01000000-6201000000 r--p 00000000 00:00 0 6201000000-6201070000 rw-s 00000000 00:17 224033 /run/dpdk/rte/fbarray_memseg-2048k-2-0 6201200000-6601200000 r--p 00000000 00:00 0 6601200000-6601270000 rw-s 00000000 00:17 224034 /run/dpdk/rte/fbarray_memseg-2048k-2-1 6601400000-6a01400000 r--p 00000000 00:00 0 6a01400000-6a01470000 rw-s 00000000 00:17 224035 /run/dpdk/rte/fbarray_memseg-2048k-2-2 6a01600000-6e01600000 r--p 00000000 00:00 0 6e01600000-6e01670000 rw-s 00000000 00:17 224036 /run/dpdk/rte/fbarray_memseg-2048k-2-3 6e01800000-7201800000 r--p 00000000 00:00 0 7201800000-7201870000 rw-s 00000000 00:17 224037 /run/dpdk/rte/fbarray_memseg-2048k-3-0 7201a00000-7601a00000 r--p 00000000 00:00 0 7601a00000-7601a70000 rw-s 00000000 00:17 224038 
/run/dpdk/rte/fbarray_memseg-2048k-3-1 7601c00000-7a01c00000 r--p 00000000 00:00 0 7a01c00000-7a01c70000 rw-s 00000000 00:17 224039 /run/dpdk/rte/fbarray_memseg-2048k-3-2 7a01e00000-7e01e00000 r--p 00000000 00:00 0 7e01e00000-7e01e70000 rw-s 00000000 00:17 224040 /run/dpdk/rte/fbarray_memseg-2048k-3-3 7e02000000-8202000000 r--p 00000000 00:00 0 8202000000-8202020000 rw-s 80007b00000 00:0d 8356 anon_inode:[vfio-device] 8202030000-8202130000 rw-s 80000200000 00:0d 8356 anon_inode:[vfio-device] ffff80000000-ffff80030000 rw-p 00000000 00:00 0 ffff80030000-ffff84000000 ---p 00000000 00:00 0 ffff84000000-ffff84030000 rw-p 00000000 00:00 0 ffff84030000-ffff88000000 ---p 00000000 00:00 0 ffff88000000-ffff88030000 rw-p 00000000 00:00 0 ffff88030000-ffff8c000000 ---p 00000000 00:00 0 ffff8c000000-ffff8c030000 rw-p 00000000 00:00 0 ffff8c030000-ffff90000000 ---p 00000000 00:00 0 ffff90000000-ffff90030000 rw-p 00000000 00:00 0 ffff90030000-ffff94000000 ---p 00000000 00:00 0 ffff94000000-ffff94030000 rw-p 00000000 00:00 0 ffff94030000-ffff98000000 ---p 00000000 00:00 0 ffff98000000-ffff98030000 rw-p 00000000 00:00 0 ffff98030000-ffff9c000000 ---p 00000000 00:00 0 ffff9cfb0000-ffff9cfc0000 ---p 00000000 00:00 0 ffff9cfc0000-ffff9d7c0000 rw-p 00000000 00:00 0 ffff9d7c0000-ffff9d7d0000 ---p 00000000 00:00 0 ffff9d7d0000-ffff9dfd0000 rw-p 00000000 00:00 0 ffff9dfd0000-ffff9dfe0000 ---p 00000000 00:00 0 ffff9dfe0000-ffff9e7e0000 rw-p 00000000 00:00 0 ffff9e7e0000-ffff9e7f0000 ---p 00000000 00:00 0 ffff9e7f0000-ffff9eff0000 rw-p 00000000 00:00 0 ffff9eff0000-ffff9f000000 ---p 00000000 00:00 0 ffff9f000000-ffff9f800000 rw-p 00000000 00:00 0 ffff9f800000-ffff9f810000 ---p 00000000 00:00 0 ffff9f810000-ffffa0010000 rw-p 00000000 00:00 0 ffffa0010000-ffffa0020000 ---p 00000000 00:00 0 ffffa0020000-ffffa0820000 rw-p 00000000 00:00 0 ffffa0820000-ffffa0830000 ---p 00000000 00:00 0 ffffa0830000-ffffa1040000 rw-p 00000000 00:00 0 ffffa1040000-ffffa1050000 ---p 00000000 00:00 0 
ffffa1050000-ffffa1850000 rw-p 00000000 00:00 0 ffffa1850000-ffffa1860000 ---p 00000000 00:00 0 ffffa1860000-ffffa2060000 rw-p 00000000 00:00 0 ffffa2060000-ffffa2080000 r-xp 00000000 08:03 229 /usr/lib64/libgcc_s-4.8.5-20150702.so.1 ffffa2080000-ffffa2090000 r--p 00010000 08:03 229 /usr/lib64/libgcc_s-4.8.5-20150702.so.1 ffffa2090000-ffffa20a0000 rw-p 00020000 08:03 229 /usr/lib64/libgcc_s-4.8.5-20150702.so.1 ffffa20a0000-ffffa2210000 r-xp 00000000 08:03 247 /usr/lib64/libc-2.17.so ffffa2210000-ffffa2220000 r--p 00160000 08:03 247 /usr/lib64/libc-2.17.so ffffa2220000-ffffa2230000 rw-p 00170000 08:03 247 /usr/lib64/libc-2.17.so ffffa2230000-ffffa2250000 r-xp 00000000 08:03 51073 /usr/lib64/libpthread-2.17.so ffffa2250000-ffffa2260000 r--p 00010000 08:03 51073 /usr/lib64/libpthread-2.17.so ffffa2260000-ffffa2270000 rw-p 00020000 08:03 51073 /usr/lib64/libpthread-2.17.so ffffa2270000-ffffa2280000 r-xp 00000000 08:03 253 /usr/lib64/libdl-2.17.so ffffa2280000-ffffa2290000 r--p 00000000 08:03 253 /usr/lib64/libdl-2.17.so ffffa2290000-ffffa22a0000 rw-p 00010000 08:03 253 /usr/lib64/libdl-2.17.so ffffa22a0000-ffffa22b0000 r-xp 00000000 08:03 230 /usr/lib64/libnuma.so.1.0.0 ffffa22b0000-ffffa22c0000 r--p 00000000 08:03 230 /usr/lib64/libnuma.so.1.0.0 ffffa22c0000-ffffa22d0000 rw-p 00010000 08:03 230 /usr/lib64/libnuma.so.1.0.0 ffffa22d0000-ffffa2370000 r-xp 00000000 08:03 255 /usr/lib64/libm-2.17.so ffffa2370000-ffffa2380000 r--p 00090000 08:03 255 /usr/lib64/libm-2.17.so ffffa2380000-ffffa2390000 rw-p 000a0000 08:03 255 /usr/lib64/libm-2.17.so ffffa2390000-ffffa23a0000 r-xp 00000000 08:03 51079 /usr/lib64/librt-2.17.so ffffa23a0000-ffffa23b0000 r--p 00000000 08:03 51079 /usr/lib64/librt-2.17.so ffffa23b0000-ffffa23c0000 rw-p 00010000 08:03 51079 /usr/lib64/librt-2.17.so ffffa23c0000-ffffa23d0000 rw-p 00000000 00:00 0 ffffa23d0000-ffffa23e0000 r--p 00000000 00:00 0 [vvar] ffffa23e0000-ffffa23f0000 r-xp 00000000 00:00 0 [vdso] ffffa23f0000-ffffa2410000 r-xp 00000000 08:03 
240 /usr/lib64/ld-2.17.so ffffa2410000-ffffa2420000 r--p 00010000 08:03 240 /usr/lib64/ld-2.17.so ffffa2420000-ffffa2430000 rw-p 00020000 08:03 240 /usr/lib64/ld-2.17.so ffffdbe20000-ffffdbe50000 rw-p 00000000 00:00 0 [stack] [root@localhost simple_mp]#
https://www.cnblogs.com/MerlinJ/p/4074391.html
/*
 * Bookkeeping for the huge pages of one particular page size.
 */
struct hugepage_info {
	/* size of a huge page */
	size_t hugepage_sz;
	/* dir where hugetlbfs is mounted */
	const char *hugedir;
	/* number of hugepages of that size on each socket */
	uint32_t num_pages[RTE_MAX_NUMA_NODES];
	/* file descriptor for hugepage dir */
	int lock_descriptor;
};
具体赋值如下,
hpi->hugepage_sz = 2M;
hpi->hugedir = /mnt/huge;
hpi->num_pages[0] = 64; // 由于此时还不知道哪些内存页分处在哪个socket上,故,都先放在socket-0上。
hpi->lock_descriptor = open(hpi->hugedir, O_RDONLY); // 在读取hugetlbfs配置的时候,需要锁住整个目录。当所有hugepage都mmap完成后,会解锁。
[root@localhost ~]# mount | grep huge cgroup on /sys/fs/cgroup/hugetlb type cgroup (rw,nosuid,nodev,noexec,relatime,seclabel,hugetlb) hugetlbfs on /dev/hugepages type hugetlbfs (rw,relatime,seclabel,pagesize=512M) none on /mnt/huge type hugetlbfs (rw,relatime,seclabel,pagesize=2M) [root@localhost ~]# ls /mnt/huge/ rtemap_65536 rtemap_65537 rtemap_65538 rtemap_65539 rtemap_65540 rtemap_65541 [root@localhost ~]#
rte_eal_config_create()主要是初始化rte_config.mem_config。如果是以root用户运行dpdk程序的话,rte_config.mem_config指向/var/run/.rte_config文件mmap的一段sizeof(struct rte_mem_config)大小的内存。
rte_config.mem_config =/var/run/dpdk/rte/config
如果是primary,则调用rte_eal_config_create(); 函数去创建/var/run/dpdk/rte/config 文件;并设置共享内存;
如果是secondary,则调用rte_eal_config_attach();去打开/var/run/dpdk/rte/config 文件,并设置相应的共享内存
文件mmap的首地址;
[root@localhost ~]# ls /var/run/dpdk/rte/config /var/run/dpdk/rte/config
rte_eal_hugepage_init()主要是在/mnt/huge目录下创建hugetlbfs配置的内存页数(在本文中就是64)的rtemap_xx文件,并为每个rtemap_xx文件做mmap映射,保证mmap后的虚拟地址与实际的物理地址是一样的。
[root@localhost ~]# ls /mnt/huge/
rtemap_65536 rtemap_65537 rtemap_65538 rtemap_65539 rtemap_65540 rtemap_65541
通过读取/proc/self/pagemap页表文件,得到本进程中虚拟地址与物理地址的映射关系。使用上一步中,每个rtemap_xx文件mmap得到的虚拟地址,除以操作系统内存页的大小4K,得到一个偏移量。根据这个偏移量,在/proc/self/pagemap中,得到物理地址的页框,假设为page,那么,物理页框page乘以操作系统内存页的大小4K,再加上虚拟地址的页偏移,就是物理地址。(这里的4K只是举例,实际应以 getconf PAGESIZE 的结果为准,本文环境中为64K。)每个rtemap_xx映射的物理地址保存在对应的hugepage_file->physaddr中。
physaddr = ((page & 0x7fffffffffffffULL) * page_size) + ((unsigned long)virtaddr % page_size);
读取/proc/self/numa_maps,得到每个rtemap_xx文件mmap得到的虚拟地址在哪个Socket上,即,哪个CPU上。其socketid保存在对应的hugepage_file->socket_id中。
[root@localhost ~]# cat /proc/79260/numa_maps 00400000 default file=/data1/dpdk-19.11/examples/kni/build/app/kni mapped=48 N1=48 kernelpagesize_kB=64 00ad0000 default file=/data1/dpdk-19.11/examples/kni/build/app/kni anon=1 dirty=1 mapped=2 N0=1 N1=1 kernelpagesize_kB=64 00af0000 default file=/data1/dpdk-19.11/examples/kni/build/app/kni anon=4 dirty=4 N0=4 kernelpagesize_kB=64 00b30000 default anon=21 dirty=21 N0=21 kernelpagesize_kB=64 318d0000 default heap anon=43 dirty=43 N0=43 kernelpagesize_kB=64 100000000 default file=/run/dpdk/rte/config dirty=1 N0=1 kernelpagesize_kB=64 100010000 default file=/run/dpdk/rte/fbarray_memzone dirty=3 N0=3 kernelpagesize_kB=64 100040000 default file=/run/dpdk/rte/fbarray_memseg-524288k-0-0 dirty=1 N0=1 kernelpagesize_kB=64 120000000 default file=/dev/hugepages/rtemap_0 huge dirty=1 N0=1 kernelpagesize_kB=524288 140000000 default 920000000 default file=/run/dpdk/rte/fbarray_memseg-524288k-0-1 dirty=1 N0=1 kernelpagesize_kB=64 940000000 default 1140000000 default file=/run/dpdk/rte/fbarray_memseg-524288k-1-0 dirty=1 N0=1 kernelpagesize_kB=64 1160000000 default 1960000000 default file=/run/dpdk/rte/fbarray_memseg-524288k-1-1 dirty=1 N0=1 kernelpagesize_kB=64 1980000000 default 2180000000 default file=/run/dpdk/rte/fbarray_memseg-524288k-2-0 dirty=1 N0=1 kernelpagesize_kB=64 21a0000000 default 29a0000000 default file=/run/dpdk/rte/fbarray_memseg-524288k-2-1 dirty=1 N0=1 kernelpagesize_kB=64 29c0000000 default 31c0000000 default file=/run/dpdk/rte/fbarray_memseg-524288k-3-0 dirty=1 N0=1 kernelpagesize_kB=64 31e0000000 default 39e0000000 default file=/run/dpdk/rte/fbarray_memseg-524288k-3-1 dirty=1 N0=1 kernelpagesize_kB=64 3a00000000 default 4200000000 default file=/run/dpdk/rte/fbarray_memseg-2048k-0-0 dirty=7 N0=7 kernelpagesize_kB=64 4200200000 default file=/mnt/huge/rtemap_65536 huge dirty=1 N0=1 kernelpagesize_kB=2048 4200400000 default file=/mnt/huge/rtemap_65537 huge dirty=1 N0=1 kernelpagesize_kB=2048 4200600000 default 
file=/mnt/huge/rtemap_65538 huge dirty=1 N0=1 kernelpagesize_kB=2048 4200800000 default file=/mnt/huge/rtemap_65539 huge dirty=1 N0=1 kernelpagesize_kB=2048 4200a00000 default file=/mnt/huge/rtemap_65540 huge dirty=1 N0=1 kernelpagesize_kB=2048 4200c00000 default file=/mnt/huge/rtemap_65541 huge dirty=1 N0=1 kernelpagesize_kB=2048 4200e00000 default 4600200000 default file=/run/dpdk/rte/fbarray_memseg-2048k-0-1 dirty=7 N0=7 kernelpagesize_kB=64 4600400000 default 4a00400000 default file=/run/dpdk/rte/fbarray_memseg-2048k-0-2 dirty=7 N0=7 kernelpagesize_kB=64 4a00600000 default 4e00600000 default file=/run/dpdk/rte/fbarray_memseg-2048k-0-3 dirty=7 N0=7 kernelpagesize_kB=64 4e00800000 default 5200800000 default file=/run/dpdk/rte/fbarray_memseg-2048k-1-0 dirty=7 N0=7 kernelpagesize_kB=64 5200a00000 default 5600a00000 default file=/run/dpdk/rte/fbarray_memseg-2048k-1-1 dirty=7 N0=7 kernelpagesize_kB=64 5600c00000 default 5a00c00000 default file=/run/dpdk/rte/fbarray_memseg-2048k-1-2 dirty=7 N0=7 kernelpagesize_kB=64 5a00e00000 default 5e00e00000 default file=/run/dpdk/rte/fbarray_memseg-2048k-1-3 dirty=7 N0=7 kernelpagesize_kB=64 5e01000000 default 6201000000 default file=/run/dpdk/rte/fbarray_memseg-2048k-2-0 dirty=7 N0=7 kernelpagesize_kB=64 6201200000 default 6601200000 default file=/run/dpdk/rte/fbarray_memseg-2048k-2-1 dirty=7 N0=7 kernelpagesize_kB=64 6601400000 default 6a01400000 default file=/run/dpdk/rte/fbarray_memseg-2048k-2-2 dirty=7 N0=7 kernelpagesize_kB=64 6a01600000 default 6e01600000 default file=/run/dpdk/rte/fbarray_memseg-2048k-2-3 dirty=7 N0=7 kernelpagesize_kB=64 6e01800000 default 7201800000 default file=/run/dpdk/rte/fbarray_memseg-2048k-3-0 dirty=7 N0=7 kernelpagesize_kB=64 7201a00000 default 7601a00000 default file=/run/dpdk/rte/fbarray_memseg-2048k-3-1 dirty=7 N0=7 kernelpagesize_kB=64 7601c00000 default 7a01c00000 default file=/run/dpdk/rte/fbarray_memseg-2048k-3-2 dirty=7 N0=7 kernelpagesize_kB=64 7a01e00000 default 7e01e00000 default 
file=/run/dpdk/rte/fbarray_memseg-2048k-3-3 dirty=7 N0=7 kernelpagesize_kB=64 7e02000000 default 8202000000 default file=anon_inode:[vfio-device] 8202030000 default file=anon_inode:[vfio-device] ffff94000000 default anon=1 dirty=1 N0=1 kernelpagesize_kB=64 ffff94030000 default ffff98000000 default anon=1 dirty=1 N0=1 kernelpagesize_kB=64 ffff98030000 default ffff9c000000 default anon=1 dirty=1 N0=1 kernelpagesize_kB=64 ffff9c030000 default ffffa3130000 default ffffa3140000 default anon=1 dirty=1 N0=1 kernelpagesize_kB=64 ffffa3940000 default ffffa3950000 default anon=1 dirty=1 N0=1 kernelpagesize_kB=64 ffffa4150000 default ffffa4160000 default anon=1 dirty=1 N0=1 kernelpagesize_kB=64 ffffa4960000 default ffffa4970000 default anon=2 dirty=2 N0=2 kernelpagesize_kB=64 ffffa5180000 default ffffa5190000 default anon=1 dirty=1 N0=1 kernelpagesize_kB=64 ffffa5990000 default ffffa59a0000 default anon=1 dirty=1 N0=1 kernelpagesize_kB=64 ffffa61a0000 default file=/usr/lib64/libgcc_s-4.8.5-20150702.so.1 mapped=1 mapmax=20 N2=1 kernelpagesize_kB=64 ffffa61c0000 default file=/usr/lib64/libgcc_s-4.8.5-20150702.so.1 anon=1 dirty=1 N0=1 kernelpagesize_kB=64 ffffa61d0000 default file=/usr/lib64/libgcc_s-4.8.5-20150702.so.1 anon=1 dirty=1 N0=1 kernelpagesize_kB=64 ffffa61e0000 default file=/usr/lib64/libc-2.17.so mapped=19 mapmax=27 N2=19 kernelpagesize_kB=64 ffffa6350000 default file=/usr/lib64/libc-2.17.so anon=1 dirty=1 N0=1 kernelpagesize_kB=64 ffffa6360000 default file=/usr/lib64/libc-2.17.so anon=1 dirty=1 N0=1 kernelpagesize_kB=64 ffffa6370000 default file=/usr/lib64/libpthread-2.17.so mapped=2 mapmax=21 N2=2 kernelpagesize_kB=64 ffffa6390000 default file=/usr/lib64/libpthread-2.17.so anon=1 dirty=1 N0=1 kernelpagesize_kB=64 ffffa63a0000 default file=/usr/lib64/libpthread-2.17.so anon=1 dirty=1 N0=1 kernelpagesize_kB=64 ffffa63b0000 default file=/usr/lib64/libdl-2.17.so mapped=1 mapmax=23 N2=1 kernelpagesize_kB=64 ffffa63c0000 default file=/usr/lib64/libdl-2.17.so anon=1 
dirty=1 N0=1 kernelpagesize_kB=64 ffffa63d0000 default file=/usr/lib64/libdl-2.17.so anon=1 dirty=1 N0=1 kernelpagesize_kB=64 ffffa63e0000 default file=/usr/lib64/libnuma.so.1.0.0 mapped=1 mapmax=2 N0=1 kernelpagesize_kB=64 ffffa63f0000 default file=/usr/lib64/libnuma.so.1.0.0 anon=1 dirty=1 N0=1 kernelpagesize_kB=64 ffffa6400000 default file=/usr/lib64/libnuma.so.1.0.0 anon=1 dirty=1 N0=1 kernelpagesize_kB=64 ffffa6410000 default file=/usr/lib64/libm-2.17.so mapped=1 mapmax=17 N0=1 kernelpagesize_kB=64 ffffa64b0000 default file=/usr/lib64/libm-2.17.so anon=1 dirty=1 N0=1 kernelpagesize_kB=64 ffffa64c0000 default file=/usr/lib64/libm-2.17.so anon=1 dirty=1 N0=1 kernelpagesize_kB=64 ffffa64d0000 default file=/usr/lib64/librt-2.17.so mapped=1 mapmax=20 N2=1 kernelpagesize_kB=64 ffffa64e0000 default file=/usr/lib64/librt-2.17.so anon=1 dirty=1 N0=1 kernelpagesize_kB=64 ffffa64f0000 default file=/usr/lib64/librt-2.17.so anon=1 dirty=1 N0=1 kernelpagesize_kB=64 ffffa6500000 default anon=1 dirty=1 N0=1 kernelpagesize_kB=64 ffffa6510000 default ffffa6520000 default ffffa6530000 default file=/usr/lib64/ld-2.17.so mapped=2 mapmax=27 N2=2 kernelpagesize_kB=64 ffffa6550000 default file=/usr/lib64/ld-2.17.so anon=1 dirty=1 N0=1 kernelpagesize_kB=64 ffffa6560000 default file=/usr/lib64/ld-2.17.so anon=1 dirty=1 N0=1 kernelpagesize_kB=64 ffffdf9f0000 default stack anon=1 dirty=1 N0=1 kernelpagesize_kB=64 [root@localhost ~]#
[root@localhost ~]# getconf PAGESIZE 65536 --64K=64*1024 [root@localhost ~]#
次进程使用
次进程主要是把重新组织的内存映射到进程中。
- 根据共享的mem_config,获得memseg段信息。memseg中包括每个段的信息(段大小、虚拟和物理地址等)。再检查是否能获得每个段对应的虚拟地址空间。
- 然后,再根据共享的.rte_hugepage_info获得每个页信息。
- 根据.rte_hugepage_info中每个页的信息(dir、memseg_id),把每个页映射到次进程中,映射的虚拟地址和主进程中的一样。
helloworld
CFLAGS += -O3 -I$(RTE_SDK)/lib/librte_eal/common/ CFLAGS += $(WERROR_FLAGS) include $(RTE_SDK)/mk/rte.extapp.mk endif
#include <stdio.h> #include <string.h> #include <stdint.h> #include <errno.h> #include <sys/queue.h> #include <rte_memory.h> #include <rte_launch.h> #include <rte_eal.h> #include <rte_per_lcore.h> #include <rte_lcore.h> #include <rte_debug.h> #include <unistd.h> #include "eal_private.h" static int lcore_hello(__attribute__((unused)) void *arg) { unsigned lcore_id, socket_id; lcore_id = rte_lcore_id(); socket_id = rte_socket_id(); printf("hello from core %u ", lcore_id); printf("socket id %u ", socket_id); sleep(60); return 0; } int main(int argc, char **argv) { int ret; unsigned lcore_id; ret = rte_eal_init(argc, argv); if (ret < 0) rte_panic("Cannot init EAL "); /* call lcore_hello() on every slave lcore */ RTE_LCORE_FOREACH_SLAVE(lcore_id) { rte_eal_remote_launch(lcore_hello, NULL, lcore_id); } /* call it on master lcore too */ lcore_hello(NULL); struct rte_config *tmp_config; tmp_config=rte_eal_get_configuration(); printf("rte_config->lcore_count = %d ",tmp_config->lcore_count); rte_eal_mp_wait_lcore(); return 0; }
[root@localhost helloworld]# build/app/helloworld -c 0xf -n 4 --proc-type=auto EAL: Detected 128 lcore(s) EAL: Detected 4 NUMA nodes EAL: Auto-detected process type: PRIMARY EAL: Multi-process socket /var/run/dpdk/rte/mp_socket EAL: Selected IOVA mode 'PA' EAL: Probing VFIO support... EAL: VFIO support initialized EAL: PCI device 0000:05:00.0 on NUMA socket 0 EAL: probe driver: 19e5:200 net_hinic EAL: using IOMMU type 1 (Type 1) net_hinic: Initializing pf hinic-0000:05:00.0 in primary process net_hinic: Device 0000:05:00.0 hwif attribute: net_hinic: func_idx:0, p2p_idx:0, pciintf_idx:0, vf_in_pf:0, ppf_idx:0, global_vf_id:15, func_type:2 net_hinic: num_aeqs:4, num_ceqs:4, num_irqs:32, dma_attr:2 net_hinic: Get public resource capability: net_hinic: host_id: 0x0, ep_id: 0x0, intr_type: 0x0, max_cos_id: 0x7, er_id: 0x0, port_id: 0x0 net_hinic: host_total_function: 0xf2, host_oq_id_mask_val: 0x8, max_vf: 0x78 net_hinic: pf_num: 0x2, pf_id_start: 0x0, vf_num: 0xf0, vf_id_start: 0x10 net_hinic: Get share resource capability: net_hinic: host_pctxs: 0x0, host_cctxs: 0x0, host_scqs: 0x0, host_srqs: 0x0, host_mpts: 0x0 net_hinic: Get l2nic resource capability: net_hinic: max_sqs: 0x10, max_rqs: 0x10, vf_max_sqs: 0x4, vf_max_rqs: 0x4 net_hinic: Initialize 0000:05:00.0 in primary successfully EAL: PCI device 0000:06:00.0 on NUMA socket 0 EAL: probe driver: 19e5:200 net_hinic EAL: PCI device 0000:7d:00.0 on NUMA socket 0 EAL: probe driver: 19e5:a222 net_hns3 EAL: PCI device 0000:7d:00.1 on NUMA socket 0 EAL: probe driver: 19e5:a221 net_hns3 EAL: PCI device 0000:7d:00.2 on NUMA socket 0 EAL: probe driver: 19e5:a222 net_hns3 EAL: PCI device 0000:7d:00.3 on NUMA socket 0 EAL: probe driver: 19e5:a221 net_hns3 hello from core 1 socket id 0 hello from core 2 socket id 0 hello from core 3 socket id 0 hello from core 0 socket id 0
rte_config->lcore_count = 4 [root@localhost helloworld]#
/* Template for an fbarray file path: "<runtime dir>/fbarray_<name>". */
#define FBARRAY_NAME_FMT "%s/fbarray_%s"

/*
 * Write the path of the fbarray file for `name` into `buffer`.
 *
 * The directory part comes from rte_eal_get_runtime_dir(). At most `buflen`
 * bytes are written; the result of snprintf() is not inspected, so a path
 * that does not fit is silently truncated.
 *
 * Returns `buffer`.
 */
static inline const char *
eal_get_fbarray_path(char *buffer, size_t buflen, const char *name)
{
	(void)snprintf(buffer, buflen, FBARRAY_NAME_FMT,
		       rte_eal_get_runtime_dir(), name);

	return buffer;
}
/** Template for a hugepage map file path: "<hugedir>/<prefix>map_<id>". */
#define HUGEFILE_FMT "%s/%smap_%d"

/*
 * Write the path of hugepage map file number `f_id` inside `hugedir` into
 * `buffer`.
 *
 * The file-name prefix comes from eal_get_hugefile_prefix(). At most `buflen`
 * bytes are written; the result of snprintf() is not inspected, so a path
 * that does not fit is silently truncated.
 *
 * Returns `buffer`.
 */
static inline const char *
eal_get_hugefile_path(char *buffer, size_t buflen, const char *hugedir, int f_id)
{
	(void)snprintf(buffer, buflen, HUGEFILE_FMT,
		       hugedir, eal_get_hugefile_prefix(), f_id);

	return buffer;
}
rte_fbarray_init
/* Descriptor of a named array of fixed-size elements. */
struct rte_fbarray {
	/** Name associated with the array. */
	char name[RTE_FBARRAY_NAME_LEN];
	/** Number of entries currently stored. */
	unsigned int count;
	/** Current length of the array. */
	unsigned int len;
	/** Size of one element. */
	unsigned int elt_sz;
	/** Pointer to the array data. */
	void *data;
	/** Multiprocess lock. */
	rte_rwlock_t rwlock;
};
rte_fbarray_init: ----------------------------主进程 data = eal_get_virtual_area(NULL, &mmap_len, page_sz, 0, 0);
rte_fbarray_attach(struct rte_fbarray *arr) -------------次进程 data = eal_get_virtual_area(arr->data, &mmap_len, page_sz, 0, 0);
rte_fbarray_init data = eal_get_virtual_area(NULL, &mmap_len, page_sz, 0, 0); eal_get_fbarray_path(path, sizeof(path), name); //生成/run/dpdk/rte/fbarray_memseg文件 resize_and_map //调用mmap
/*
 * Resize the file behind `fd` to `len` bytes and map it, shared and
 * readable/writable, at exactly `addr`.
 *
 * Returns 0 on success. On failure returns -1 and stores the errno value of
 * the failing call (ftruncate() or mmap()) in rte_errno.
 */
int
resize_and_map(int fd, void *addr, size_t len)
{
	void *map_addr;
	int err;

	if (ftruncate(fd, len)) {
		/* save errno first: the logging call below may overwrite it */
		err = errno;
		/* no file name is available here, so report the error cause */
		RTE_LOG(ERR, EAL, "Cannot truncate file: %s ", strerror(err));
		/* pass errno up the chain */
		rte_errno = err;
		return -1;
	}

	/* MAP_FIXED: the mapping has to land on the address the caller chose */
	map_addr = mmap(addr, len, PROT_READ | PROT_WRITE,
			MAP_SHARED | MAP_FIXED, fd, 0);
	if (map_addr != addr) {
		err = errno;
		RTE_LOG(ERR, EAL, "mmap() failed: %s ", strerror(err));
		/* pass errno up the chain */
		rte_errno = err;
		return -1;
	}
	return 0;
}
struct rte_config { uint32_t master_lcore; /**< Id of the master lcore */ uint32_t lcore_count; /**< Number of available logical cores. */ uint32_t numa_node_count; /**< Number of detected NUMA nodes. */ uint32_t numa_nodes[RTE_MAX_NUMA_NODES]; /**< List of detected NUMA nodes. */ uint32_t service_lcore_count;/**< Number of available service cores. */ enum rte_lcore_role_t lcore_role[RTE_MAX_LCORE]; /**< State of cores. */ /** Primary or secondary configuration */ enum rte_proc_type_t process_type; /** PA or VA mapping mode */ enum rte_iova_mode iova_mode; /** * Pointer to memory configuration, which may be shared across multiple * DPDK instances */ struct rte_mem_config *mem_config; } __attribute__((__packed__)); struct rte_mem_config { volatile uint32_t magic; /**< Magic number - sanity check. */ uint32_t version; /**< Prevent secondary processes using different DPDK versions. */ /* memory topology */ uint32_t nchannel; /**< Number of channels (0 if unknown). */ uint32_t nrank; /**< Number of ranks (0 if unknown). */ /** * current lock nest order * - qlock->mlock (ring/hash/lpm) * - mplock->qlock->mlock (mempool) * Notice: * *ALWAYS* obtain qlock first if having to obtain both qlock and mlock */ rte_rwlock_t mlock; /**< used by memzones for thread safety. */ rte_rwlock_t qlock; /**< used by tailqs for thread safety. */ rte_rwlock_t mplock; /**< used by mempool library for thread safety. */ rte_spinlock_t tlock; /**< used by timer library for thread safety. */ rte_rwlock_t memory_hotplug_lock; /**< Indicates whether memory hotplug request is in progress. */ /* memory segments and zones */ struct rte_fbarray memzones; /**< Memzone descriptors. 
*/ struct rte_memseg_list memsegs[RTE_MAX_MEMSEG_LISTS]; /**< List of dynamic arrays holding memsegs */ struct rte_tailq_head tailq_head[RTE_MAX_TAILQ]; /**< Tailqs for objects */ struct malloc_heap malloc_heaps[RTE_MAX_HEAPS]; /**< DPDK malloc heaps */ int next_socket_id; /**< Next socket ID for external malloc heap */ /* rte_mem_config has to be mapped at the exact same address in all * processes, so we need to store it. */ uint64_t mem_cfg_addr; /**< Address of this structure in memory. */ /* Primary and secondary processes cannot run with different legacy or * single file segments options, so to avoid having to specify these * options to all processes, store them in shared config and update the * internal config at init time. */ uint32_t legacy_mem; /**< stored legacy mem parameter. */ uint32_t single_file_segments; /**< stored single file segments parameter. */ uint64_t tsc_hz; /**< TSC rate */ uint8_t dma_maskbits; /**< Keeps the more restricted dma mask. */ };
DPDK内存的初始化
内存的初始化在 rte_eal_init() 中完成。由于 DPDK 的进程分为 primary 和 secondary,内存的初始化工作只能在 primary 进程中完成。主要的步骤如下:
- eal_hugepage_info_init():获取大页的信息,并初始化内部的结构。
- rte_config_init():创建配置文件,并做内存映射。
- rte_eal_memory_init():大页的内存初始化,并连接成连续的内存区。
- rte_eal_memzone_init():初始化 memzone 子系统。
eal_get_virtual_area mapped_addr = mmap(requested_addr, (size_t)map_sz, PROT_READ, mmap_flags, -1, 0); ---匿名映射,通过mmap申请堆内存
mmap匿名映射
从原型可知,存在一个参数为fd,根据fd,存在一种情况叫匿名映射,所谓匿名映射,表示不存在fd这么个真实的文件。实现匿名映射的方式主要有以下两种:
1、BSD 提供匿名映射的办法是 fd = -1,同时 flag 指定为 MAP_SHARED|MAP_ANON。
ptr = mmap(NULL,sizeof(int),PROT_READ|PROT_WRITE,
MAP_SHARED|MAP_ANON,-1,0);
2、SVR4 提供匿名映射的办法是 open /dev/zero设备文件,把返回的文件描述符,作为mmap的fd参数。
fd = open("/dev/zero",O_RDWR);
/dev/zero 是一个特殊的文件,当你读它的时候,它会提供无限的空字符(NULL, ASCII NUL, 0x00)
一个作用是用它作为源,产生一个特定大小的空白文件。