  • Memory management reading notes

    1. The memory zones in the system are represented by the enum constants of zone_type:

    enum zone_type {
    #ifdef CONFIG_ZONE_DMA
    	/*
    	 * ZONE_DMA is used when there are devices that are not able
    	 * to do DMA to all of addressable memory (ZONE_NORMAL). Then we
    	 * carve out the portion of memory that is needed for these devices.
    	 * The range is arch specific.
    	 *
    	 * Some examples
    	 *
    	 * Architecture		Limit
    	 * ---------------------------
    	 * parisc, ia64, sparc	<4G
    	 * s390			<2G
    	 * arm			Various
    	 * alpha		Unlimited or 0-16MB.
    	 *
    	 * i386, x86_64 and multiple other arches
    	 * 			<16M.
    	 */
    	ZONE_DMA,
    #endif
    #ifdef CONFIG_ZONE_DMA32
    	/*
    	 * x86_64 needs two ZONE_DMAs because it supports devices that are
    	 * only able to do DMA to the lower 16M but also 32 bit devices that
    	 * can only do DMA areas below 4G.
    	 */
    	ZONE_DMA32,
    #endif
    	/*
    	 * Normal addressable memory is in ZONE_NORMAL. DMA operations can be
    	 * performed on pages in ZONE_NORMAL if the DMA devices support
    	 * transfers to all addressable memory.
    	 */
    	ZONE_NORMAL,
    #ifdef CONFIG_HIGHMEM
    	/*
    	 * A memory area that is only addressable by the kernel through
    	 * mapping portions into its own address space. This is for example
    	 * used by i386 to allow the kernel to address the memory beyond
    	 * 900MB. The kernel will set up special mappings (page
    	 * table entries on i386) for each page that the kernel needs to
    	 * access.
    	 */
    	ZONE_HIGHMEM,
    #endif
    	ZONE_MOVABLE,
    	MAX_NR_ZONES
    };

    (1) ZONE_DMA: needed when a device uses DMA but cannot address all of memory (ZONE_NORMAL); the kernel carves out a zone that such devices can reach. On i386/x86_64 this zone covers at most the first 16 MiB;

    (2) ZONE_DMA32: only needed on 64-bit machines, to support 32-bit devices that can do DMA only to memory below 4 GiB;

    (3) ZONE_HIGHMEM: when there is more physical memory than kernel virtual address space, the excess is high memory; to access it, the kernel must map the portion of physical memory it currently needs into its virtual address space.

    Hence, data structures are needed to manage the memory zones.
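
    A minimal sketch (not part of the original notes) of how these zones come into play at allocation time: the zone is selected through GFP modifier flags passed to the standard kmalloc() API. The helper name alloc_dma_buffer is made up for illustration.

    #include <linux/slab.h>
    #include <linux/gfp.h>

    /* GFP_DMA steers the allocation into ZONE_DMA, GFP_DMA32 (on 64-bit)
     * into ZONE_DMA32; plain GFP_KERNEL is satisfied from ZONE_NORMAL. */
    static void *alloc_dma_buffer(size_t size)
    {
    	return kmalloc(size, GFP_KERNEL | GFP_DMA);
    }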

    2. Each zone is associated with an array that organizes the physical memory pages (page frames) belonging to it. For every page frame, one struct page instance plus the required management data is allocated. So data structures for page-frame management are needed; the structure describing a physical page frame is struct page. A page frame is the smallest unit of system memory.
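
    A small sketch of this one-to-one relationship (page_frame_demo is hypothetical; alloc_page(), page_to_pfn() and __free_page() are standard kernel helpers): every page frame has exactly one struct page, and the two views are converted with page_to_pfn()/pfn_to_page().

    #include <linux/mm.h>
    #include <linux/gfp.h>

    static void page_frame_demo(void)
    {
    	struct page *page = alloc_page(GFP_KERNEL);	/* one page frame */

    	if (!page)
    		return;
    	/* the page frame number (pfn) indexes this frame in physical memory */
    	printk(KERN_INFO "allocated pfn %lu\n", page_to_pfn(page));
    	__free_page(page);
    }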

    3. Linux memory management fully accounts for the UMA/NUMA distinction, based on the following idea: on a UMA system, a single NUMA node manages all of the system's memory, and the rest of the memory-management code behaves as if it were managing a pseudo-NUMA system. So data structures for node management are needed.

    The data structure responsible for node management is pg_data_t, defined as:

    /*
     * The pg_data_t structure is used in machines with CONFIG_DISCONTIGMEM
     * (mostly NUMA machines?) to denote a higher-level memory zone than the
     * zone denotes.
     *
     * On NUMA machines, each NUMA node would have a pg_data_t to describe
     * its memory layout.
     *
     * Memory statistics and page replacement data structures are maintained on a
     * per-zone basis.
     */
    struct bootmem_data;
    typedef struct pglist_data {
    	struct zone node_zones[MAX_NR_ZONES];
    	struct zonelist node_zonelists[MAX_ZONELISTS];
    	int nr_zones;
    #ifdef CONFIG_FLAT_NODE_MEM_MAP
    	struct page *node_mem_map;
    #endif
    	struct bootmem_data *bdata;
    #ifdef CONFIG_MEMORY_HOTPLUG
    	/*
    	 * Must be held any time you expect node_start_pfn, node_present_pages
    	 * or node_spanned_pages stay constant.  Holding this will also
    	 * guarantee that any pfn_valid() stays that way.
    	 *
    	 * Nests above zone->lock and zone->size_seqlock.
    	 */
    	spinlock_t node_size_lock;
    #endif
    	unsigned long node_start_pfn;
    	unsigned long node_present_pages; /* number of usable physical pages
    					     actually present in this node */
    	unsigned long node_spanned_pages; /* total size of the physical page
    					     range, including holes */
    	int node_id;
    	wait_queue_head_t kswapd_wait;
    	struct task_struct *kswapd;
    	int kswapd_max_order;
    } pg_data_t;
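
    A sketch of walking all online nodes and their zones (dump_node_zones is made up; for_each_online_pgdat() and populated_zone() are standard helpers of this kernel generation). On a UMA kernel the loop still works: there is simply a single pg_data_t.

    #include <linux/mmzone.h>

    static void dump_node_zones(void)
    {
    	pg_data_t *pgdat;
    	int i;

    	for_each_online_pgdat(pgdat) {
    		for (i = 0; i < pgdat->nr_zones; i++) {
    			struct zone *zone = &pgdat->node_zones[i];

    			if (!populated_zone(zone))	/* skip empty zones */
    				continue;
    			printk(KERN_INFO "node %d, zone %s: %lu pages\n",
    			       pgdat->node_id, zone->name,
    			       zone->present_pages);
    		}
    	}
    }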



    If there is more than one node, the kernel maintains state information for each node. The corresponding data structure is defined below; the bitmaps are manipulated with node_set_state and node_clear_state:

    /*
     * Bitmasks that are kept for all the nodes.
     */
    enum node_states {
    	N_POSSIBLE,		/* The node could become online at some point */
    	N_ONLINE,		/* The node is online */
    	N_NORMAL_MEMORY,	/* The node has regular memory */
    #ifdef CONFIG_HIGHMEM
    	N_HIGH_MEMORY,		/* The node has regular or high memory */
    #else
    	N_HIGH_MEMORY = N_NORMAL_MEMORY,
    #endif
    	N_CPU,		/* The node has one or more cpus */
    	NR_NODE_STATES
    };
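
    A brief sketch of testing and updating these per-node state bitmaps (node_state_demo is hypothetical; the accessors are the standard nodemask.h helpers):

    #include <linux/nodemask.h>

    static void node_state_demo(int nid)
    {
    	if (node_state(nid, N_ONLINE))	/* test one node's bit */
    		printk(KERN_INFO "node %d is online\n", nid);

    	node_set_state(nid, N_CPU);	/* mark the node as having CPUs */
    	node_clear_state(nid, N_CPU);	/* and clear the mark again */
    }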

    4. Zone management. Each memory zone is described by struct zone:
    struct zone {
    	/* Fields commonly accessed by the page allocator */
    	unsigned long		pages_min, pages_low, pages_high;
    	/*
    	 * We don't know if the memory that we're going to allocate will be freeable
    	 * or/and it will be released eventually, so to avoid totally wasting several
    	 * GB of ram we must reserve some of the lower zone memory (otherwise we risk
    	 * to run OOM on the lower zones despite there's tons of freeable ram
    	 * on the higher zones). This array is recalculated at runtime if the
    	 * sysctl_lowmem_reserve_ratio sysctl changes.
    	 */
    	unsigned long		lowmem_reserve[MAX_NR_ZONES];
    
    #ifdef CONFIG_NUMA
    	int node;
    	/*
    	 * zone reclaim becomes active if more unmapped pages exist.
    	 */
    	unsigned long		min_unmapped_pages;
    	unsigned long		min_slab_pages;
    	struct per_cpu_pageset	*pageset[NR_CPUS];
    #else
    	struct per_cpu_pageset	pageset[NR_CPUS];
    #endif
    	/*
    	 * free areas of different sizes
    	 */
    	spinlock_t		lock;
    #ifdef CONFIG_MEMORY_HOTPLUG
    	/* see spanned/present_pages for more description */
    	seqlock_t		span_seqlock;
    #endif
    	struct free_area	free_area[MAX_ORDER];
    
    #ifndef CONFIG_SPARSEMEM
    	/*
    	 * Flags for a pageblock_nr_pages block. See pageblock-flags.h.
    	 * In SPARSEMEM, this map is stored in struct mem_section
    	 */
    	unsigned long		*pageblock_flags;
    #endif /* CONFIG_SPARSEMEM */
    
    
    	ZONE_PADDING(_pad1_)
    
    	/* Fields commonly accessed by the page reclaim scanner */
    	spinlock_t		lru_lock;	
    	struct list_head	active_list;
    	struct list_head	inactive_list;
    	unsigned long		nr_scan_active;
    	unsigned long		nr_scan_inactive;
    	unsigned long		pages_scanned;	   /* since last reclaim */
    	unsigned long		flags;		   /* zone flags, see below */
    
    	/* Zone statistics */
    	atomic_long_t		vm_stat[NR_VM_ZONE_STAT_ITEMS];
    
    	/*
    	 * prev_priority holds the scanning priority for this zone.  It is
    	 * defined as the scanning priority at which we achieved our reclaim
    	 * target at the previous try_to_free_pages() or balance_pgdat()
    	 * invocation.
    	 *
    	 * We use prev_priority as a measure of how much stress page reclaim is
    	 * under - it drives the swappiness decision: whether to unmap mapped
    	 * pages.
    	 *
    	 * Access to this field is quite racy even on uniprocessor.  But
    	 * it is expected to average out OK.
    	 */
    	int prev_priority;
    
    
    	ZONE_PADDING(_pad2_)
    	/* Rarely used or read-mostly fields */
    
    	/*
    	 * wait_table		-- the array holding the hash table
    	 * wait_table_hash_nr_entries	-- the size of the hash table array
    	 * wait_table_bits	-- wait_table_size == (1 << wait_table_bits)
    	 *
    	 * The purpose of all these is to keep track of the people
    	 * waiting for a page to become available and make them
    	 * runnable again when possible. The trouble is that this
    	 * consumes a lot of space, especially when so few things
    	 * wait on pages at a given time. So instead of using
    	 * per-page waitqueues, we use a waitqueue hash table.
    	 *
    	 * The bucket discipline is to sleep on the same queue when
    	 * colliding and wake all in that wait queue when removing.
    	 * When something wakes, it must check to be sure its page is
    	 * truly available, a la thundering herd. The cost of a
    	 * collision is great, but given the expected load of the
    	 * table, they should be so rare as to be outweighed by the
    	 * benefits from the saved space.
    	 *
    	 * __wait_on_page_locked() and unlock_page() in mm/filemap.c, are the
    	 * primary users of these fields, and in mm/page_alloc.c
    	 * free_area_init_core() performs the initialization of them.
    	 */
    	wait_queue_head_t	* wait_table;
    	unsigned long		wait_table_hash_nr_entries;
    	unsigned long		wait_table_bits;
    
    	/*
    	 * Discontig memory support fields.
    	 */
    	struct pglist_data	*zone_pgdat;
    	/* zone_start_pfn == zone_start_paddr >> PAGE_SHIFT */
    	unsigned long		zone_start_pfn;
    
    	/*
    	 * zone_start_pfn, spanned_pages and present_pages are all
    	 * protected by span_seqlock.  It is a seqlock because it has
    	 * to be read outside of zone->lock, and it is done in the main
    	 * allocator path.  But, it is written quite infrequently.
    	 *
    	 * The lock is declared along with zone->lock because it is
    	 * frequently read in proximity to zone->lock.  It's good to
    	 * give them a chance of being in the same cacheline.
    	 */
    	unsigned long		spanned_pages;	/* total size, including holes */
    	unsigned long		present_pages;	/* amount of memory (excluding holes) */
    
    	/*
    	 * rarely used fields:
    	 */
    	const char		*name;
    } ____cacheline_internodealigned_in_smp;
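
    A closing sketch that reads one zone's buddy free lists, roughly what /proc/buddyinfo prints (dump_zone_buddyinfo is hypothetical; free_area[] and lock are the fields shown above, and free_area[] is protected by zone->lock):

    #include <linux/mmzone.h>

    static void dump_zone_buddyinfo(struct zone *zone)
    {
    	unsigned long flags;
    	unsigned int order;

    	spin_lock_irqsave(&zone->lock, flags);	/* free_area[] needs the lock */
    	for (order = 0; order < MAX_ORDER; order++)
    		printk(KERN_INFO "zone %s: order %u has %lu free blocks\n",
    		       zone->name, order, zone->free_area[order].nr_free);
    	spin_unlock_irqrestore(&zone->lock, flags);
    }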



  • Original article: https://www.cnblogs.com/javaadu/p/11742687.html