zoukankan      html  css  js  c++  java
  • 内存管理阅读

    1、系统中的内存域,使用枚举常量zone_type来表示:

    /*
     * Memory zone types.  Enumerators are numbered in declaration order; the
     * ones guarded by CONFIG_ZONE_DMA, CONFIG_ZONE_DMA32 and CONFIG_HIGHMEM
     * exist only when the corresponding option is enabled, so the numeric
     * values (and MAX_NR_ZONES) depend on the kernel configuration.
     */
    enum zone_type {
    #ifdef CONFIG_ZONE_DMA
    	/*
    	 * ZONE_DMA is used when there are devices that are not able
    	 * to do DMA to all of addressable memory (ZONE_NORMAL). Then we
    	 * carve out the portion of memory that is needed for these devices.
    	 * The range is arch specific.
    	 *
    	 * Some examples
    	 *
    	 * Architecture		Limit
    	 * ---------------------------
    	 * parisc, ia64, sparc	<4G
    	 * s390			<2G
    	 * arm			Various
    	 * alpha		Unlimited or 0-16MB.
    	 *
    	 * i386, x86_64 and multiple other arches
    	 * 			<16M.
    	 */
    	ZONE_DMA,
    #endif
    #ifdef CONFIG_ZONE_DMA32
    	/*
    	 * x86_64 needs two ZONE_DMAs because it supports devices that are
    	 * only able to do DMA to the lower 16M but also 32 bit devices that
    	 * can only do DMA areas below 4G.
    	 */
    	ZONE_DMA32,
    #endif
    	/*
    	 * Normal addressable memory is in ZONE_NORMAL. DMA operations can be
    	 * performed on pages in ZONE_NORMAL if the DMA devices support
    	 * transfers to all addressable memory.
    	 */
    	ZONE_NORMAL,
    #ifdef CONFIG_HIGHMEM
    	/*
    	 * A memory area that is only addressable by the kernel through
    	 * mapping portions into its own address space. This is for example
    	 * used by i386 to allow the kernel to address the memory beyond
    	 * 900MB. The kernel will set up special mappings (page
    	 * table entries on i386) for each page that the kernel needs to
    	 * access.
    	 */
    	ZONE_HIGHMEM,
    #endif
    	/*
    	 * NOTE(review): ZONE_MOVABLE is not described in this excerpt;
    	 * presumably a zone that holds only movable pages -- confirm
    	 * against the kernel sources.
    	 */
    	ZONE_MOVABLE,
    	/*
    	 * Not a zone: the count of zone types enabled by the configuration.
    	 * Used as an array dimension (e.g. node_zones[], lowmem_reserve[]).
    	 */
    	MAX_NR_ZONES
    };

    (1)ZONE_DMA:当一个外设需要用DMA技术,但又不能映射到所有内存区域【ZONE_NORMAL】时,需要用到ZONE_DMA。在i386/x86_64下,一般不会超过16M;

    (2)ZONE_DMA32:在64位机器上,若要支持只能访问4G以下内存的32bit外设,才需要ZONE_DMA32

    (3)ZONE_HIGHMEM:对内核来说,如果物理内存比VM的空间还大时,需要使用高端内存【high memory】;高端内存区域的访问,需要内核借助映射机制,将当前需要用的一部分物理内存映射到VM空间。

    所以:需要一些数据结构进行内存域管理

    2、每个内存域都关联了一个数组,用来组织属于该内存域的物理内存页【页帧】,对于每个页帧,都分配一个struct page实例以及所需的管理数据。所以,需要一些数据结构进行页帧管理,物理页帧对应的数据结构是:struct page。页帧代表系统内存的最小单位。

    3、Linux内存管理充分考虑了UMA和NUMA系统的划分,主要基于以下思想:在UMA系统上,则只用一个NUMA结点来管理整个系统的内存,而内存管理的其他部分则相信它们在管理一个伪NUMA系统。所以,需要一些数据结构进行结点管理

    负责进行结点管理的数据结构为:pg_data_t,定义为:

    /*
     * The pg_data_t structure is used in machines with CONFIG_DISCONTIGMEM
     * (mostly NUMA machines?) to denote a higher-level memory zone than the
     * zone denotes.
     *
     * On NUMA machines, each NUMA node would have a pg_data_t to describe
     * its memory layout.
     *
     * Memory statistics and page replacement data structures are maintained on a
     * per-zone basis.
     */
    struct bootmem_data;
    typedef struct pglist_data {
    	/* One struct zone slot per possible zone type. */
    	struct zone node_zones[MAX_NR_ZONES];
    	/* NOTE(review): presumably the zone fallback lists used for allocation -- confirm. */
    	struct zonelist node_zonelists[MAX_ZONELISTS];
    	/* NOTE(review): presumably the number of node_zones[] entries in use -- confirm. */
    	int nr_zones;
    #ifdef CONFIG_FLAT_NODE_MEM_MAP
    	/* NOTE(review): presumably the struct page array covering this node -- confirm. */
    	struct page *node_mem_map;
    #endif
    	/* Only a pointer is stored, so the forward declaration above suffices. */
    	struct bootmem_data *bdata;
    #ifdef CONFIG_MEMORY_HOTPLUG
    	/*
    	 * Must be held any time you expect node_start_pfn, node_present_pages
    	 * or node_spanned_pages stay constant.  Holding this will also
    	 * guarantee that any pfn_valid() stays that way.
    	 *
    	 * Nests above zone->lock and zone->size_seqlock.
    	 */
    	spinlock_t node_size_lock;
    #endif
    	/* NOTE(review): presumably the page frame number of the node's first page -- confirm. */
    	unsigned long node_start_pfn;
    	unsigned long node_present_pages; /* total number of physical pages */
    	/* i.e. the number of usable physical pages actually present in the node */
    	unsigned long node_spanned_pages; /* total size of physical page
    					     range, including holes */
    	/* i.e. the total number of page frames spanned by the node, holes included */
    	/* NOTE(review): presumably the identifier of this node -- confirm. */
    	int node_id;
    	/*
    	 * NOTE(review): the three fields below presumably belong to the
    	 * node's kswapd thread (its wait queue, its task, and the largest
    	 * allocation order it was asked to reclaim for) -- confirm.
    	 */
    	wait_queue_head_t kswapd_wait;
    	struct task_struct *kswapd;
    	int kswapd_max_order;
    } pg_data_t;



    如果结点的个数多于一个,那么内核会维护各个结点的状态信息,相应的数据结构定义如下(node_set_state和node_clear_state用来处理该位图操作):

    /*
     * Bitmasks that are kept for all the nodes.
     *
     * NOTE(review): presumably each enumerator selects one per-state bitmask
     * of nodes -- confirm against the users of this enum.
     */
    enum node_states {
    	N_POSSIBLE,		/* The node could become online at some point */
    	N_ONLINE,		/* The node is online */
    	N_NORMAL_MEMORY,	/* The node has regular memory */
    #ifdef CONFIG_HIGHMEM
    	N_HIGH_MEMORY,		/* The node has regular or high memory */
    #else
    	/*
    	 * Without CONFIG_HIGHMEM the high-memory state is an alias of
    	 * N_NORMAL_MEMORY, so N_CPU below takes the next value and
    	 * NR_NODE_STATES is one smaller than in the CONFIG_HIGHMEM case.
    	 */
    	N_HIGH_MEMORY = N_NORMAL_MEMORY,
    #endif
    	N_CPU,		/* The node has one or more cpus */
    	NR_NODE_STATES		/* Not a state: number of distinct state values */
    };

    4、内存域管理
    /*
     * Descriptor of one memory zone.
     *
     * Fields are grouped by who touches them (page allocator, page reclaim
     * scanner, rarely used / read-mostly) and the groups are separated by
     * ZONE_PADDING().  NOTE(review): the padding presumably keeps the groups
     * on separate cache lines -- confirm against the ZONE_PADDING definition.
     */
    struct zone {
    	/* Fields commonly accessed by the page allocator */
    	/* NOTE(review): presumably the min/low/high free-page watermarks -- confirm. */
    	unsigned long		pages_min, pages_low, pages_high;
    	/*
    	 * We don't know if the memory that we're going to allocate will be freeable
    	 * or/and it will be released eventually, so to avoid totally wasting several
    	 * GB of ram we must reserve some of the lower zone memory (otherwise we risk
    	 * to run OOM on the lower zones despite there's tons of freeable ram
    	 * on the higher zones). This array is recalculated at runtime if the
    	 * sysctl_lowmem_reserve_ratio sysctl changes.
    	 */
    	unsigned long		lowmem_reserve[MAX_NR_ZONES];
    
    #ifdef CONFIG_NUMA
    	int node;
    	/*
    	 * zone reclaim becomes active if more unmapped pages exist.
    	 */
    	unsigned long		min_unmapped_pages;
    	unsigned long		min_slab_pages;
    	/* With CONFIG_NUMA the per-CPU page sets are referenced through pointers... */
    	struct per_cpu_pageset	*pageset[NR_CPUS];
    #else
    	/* ...otherwise they are embedded directly in the zone. */
    	struct per_cpu_pageset	pageset[NR_CPUS];
    #endif
    	/*
    	 * free areas of different sizes
    	 */
    	spinlock_t		lock;
    #ifdef CONFIG_MEMORY_HOTPLUG
    	/* see spanned/present_pages for more description */
    	seqlock_t		span_seqlock;
    #endif
    	/* One free_area entry per allocation order below MAX_ORDER. */
    	struct free_area	free_area[MAX_ORDER];
    
    #ifndef CONFIG_SPARSEMEM
    	/*
    	 * Flags for a pageblock_nr_pages block. See pageblock-flags.h.
    	 * In SPARSEMEM, this map is stored in struct mem_section
    	 */
    	unsigned long		*pageblock_flags;
    #endif /* !CONFIG_SPARSEMEM */
    
    
    	ZONE_PADDING(_pad1_)
    
    	/* Fields commonly accessed by the page reclaim scanner */
    	spinlock_t		lru_lock;	
    	struct list_head	active_list;
    	struct list_head	inactive_list;
    	unsigned long		nr_scan_active;
    	unsigned long		nr_scan_inactive;
    	unsigned long		pages_scanned;	   /* since last reclaim */
    	unsigned long		flags;		   /* zone flags, see below */
    
    	/* Zone statistics */
    	atomic_long_t		vm_stat[NR_VM_ZONE_STAT_ITEMS];
    
    	/*
    	 * prev_priority holds the scanning priority for this zone.  It is
    	 * defined as the scanning priority at which we achieved our reclaim
    	 * target at the previous try_to_free_pages() or balance_pgdat()
    	 * invocation.
    	 *
    	 * We use prev_priority as a measure of how much stress page reclaim is
    	 * under - it drives the swappiness decision: whether to unmap mapped
    	 * pages.
    	 *
    	 * Access to this field is quite racy even on uniprocessor.  But
    	 * it is expected to average out OK.
    	 */
    	int prev_priority;
    
    
    	ZONE_PADDING(_pad2_)
    	/* Rarely used or read-mostly fields */
    
    	/*
    	 * wait_table		-- the array holding the hash table
    	 * wait_table_hash_nr_entries	-- the size of the hash table array
    	 * wait_table_bits	-- wait_table_size == (1 << wait_table_bits)
    	 *
    	 * The purpose of all these is to keep track of the people
    	 * waiting for a page to become available and make them
    	 * runnable again when possible. The trouble is that this
    	 * consumes a lot of space, especially when so few things
    	 * wait on pages at a given time. So instead of using
    	 * per-page waitqueues, we use a waitqueue hash table.
    	 *
    	 * The bucket discipline is to sleep on the same queue when
    	 * colliding and wake all in that wait queue when removing.
    	 * When something wakes, it must check to be sure its page is
    	 * truly available, a la thundering herd. The cost of a
    	 * collision is great, but given the expected load of the
    	 * table, they should be so rare as to be outweighed by the
    	 * benefits from the saved space.
    	 *
    	 * __wait_on_page_locked() and unlock_page() in mm/filemap.c, are the
    	 * primary users of these fields, and in mm/page_alloc.c
    	 * free_area_init_core() performs the initialization of them.
    	 */
    	wait_queue_head_t	* wait_table;
    	unsigned long		wait_table_hash_nr_entries;
    	unsigned long		wait_table_bits;
    
    	/*
    	 * Discontig memory support fields.
    	 */
    	/* NOTE(review): presumably the node descriptor this zone belongs to -- confirm. */
    	struct pglist_data	*zone_pgdat;
    	/* zone_start_pfn == zone_start_paddr >> PAGE_SHIFT */
    	unsigned long		zone_start_pfn;
    
    	/*
    	 * zone_start_pfn, spanned_pages and present_pages are all
    	 * protected by span_seqlock.  It is a seqlock because it has
    	 * to be read outside of zone->lock, and it is done in the main
    	 * allocator path.  But, it is written quite infrequently.
    	 *
    	 * The lock is declared along with zone->lock because it is
    	 * frequently read in proximity to zone->lock.  It's good to
    	 * give them a chance of being in the same cacheline.
    	 */
    	unsigned long		spanned_pages;	/* total size, including holes */
    	unsigned long		present_pages;	/* amount of memory (excluding holes) */
    
    	/*
    	 * rarely used fields:
    	 */
    	/* NOTE(review): presumably a human-readable zone name -- confirm. */
    	const char		*name;
    } ____cacheline_internodealigned_in_smp;



  • 相关阅读:
    索引
    排序---冒泡排序、快速排序、选择排序、插入排序、希尔排序
    设计模式6大原则
    InputStream的read()读取机制
    Java中的关键字---native
    Java中的关键字---static
    Java中的关键字---transient
    负载均理解
    2020-03-29 微服务网关GateWay
    2020-03-28 微服务网关Zuul
  • 原文地址:https://www.cnblogs.com/javaadu/p/11742687.html
Copyright © 2011-2022 走看看