zoukankan      html  css  js  c++  java
  • Linux-3.14.12内存管理笔记【建立内核页表(3)】

    前面已经分析了内核页表的准备工作以及内核低端内存页表的建立,接着回到init_mem_mapping()中,低端内存页表建立后紧随着还有一个函数early_ioremap_page_table_range_init():

    【file:/arch/x86/mm/init_32.c】
    /*
     * Build a proper pagetable for the kernel mappings. Up until this
     * point, we've been running on some set of pagetables constructed by
     * the boot process.
     *
     * If we're booting on native hardware, this will be a pagetable
     * constructed in arch/x86/kernel/head_32.S. The root of the
     * pagetable will be swapper_pg_dir.
     *
     * If we're booting paravirtualized under a hypervisor, then there are
     * more options: we may already be running PAE, and the pagetable may
     * or may not be based in swapper_pg_dir. In any case,
     * paravirt_pagetable_init() will set up swapper_pg_dir
     * appropriately for the rest of the initialization to work.
     *
     * In general, pagetable_init() assumes that the pagetable may already
     * be partially populated, and so it avoids stomping on any existing
     * mappings.
     *
     * NOTE(review): the text above describes pagetable setup in general.
     * The function below only handles the fixmap range: it rounds the
     * range out to PMD boundaries, has page_table_range_init() create the
     * page table structure for it under swapper_pg_dir, and then calls
     * early_ioremap_reset().
     */
    void __init early_ioremap_page_table_range_init(void)
    {
        pgd_t *pgd_base = swapper_pg_dir;
        unsigned long vaddr, end;
     
        /*
         * Fixed mappings, only the page table structure has to be
         * created - mappings will be set by set_fixmap():
         */
        /* Address of the highest fixmap index, rounded down to a PMD boundary. */
        vaddr = __fix_to_virt(__end_of_fixed_addresses - 1) & PMD_MASK;
        /* FIXADDR_TOP rounded up to the next PMD boundary. */
        end = (FIXADDR_TOP + PMD_SIZE - 1) & PMD_MASK;
        page_table_range_init(vaddr, end, pgd_base);
        early_ioremap_reset();
    }
    

    该函数主要是用于建立固定内存映射区的。固定内存映射区是指FIXADDR_START到FIXADDR_TOP的地址空间,而该地址空间因功能特性不同通过索引来定义区分,其中索引以枚举类型的形式定义在enum fixed_addresses里面。

    【file:/arch/x86/include/asm/fixmap.h】
    /*
     * Here we define all the compile-time 'special' virtual
     * addresses. The point is to have a constant address at
     * compile time, but to set the physical address only
     * in the boot process.
     * for x86_32: We allocate these special addresses
     * from the end of virtual memory (0xfffff000) backwards.
     * Also this lets us do fail-safe vmalloc(), we
     * can guarantee that these special addresses and
     * vmalloc()-ed addresses never overlap.
     *
     * These 'compile-time allocated' memory buffers are
     * fixed-size 4k pages (or larger if used with an increment
     * higher than 1). Use set_fixmap(idx,phys) to associate
     * physical memory with fixmap indices.
     *
     * TLB entries of such buffers will not be flushed across
     * task switches.
     *
     * Because allocation goes backwards, a larger enumerator value
     * corresponds to a lower virtual address. Which enumerators exist
     * (and therefore their numeric values) depends on the CONFIG_*
     * options tested below.
     */
    enum fixed_addresses {
    #ifdef CONFIG_X86_32
        FIX_HOLE,
        FIX_VDSO,
    #else
        VSYSCALL_LAST_PAGE,
        VSYSCALL_FIRST_PAGE = VSYSCALL_LAST_PAGE
                    + ((VSYSCALL_END-VSYSCALL_START) >> PAGE_SHIFT) - 1,
        VVAR_PAGE,
        VSYSCALL_HPET,
    #ifdef CONFIG_PARAVIRT_CLOCK
        PVCLOCK_FIXMAP_BEGIN,
        PVCLOCK_FIXMAP_END = PVCLOCK_FIXMAP_BEGIN+PVCLOCK_VSYSCALL_NR_PAGES-1,
    #endif
    #endif
        FIX_DBGP_BASE,
        FIX_EARLYCON_MEM_BASE,
    #ifdef CONFIG_PROVIDE_OHCI1394_DMA_INIT
        FIX_OHCI1394_BASE,
    #endif
    #ifdef CONFIG_X86_LOCAL_APIC
        FIX_APIC_BASE, /* local (CPU) APIC -- required for SMP or not */
    #endif
    #ifdef CONFIG_X86_IO_APIC
        FIX_IO_APIC_BASE_0,
        FIX_IO_APIC_BASE_END = FIX_IO_APIC_BASE_0 + MAX_IO_APICS - 1,
    #endif
    #ifdef CONFIG_X86_VISWS_APIC
        FIX_CO_CPU, /* Cobalt timer */
        FIX_CO_APIC, /* Cobalt APIC Redirection Table */
        FIX_LI_PCIA, /* Lithium PCI Bridge A */
        FIX_LI_PCIB, /* Lithium PCI Bridge B */
    #endif
        FIX_RO_IDT, /* Virtual mapping for read-only IDT */
    #ifdef CONFIG_X86_32
        FIX_KMAP_BEGIN, /* reserved pte's for temporary kernel mappings */
        /* One group of KM_TYPE_NR slots for each of NR_CPUS. */
        FIX_KMAP_END = FIX_KMAP_BEGIN+(KM_TYPE_NR*NR_CPUS)-1,
    #ifdef CONFIG_PCI_MMCONFIG
        FIX_PCIE_MCFG,
    #endif
    #endif
    #ifdef CONFIG_PARAVIRT
        FIX_PARAVIRT_BOOTMAP,
    #endif
        FIX_TEXT_POKE1, /* reserve 2 pages for text_poke() */
        FIX_TEXT_POKE0, /* first page is last, because allocation is backward */
    #ifdef CONFIG_X86_INTEL_MID
        FIX_LNW_VRTC,
    #endif
        __end_of_permanent_fixed_addresses,
     
        /*
         * 256 temporary boot-time mappings, used by early_ioremap(),
         * before ioremap() is functional.
         *
         * If necessary we round it up to the next 256 pages boundary so
         * that we can have a single pgd entry and a single pte table:
         */
    #define NR_FIX_BTMAPS 64
    #define FIX_BTMAPS_SLOTS 4
    #define TOTAL_FIX_BTMAPS (NR_FIX_BTMAPS * FIX_BTMAPS_SLOTS)
        /*
         * The XOR of the first and last candidate index, masked with
         * -PTRS_PER_PTE, is non-zero when the two indices differ in a
         * bit at or above PTRS_PER_PTE, i.e. the candidate range would
         * straddle a PTRS_PER_PTE-aligned boundary. In that case the
         * start is rounded up to the next multiple of TOTAL_FIX_BTMAPS;
         * otherwise it starts right at __end_of_permanent_fixed_addresses.
         */
        FIX_BTMAP_END =
         (__end_of_permanent_fixed_addresses ^
          (__end_of_permanent_fixed_addresses + TOTAL_FIX_BTMAPS - 1)) &
         -PTRS_PER_PTE
         ? __end_of_permanent_fixed_addresses + TOTAL_FIX_BTMAPS -
           (__end_of_permanent_fixed_addresses & (TOTAL_FIX_BTMAPS - 1))
         : __end_of_permanent_fixed_addresses,
        /* BEGIN has the larger index because addresses grow downwards. */
        FIX_BTMAP_BEGIN = FIX_BTMAP_END + TOTAL_FIX_BTMAPS - 1,
    #ifdef CONFIG_X86_32
        FIX_WP_TEST,
    #endif
    #ifdef CONFIG_INTEL_TXT
        FIX_TBOOT_BASE,
    #endif
        /* Sentinel: one past the last valid fixmap index. */
        __end_of_fixed_addresses
    };
    

    但是各枚举标识的分区并不是从低地址往高地址分布,而是自高地址往低地址分布。其中__fix_to_virt宏定义就是用来通过索引来计算相应的固定映射区域的线性地址。

    /*
     * Convert a fixmap index to its virtual address: index 0 maps to
     * FIXADDR_TOP and every further index lies (1 << PAGE_SHIFT) bytes lower.
     */
    #define __fix_to_virt(x)         (FIXADDR_TOP - ((x) << PAGE_SHIFT))
    

    对应的有虚拟地址转索引的宏:

    /*
     * Convert a virtual address back to its fixmap index. The address is
     * masked with PAGE_MASK first, then its distance below FIXADDR_TOP is
     * shifted right by PAGE_SHIFT to yield the index.
     */
    #define __virt_to_fix(x)         ((FIXADDR_TOP - ((x)&PAGE_MASK)) >> PAGE_SHIFT)
    

    接着回到early_ioremap_page_table_range_init()的第一个函数page_table_range_init():

    【file:/arch/x86/mm/init_32.c】
    /*
     * This function initializes a certain range of kernel virtual memory
     * with new bootmem page tables, everywhere page tables are missing in
     * the given range.
     *
     * NOTE: The pagetables are allocated contiguous on the physical space
     * so we can cache the place of the first one and move around without
     * checking the pgd every time.
     *
     * @start:    first virtual address of the range; the loops advance from
     *            here in PMD_SIZE steps
     * @end:      virtual address at which the walk stops (compared with !=,
     *            so it must be reachable from @start in PMD_SIZE steps)
     * @pgd_base: base of the page global directory to populate
     */
    static void __init
    page_table_range_init(unsigned long start, unsigned long end, pgd_t *pgd_base)
    {
        int pgd_idx, pmd_idx;
        unsigned long vaddr;
        pgd_t *pgd;
        pmd_t *pmd;
        pte_t *pte = NULL;
        /* Number of pages to pre-allocate, as computed by the helper. */
        unsigned long count = page_table_range_init_count(start, end);
        void *adr = NULL;
     
        /* Pre-allocate the pages in one call; adr stays NULL if none are needed. */
        if (count)
            adr = alloc_low_pages(count);
     
        vaddr = start;
        pgd_idx = pgd_index(vaddr);
        pmd_idx = pmd_index(vaddr);
        pgd = pgd_base + pgd_idx;
     
        /* Outer loop: one iteration per pgd entry until the range is covered. */
        for ( ; (pgd_idx < PTRS_PER_PGD) && (vaddr != end); pgd++, pgd_idx++) {
            pmd = one_md_table_init(pgd);
            pmd = pmd + pmd_index(vaddr);
            /* Inner loop: one iteration per pmd entry, PMD_SIZE bytes each. */
            for (; (pmd_idx < PTRS_PER_PMD) && (vaddr != end);
                                pmd++, pmd_idx++) {
                /*
                 * Get the pte page for this pmd from one_page_table_init()
                 * and pass it through page_table_kmap_check(), which also
                 * receives the previous pte page and the pre-allocated
                 * buffer cursor (&adr).
                 */
                pte = page_table_kmap_check(one_page_table_init(pmd),
                                pmd, vaddr, pte, &adr);
     
                vaddr += PMD_SIZE;
            }
            /* Subsequent pgd entries are walked from their first pmd slot. */
            pmd_idx = 0;
        }
    }
     
    

    page_table_range_init_count()用来计算临时内核映射区间所需的页表数量。前面提到FIXADDR_START到FIXADDR_TOP是固定映射区,其间有多个索引标识不同功能的映射区间,其中的一个区间FIX_KMAP_BEGIN到FIX_KMAP_END是临时内核映射区。顺便可以看一下两者的定义:

    FIX_KMAP_BEGIN, /* reserved pte's for temporary kernel mappings */
    
    FIX_KMAP_END = FIX_KMAP_BEGIN+(KM_TYPE_NR*NR_CPUS)-1,
    

    其中KM_TYPE_NR表示“窗口”数量,在高端内存的任意一个页框都可以通过一个“窗口”映射到内核地址空间,调用kmap_atomic可以搭建起“窗口”到高端内存的关系,即建立临时内核映射。而NR_CPUS则表示CPU数量。总的来说就是该临时内核映射区间是为了给各个CPU准备一个指定的窗口空间。由于kmap_atomic()对该区间的使用,所以该区间必须保证其页表连续性。

    如果page_table_range_init_count()计算得到的页表数量(count)不为0,紧接着调用的是alloc_low_pages():

    【file:/arch/x86/mm/init.c】
    /*
     * Pages returned are already directly mapped.
     *
     * Changing that is likely to break Xen, see commit:
     *
     * 279b706 x86,xen: introduce x86_init.mapping.pagetable_reserve
     *
     * for detailed information.
     */
    __ref void *alloc_low_pages(unsigned int num)
    {
        unsigned long pfn;
        int i;
     
        if (after_bootmem) {
            unsigned int order;
     
            order = get_order((unsigned long)num << PAGE_SHIFT);
            return (void *)__get_free_pages(GFP_ATOMIC | __GFP_NOTRACK |
                            __GFP_ZERO, order);
        }
     
        if ((pgt_buf_end + num) > pgt_buf_top || !can_use_brk_pgt) {
            unsigned long ret;
            if (min_pfn_mapped >= max_pfn_mapped)
                panic("alloc_low_pages: ran out of memory");
            ret = memblock_find_in_range(min_pfn_mapped << PAGE_SHIFT,
                        max_pfn_mapped << PAGE_SHIFT,
                        PAGE_SIZE * num , PAGE_SIZE);
            if (!ret)
                panic("alloc_low_pages: can not alloc memory");
            memblock_reserve(ret, PAGE_SIZE * num);
            pfn = ret >> PAGE_SHIFT;
        } else {
            pfn = pgt_buf_end;
            pgt_buf_end += num;
            printk(KERN_DEBUG "BRK [%#010lx, %#010lx] PGTABLE
    ",
                pfn << PAGE_SHIFT, (pgt_buf_end << PAGE_SHIFT) - 1);
        }
     
        for (i = 0; i < num; i++) {
            void *adr;
     
            adr = __va((pfn + i) << PAGE_SHIFT);
            clear_page(adr);
        }
     
        return __va(pfn << PAGE_SHIFT);
    }
    

    alloc_low_pages()则是根据前面early_alloc_pgt_buf()申请保留的页表缓冲空间的使用情况来判断,是从页表缓冲空间中申请,还是通过memblock算法申请页表内存。

    回到page_table_range_init(),其中one_md_table_init()是用于当pgd入参为空时,申请新物理页作为页中间目录的,但是此次仅分析x86非PAE环境的情况,不存在页中间目录,故实际上返回的仍是入参。附代码:

    【file:/arch/x86/mm/init_32.c】
    /*
     * Creates a middle page table and puts a pointer to it in the
     * given global directory entry. This only returns the gd entry
     * in non-PAE compilation mode, since the middle layer is folded.
     *
     * @pgd: page global directory entry to inspect and, under
     *       CONFIG_X86_PAE, populate when it is not present
     *
     * Returns the pmd table reached through @pgd at offset 0.
     */
    static pmd_t * __init one_md_table_init(pgd_t *pgd)
    {
        pud_t *pud;
        pmd_t *pmd_table;
     
    #ifdef CONFIG_X86_PAE
        /* Only allocate when the pgd entry has no present pmd table yet. */
        if (!(pgd_val(*pgd) & _PAGE_PRESENT)) {
            pmd_table = (pmd_t *)alloc_low_page();
            paravirt_alloc_pmd(&init_mm, __pa(pmd_table) >> PAGE_SHIFT);
            /* Point the pgd entry at the new table's physical address. */
            set_pgd(pgd, __pgd(__pa(pmd_table) | _PAGE_PRESENT));
            pud = pud_offset(pgd, 0);
            /* The walk through the updated pgd must land on the new table. */
            BUG_ON(pmd_table != pmd_offset(pud, 0));
     
            return pmd_table;
        }
    #endif
        /* Entry already present (or non-PAE build): just walk down to the pmd. */
        pud = pud_offset(pgd, 0);
        pmd_table = pmd_offset(pud, 0);
     
        return pmd_table;
    }
    

    接着的是page_table_kmap_check(),其入参调用的one_page_table_init()是用于当入参pmd没有页表指向时,创建页表并使其指向被创建的页表。page_table_kmap_check()实现:

    【file:/arch/x86/mm/init_32.c】
    /*
     * Under CONFIG_HIGHMEM, make sure the pte page backing @vaddr comes from
     * the pre-allocated buffer when @vaddr lies in the FIX_KMAP range and
     * that range spans more than one pmd entry; otherwise return @pte as is.
     *
     * @pte:     pte page currently installed for @pmd
     * @pmd:     pmd entry covering @vaddr
     * @vaddr:   virtual address being initialized
     * @lastpte: pte page returned for the previous pmd entry, or NULL
     * @adr:     cursor into the pre-allocated pages; advanced by PAGE_SIZE
     *           each time a page is consumed here
     *
     * Returns the pte page that is installed for @pmd on exit.
     */
    static pte_t *__init page_table_kmap_check(pte_t *pte, pmd_t *pmd,
                           unsigned long vaddr, pte_t *lastpte,
                           void **adr)
    {
    #ifdef CONFIG_HIGHMEM
        /*
         * Something (early fixmap) may already have put a pte
         * page here, which causes the page table allocation
         * to become nonlinear. Attempt to fix it, and if it
         * is still nonlinear then we have to bug.
         */
        /*
         * A larger fixmap index is a lower address, so the "begin" pmd
         * index is derived from FIX_KMAP_END and the "end" one from
         * FIX_KMAP_BEGIN.
         */
        int pmd_idx_kmap_begin = fix_to_virt(FIX_KMAP_END) >> PMD_SHIFT;
        int pmd_idx_kmap_end = fix_to_virt(FIX_KMAP_BEGIN) >> PMD_SHIFT;
     
        /* Act only if the kmap range crosses a pmd and vaddr's pmd is in it. */
        if (pmd_idx_kmap_begin != pmd_idx_kmap_end
            && (vaddr >> PMD_SHIFT) >= pmd_idx_kmap_begin
            && (vaddr >> PMD_SHIFT) <= pmd_idx_kmap_end) {
            pte_t *newpte;
            int i;
     
            BUG_ON(after_bootmem);
            /* Take the next pre-allocated page and copy the old entries over. */
            newpte = *adr;
            for (i = 0; i < PTRS_PER_PTE; i++)
                set_pte(newpte + i, pte[i]);
            *adr = (void *)(((unsigned long)(*adr)) + PAGE_SIZE);
     
            /* Install the copy in the pmd and verify the walk reaches it. */
            paravirt_alloc_pte(&init_mm, __pa(newpte) >> PAGE_SHIFT);
            set_pmd(pmd, __pmd(__pa(newpte)|_PAGE_TABLE));
            BUG_ON(newpte != pte_offset_kernel(pmd, 0));
            __flush_tlb_all();
     
            /* The old pte page is no longer referenced by this pmd. */
            paravirt_release_pte(__pa(pte) >> PAGE_SHIFT);
            pte = newpte;
        }
        /*
         * For a vaddr strictly between fix_to_virt(FIX_KMAP_END) and
         * fix_to_virt(FIX_KMAP_BEGIN - 1), the pte page must directly
         * follow the previous one (lastpte + PTRS_PER_PTE).
         */
        BUG_ON(vaddr < fix_to_virt(FIX_KMAP_BEGIN - 1)
               && vaddr > fix_to_virt(FIX_KMAP_END)
               && lastpte && lastpte + PTRS_PER_PTE != pte);
    #endif
        return pte;
    }
    

    可以看到这里再次出现临时内核映射区间的标识(FIX_KMAP_END和FIX_KMAP_BEGIN),检查当前页表初始化的地址是否处于该区间范围,如果是,则把其pte页表的内容拷贝到page_table_range_init()申请的页表空间中,并将newpte新页表的地址设置到pmd中(32bit系统实际上就是页全局目录),然后调用__flush_tlb_all()刷新TLB缓存;如果不是该区间,则仅是由入参中调用的one_page_table_init()被分配到了页表空间。

    由此,可以知道page_table_range_init()主要是做了什么了。这是由于kmap_atomic()对该区间的使用,该区间必须保证其页表连续性。为了避免前期可能对固定映射区已经分配了页表项,基于临时内核映射区间要求页表连续性的保证,所以在此重新申请连续的页表空间将原页表内容拷贝至此。值得注意的是,与低端内存的页表初始化不同的是,这里的页表只是被分配,相应的PTE项并未初始化,这个工作将会交由以后各个固定映射区部分的相关代码调用set_fixmap()来将相关的固定映射区页表与物理内存关联。

    early_ioremap_page_table_range_init()函数再往下的early_ioremap_reset()仅是对after_paging_init全局变量赋值。

    最后退出early_ioremap_page_table_range_init()后,init_mem_mapping()调用load_cr3()刷新CR3寄存器,__flush_tlb_all()则用于刷新TLB,由此启用新的内存分页映射。

    至此,内核页表建立完毕。

  • 相关阅读:
    Python 递归函数详解
    CentOS7 删除virbr0虚拟网卡
    /usr/bin/docker-current: Error response from daemon: oci runtime error: container_linux.go:247: starting container process caused "process_linux.go:245: running exec setns .....
    Linux系统添加永久静态路由的方法(包含Centos7)
    正则表达式
    Unity3D -- shader语法内置函数
    Unity3D -- shader光照常用函数和变量
    Unity3D -- shader常用函数和变量
    Unity 着色器训练营(2)
    Unity Shader着色器优化
  • 原文地址:https://www.cnblogs.com/linhaostudy/p/11621647.html
Copyright © 2011-2022 走看看