在内存系统初始化过程中,有如下代码:
1: static void __init pagetable_init(void)
2: { 3: pgd_t *pgd_base = swapper_pg_dir; 4: 5: permanent_kmaps_init(pgd_base); 6: }
这里,我们看到了神秘的swapper_pg_dir。全局搜索一下,发现它还出现在如下代码中:
1: /*
2: * Build a proper pagetable for the kernel mappings. Up until this
3: * point, we've been running on some set of pagetables constructed by
4: * the boot process.
5: *
6: * If we're booting on native hardware, this will be a pagetable
7: * constructed in arch/x86/kernel/head_32.S. The root of the
8: * pagetable will be swapper_pg_dir.
9: *
10: * If we're booting paravirtualized under a hypervisor, then there are
11: * more options: we may already be running PAE, and the pagetable may
12: * or may not be based in swapper_pg_dir. In any case,
13: * paravirt_pagetable_setup_start() will set up swapper_pg_dir
14: * appropriately for the rest of the initialization to work.
15: *
16: * In general, pagetable_init() assumes that the pagetable may already
17: * be partially populated, and so it avoids stomping on any existing
18: * mappings.
19: */
20: void __init early_ioremap_page_table_range_init(void)
21: { 22: pgd_t *pgd_base = swapper_pg_dir;23: unsigned long vaddr, end;
24: 25: /*
26: * Fixed mappings, only the page table structure has to be
27: * created - mappings will be set by set_fixmap():
28: */
29: vaddr = __fix_to_virt(__end_of_fixed_addresses - 1) & PMD_MASK; 30: end = (FIXADDR_TOP + PMD_SIZE - 1) & PMD_MASK; 31: page_table_range_init(vaddr, end, pgd_base); 32: early_ioremap_reset(); 33: }
在head_32.S中,定义了如下的BSS段。BSS段在内核映像文件中不占空间,但是在内核被加载到内存时,会为其保留相应的空间。
在未启用CONFIG_X86_PAE的配置下,BSS段一共保留了4个页面的空间,分别用initial_page_table、initial_pg_fixmap、empty_zero_page和swapper_pg_dir来标识其地址(若启用了CONFIG_X86_PAE,则第一项改为initial_pg_pmd,大小为KPMDS个页面)。
1: /*
2: * BSS section
3: */
4: __PAGE_ALIGNED_BSS 5: .align PAGE_SIZE6: #ifdef CONFIG_X86_PAE
7: initial_pg_pmd: 8: .fill 1024*KPMDS,4,09: #else
10: ENTRY(initial_page_table) 11: .fill 1024,4,012: #endif
13: initial_pg_fixmap: 14: .fill 1024,4,0 15: ENTRY(empty_zero_page) 16: .fill 4096,1,0 17: ENTRY(swapper_pg_dir) 18: .fill 1024,4,0
通过如下代码,将initial_page_table的物理地址写入CR3,设置为初始页目录,并置位CR0的PG位以开启分页:
1: /*
2: * Enable paging
3: */
4: movl $pa(initial_page_table), %eax5: movl %eax,%cr3 /* set the page table pointer.. */
6: movl %cr0,%eax 7: orl $X86_CR0_PG,%eax8: movl %eax,%cr0 /* ..and set paging (PG) bit */
9: ljmp $__BOOT_CS,$1f /* Clear prefetch and normalize %eip */
在内核初始化阶段,setup_arch调用了如下的函数:
1: void __init setup_arch(char **cmdline_p)
2: { 3: 4: ......5: /* max_pfn_mapped is updated here */
6: max_low_pfn_mapped = init_memory_mapping(0, max_low_pfn<<PAGE_SHIFT); 7: max_pfn_mapped = max_low_pfn_mapped; 8: ...... 9: x86_init.paging.pagetable_setup_start(swapper_pg_dir); 10: paging_init(); 11: x86_init.paging.pagetable_setup_done(swapper_pg_dir); 12: 13: ...... 14: }
其中,init_memory_mapping调用了kernel_physical_mapping_init,由后者初始化swapper_pg_dir:
1: /*
2: * This maps the physical memory to kernel virtual address space, a total
3: * of max_low_pfn pages, by creating page tables starting from address
4: * PAGE_OFFSET:
5: */
6: unsigned long __init
7: kernel_physical_mapping_init(unsigned long start,
8: unsigned long end,
9: unsigned long page_size_mask)
10: {11: int use_pse = page_size_mask == (1<<PG_LEVEL_2M);
12: unsigned long last_map_addr = end;
13: unsigned long start_pfn, end_pfn;
14: pgd_t *pgd_base = swapper_pg_dir;15: int pgd_idx, pmd_idx, pte_ofs;
16: unsigned long pfn;
17: pgd_t *pgd; 18: pmd_t *pmd; 19: pte_t *pte;20: unsigned pages_2m, pages_4k;
21: int mapping_iter;
22: 23: start_pfn = start >> PAGE_SHIFT; 24: end_pfn = end >> PAGE_SHIFT; 25: 26: /*
27: * First iteration will setup identity mapping using large/small pages
28: * based on use_pse, with other attributes same as set by
29: * the early code in head_32.S
30: *
31: * Second iteration will setup the appropriate attributes (NX, GLOBAL..)
32: * as desired for the kernel identity mapping.
33: *
34: * This two pass mechanism conforms to the TLB app note which says:
35: *
36: * "Software should not write to a paging-structure entry in a way
37: * that would change, for any linear address, both the page size
38: * and either the page frame or attributes."
39: */
40: mapping_iter = 1; 41: 42: if (!cpu_has_pse)
43: use_pse = 0; 44: 45: repeat: 46: pages_2m = pages_4k = 0; 47: pfn = start_pfn; 48: pgd_idx = pgd_index((pfn<<PAGE_SHIFT) + PAGE_OFFSET); 49: pgd = pgd_base + pgd_idx;50: for (; pgd_idx < PTRS_PER_PGD; pgd++, pgd_idx++) {
51: pmd = one_md_table_init(pgd); 52: 53: if (pfn >= end_pfn)
54: continue;
55: #ifdef CONFIG_X86_PAE 56: pmd_idx = pmd_index((pfn<<PAGE_SHIFT) + PAGE_OFFSET); 57: pmd += pmd_idx; 58: #else 59: pmd_idx = 0;60: #endif
61: for (; pmd_idx < PTRS_PER_PMD && pfn < end_pfn;
62: pmd++, pmd_idx++) {63: unsigned int addr = pfn * PAGE_SIZE + PAGE_OFFSET;
64: 65: /*
66: * Map with big pages if possible, otherwise
67: * create normal page tables:
68: */
69: if (use_pse) {
70: unsigned int addr2;
71: pgprot_t prot = PAGE_KERNEL_LARGE;72: /*
73: * first pass will use the same initial
74: * identity mapping attribute + _PAGE_PSE.
75: */
76: pgprot_t init_prot = 77: __pgprot(PTE_IDENT_ATTR | 78: _PAGE_PSE); 79: 80: addr2 = (pfn + PTRS_PER_PTE-1) * PAGE_SIZE + 81: PAGE_OFFSET + PAGE_SIZE-1; 82: 83: if (is_kernel_text(addr) ||
84: is_kernel_text(addr2)) 85: prot = PAGE_KERNEL_LARGE_EXEC; 86: 87: pages_2m++;88: if (mapping_iter == 1)
89: set_pmd(pmd, pfn_pmd(pfn, init_prot));90: else
91: set_pmd(pmd, pfn_pmd(pfn, prot)); 92: 93: pfn += PTRS_PER_PTE;94: continue;
95: } 96: pte = one_page_table_init(pmd); 97: 98: pte_ofs = pte_index((pfn<<PAGE_SHIFT) + PAGE_OFFSET); 99: pte += pte_ofs;100: for (; pte_ofs < PTRS_PER_PTE && pfn < end_pfn;
101: pte++, pfn++, pte_ofs++, addr += PAGE_SIZE) { 102: pgprot_t prot = PAGE_KERNEL;103: /*
104: * first pass will use the same initial
105: * identity mapping attribute.
106: */
107: pgprot_t init_prot = __pgprot(PTE_IDENT_ATTR); 108: 109: if (is_kernel_text(addr))
110: prot = PAGE_KERNEL_EXEC; 111: 112: pages_4k++;113: if (mapping_iter == 1) {
114: set_pte(pte, pfn_pte(pfn, init_prot)); 115: last_map_addr = (pfn << PAGE_SHIFT) + PAGE_SIZE;116: } else
117: set_pte(pte, pfn_pte(pfn, prot)); 118: } 119: } 120: }121: if (mapping_iter == 1) {
122: /*
123: * update direct mapping page count only in the first
124: * iteration.
125: */
126: update_page_count(PG_LEVEL_2M, pages_2m); 127: update_page_count(PG_LEVEL_4K, pages_4k); 128: 129: /*
130: * local global flush tlb, which will flush the previous
131: * mappings present in both small and large page TLB's.
132: */
133: __flush_tlb_all(); 134: 135: /*
136: * Second iteration will set the actual desired PTE attributes.
137: */
138: mapping_iter = 2;139: goto repeat;
140: }141: return last_map_addr;
142: }
pgd_t *pgd_base = swapper_pg_dir;
将swapper_pg_dir作为页目录的基地址,赋给pgd_base。
start_pfn = start >> PAGE_SHIFT;
end_pfn = end >> PAGE_SHIFT;
start和end分别是内核直接映射区域的起始物理地址和结束物理地址,右移PAGE_SHIFT位后,得到对应的起始页帧号start_pfn和结束页帧号end_pfn。之后在计算各级页表索引时,再将页帧号左移PAGE_SHIFT位并加上PAGE_OFFSET,还原出该页帧对应的内核虚拟地址。
pgd_idx = pgd_index((pfn<<PAGE_SHIFT) + PAGE_OFFSET);
pgd = pgd_base + pgd_idx;
pgd_idx是该虚拟地址在页目录中的索引,pgd则是指向相应页目录项的指针。
1: pgprot_t prot = PAGE_KERNEL;2: /*
3: * first pass will use the same initial
4: * identity mapping attribute.
5: */
6: pgprot_t init_prot = __pgprot(PTE_IDENT_ATTR); 7: 8: if (is_kernel_text(addr))
9: prot = PAGE_KERNEL_EXEC; 10: 11: pages_4k++;12: if (mapping_iter == 1) {
13: set_pte(pte, pfn_pte(pfn, init_prot)); 14: last_map_addr = (pfn << PAGE_SHIFT) + PAGE_SIZE;15: } else
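16: set_pte(pte, pfn_pte(pfn, prot));
最后,通过两个回合的遍历,将属性设置到对应的页表项上去:第一回合(mapping_iter == 1)使用与早期映射相同的属性init_prot,第二回合再写入最终期望的属性prot(内核代码段所在的页为PAGE_KERNEL_EXEC,其余为PAGE_KERNEL)。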