zoukankan      html  css  js  c++  java
  • linux 内核启动流程

    本文以Linux3.14版本源码为例分析其启动流程。各版本启动代码略有不同,但核心流程与思想万变不离其宗。

    内核映像被加载到内存并获得控制权之后,内核启动流程开始。通常,内核映像以压缩形式存储,并不是一个可以执行的内核。因此,内核阶段的首要工作是自解压内核映像。

    内核编译生成 vmlinux 后,通常会对其进行压缩,得到 zImage(小内核,小于512KB)或 bzImage(大内核,大于512KB)。在它们的头部嵌有解压缩程序。

    通过 linux/arch/arm/boot/compressed 目录下的 Makefile 寻找到 vmlinux文件的链接脚本(vmlinux.lds),从中查找系统启动入口函数。

    $(obj)/vmlinux: $(obj)/vmlinux.lds $(obj)/$(HEAD) $(obj)/piggy.$(suffix_y).o \
            $(addprefix $(obj)/, $(OBJS)) $(lib1funcs) $(ashldi3) \
            $(bswapsdi2) FORCE
        @$(check_for_multiple_zreladdr)
        $(call if_changed,ld)
        @$(check_for_bad_syms)

    vmlinux.lds(linux/arch/arm/kernel/vmlinux.lds)链接脚本开头内容

    OUTPUT_ARCH(arm)
    ENTRY(stext)
    jiffies = jiffies_64;
    SECTIONS
    {
        。
        。
        。

    得到内核入口函数为 stext(linux/arch/arm/kernel/head.S)

    内核引导阶段

    ENTRY(stext)
        。
        。
        。
        bl    __lookup_processor_type    @ r5=procinfo r9=cpuid                             //处理器是否支持
        movs    r10, r5                @ invalid processor (r5=0)?
     THUMB( it    eq )        @ force fixup-able long branch encoding
        beq    __error_p            @ yes, error 'p'                           //不支持则打印错误信息
     
              。
        。
        。
        bl    __create_page_tables                                                       //创建页表
     
        /*
         * The following calls CPU specific code in a position independent
         * manner.  See arch/arm/mm/proc-*.S for details.  r10 = base of
         * xxx_proc_info structure selected by __lookup_processor_type
         * above.  On return, the CPU will be ready for the MMU to be
         * turned on, and r0 will hold the CPU control register value.
         */
        ldr    r13, =__mmap_switched        @ address to jump to after                 //保存MMU使能后跳转地址
                            @ mmu has been enabled
        adr    lr, BSYM(1f)            @ return (PIC) address
        mov    r8, r4                @ set TTBR1 to swapper_pg_dir
     ARM(    add    pc, r10, #PROCINFO_INITFUNC    )
     THUMB(    add    r12, r10, #PROCINFO_INITFUNC    )
     THUMB(    mov    pc, r12                )
    1:    b    __enable_mmu                                                                           //使能MMU后跳转到__mmap_switched

     查找标签__mmap_switched所在位置:/linux/arch/arm/kernel/head-common.S

    __mmap_switched:
        /*
         * The following fragment of code is executed with the MMU on in MMU mode,
         * and uses absolute addresses; this is not position independent.
         *
         *  r0  = cp#15 control register
         *  r1  = machine ID
         *  r2  = atags/dtb pointer
         *  r9  = processor ID
         */
        //保存设备信息、设备树及启动参数存储地址
        。
        。
        。
        b    start_kernel

    内核初始化阶段

    从start_kernel函数开始,内核进入C语言部分,完成内核的大部分初始化工作。

    函数所在位置:/linux/init/main.c

    start_kernel涉及大量初始化工作,只列举重要的初始化工作。

    asmlinkage void __init start_kernel(void)
    {
        ……                                                                              //类型判断
        smp_setup_processor_id();                                                         //smp相关,返回启动CPU号
        ……
        local_irq_disable();                                                                   //关闭当前CPU中断
        early_boot_irqs_disabled = true;
    /*
     * Interrupts are still disabled. Do necessary setups, then
     * enable them
     */
        boot_cpu_init();
        page_address_init();                                                            //初始化页地址
        pr_notice("%s", linux_banner);                                                   //显示内核版本信息
        setup_arch(&command_line);
        mm_init_owner(&init_mm, &init_task);
        mm_init_cpumask(&init_mm);
        setup_command_line(command_line);
        setup_nr_cpu_ids();
        setup_per_cpu_areas();
        smp_prepare_boot_cpu();    /* arch-specific boot-cpu hooks */
     
        build_all_zonelists(NULL, NULL);
        page_alloc_init();                                                                            //页内存申请初始化
     
        pr_notice("Kernel command line: %s\n", boot_command_line);                                     //打印内核启动命令行参数
        parse_early_param();
        parse_args("Booting kernel", static_command_line, __start___param,
               __stop___param - __start___param,
               -1, -1, &unknown_bootoption);
     
        ……
        /*
         * Set up the scheduler prior starting any interrupts (such as the
         * timer interrupt). Full topology setup happens at smp_init()
         * time - but meanwhile we still have a functioning scheduler.
         */
        sched_init();                                                                                    //进程调度器初始化
        /*
         * Disable preemption - early bootup scheduling is extremely
         * fragile until we cpu_idle() for the first time.
         */
        preempt_disable();                                                                                    //禁止内核抢占
        if (WARN(!irqs_disabled(), "Interrupts were enabled *very* early, fixing it\n"))
            local_irq_disable();                                                                      //检查关闭CPU中断
        
        
              /*大量初始化内容 见名知意*/
        idr_init_cache();
        rcu_init();
        tick_nohz_init();
        context_tracking_init();
        radix_tree_init();
        /* init some links before init_ISA_irqs() */
        early_irq_init();
        init_IRQ();
        tick_init();
        init_timers();
        hrtimers_init();
        softirq_init();
        timekeeping_init();
        time_init();
        sched_clock_postinit();
        perf_event_init();
        profile_init();
        call_function_init();
        WARN(!irqs_disabled(), "Interrupts were enabled early\n");
        early_boot_irqs_disabled = false;
        local_irq_enable();                                                                            //本地中断可以使用了
     
        kmem_cache_init_late();
     
        /*
         * HACK ALERT! This is early. We're enabling the console before
         * we've done PCI setups etc, and console_init() must be aware of
         * this. But we do want output early, in case something goes wrong.
         */
        console_init();                                                                            //初始化控制台,可以使用printk了
        if (panic_later)
            panic("Too many boot %s vars at `%s'", panic_later,
                  panic_param);
     
        lockdep_info();
     
        /*
         * Need to run this when irqs are enabled, because it wants
         * to self-test [hard/soft]-irqs on/off lock inversion bugs
         * too:
         */
        locking_selftest();
     
    #ifdef CONFIG_BLK_DEV_INITRD
        if (initrd_start && !initrd_below_start_ok &&
            page_to_pfn(virt_to_page((void *)initrd_start)) < min_low_pfn) {
            pr_crit("initrd overwritten (0x%08lx < 0x%08lx) - disabling it.\n",
                page_to_pfn(virt_to_page((void *)initrd_start)),
                min_low_pfn);
            initrd_start = 0;
        }
    #endif
        page_cgroup_init();
        debug_objects_mem_init();
        kmemleak_init();
        setup_per_cpu_pageset();
        numa_policy_init();
        if (late_time_init)
            late_time_init();
        sched_clock_init();
        calibrate_delay();
        pidmap_init();
        anon_vma_init();
        acpi_early_init();
    #ifdef CONFIG_X86
        if (efi_enabled(EFI_RUNTIME_SERVICES))
            efi_enter_virtual_mode();
    #endif
    #ifdef CONFIG_X86_ESPFIX64
        /* Should be run before the first non-init thread is created */
        init_espfix_bsp();
    #endif
        thread_info_cache_init();
        cred_init();
        fork_init(totalram_pages);                                                             //初始化fork
        proc_caches_init();
        buffer_init();
        key_init();
        security_init();
        dbg_late_init();
        vfs_caches_init(totalram_pages);                                                      //虚拟文件系统初始化
        signals_init();
        /* rootfs populating might need page-writeback */
        page_writeback_init();
    #ifdef CONFIG_PROC_FS
        proc_root_init();
    #endif
        cgroup_init();
        cpuset_init();
        taskstats_init_early();
        delayacct_init();
     
        check_bugs();
     
        sfi_init_late();
     
        if (efi_enabled(EFI_RUNTIME_SERVICES)) {
            efi_late_init();
            efi_free_boot_services();
        }
     
        ftrace_init();
     
        /* Do the rest non-__init'ed, we're now alive */
        rest_init();
    }

     函数最后调用rest_init()函数

    /*最重要使命:创建kernel_init进程,并进行后续初始化*/
    static noinline void __init_refok rest_init(void)
    {
        int pid;
     
        rcu_scheduler_starting();
        /*
         * We need to spawn init first so that it obtains pid 1, however
         * the init task will end up wanting to create kthreads, which, if
         * we schedule it before we create kthreadd, will OOPS.
         */
        
        kernel_thread(kernel_init, NULL, CLONE_FS | CLONE_SIGHAND);                             //创建kernel_init进程
        
        numa_default_policy();
        pid = kernel_thread(kthreadd, NULL, CLONE_FS | CLONE_FILES);
        rcu_read_lock();
        kthreadd_task = find_task_by_pid_ns(pid, &init_pid_ns);
        rcu_read_unlock();
        complete(&kthreadd_done);
     
        /*
         * The boot idle thread must execute schedule()
         * at least once to get things moving:
         */
        init_idle_bootup_task(current);
        schedule_preempt_disabled();
        /* Call into cpu_idle with preempt disabled */
        //cpu_idle就是在系统闲置时用来降低电力的使用和减少热的产生的空转函数,函数至此不再返回,其余工作从kernel_init进程处发起
        cpu_startup_entry(CPUHP_ONLINE);
    }

    kernel_init函数将完成设备驱动程序的初始化,并启动第一个用户进程(在2.6版本中这一步由init_post函数完成)

    部分书籍介绍的内核启动流程基于经典的2.6版本,kernel_init函数还会调用init_post函数专门负责_init进程的启动,现版本已经被整合到了一起。

    static int __ref kernel_init(void *unused)
    {
        int ret;
     
        kernel_init_freeable();                 //该函数中完成smp开启  驱动初始化 共享内存初始化等工作
        /* need to finish all async __init code before freeing the memory */
        async_synchronize_full();
        free_initmem();                         //初始化尾声,清除内存无用数据
        mark_rodata_ro();
        system_state = SYSTEM_RUNNING;
        numa_default_policy();
     
        flush_delayed_fput();
     
        if (ramdisk_execute_command) {
            ret = run_init_process(ramdisk_execute_command);
            if (!ret)
                return 0;
            pr_err("Failed to execute %s (error %d)\n",
                   ramdisk_execute_command, ret);
        }
     
        /*
         * We try each of these until one succeeds.
         *
         * The Bourne shell can be used instead of init if we are
         * trying to recover a really broken machine.
                                                              *寻找init函数,创建一号进程_init (第一个用户空间进程)*/
        if (execute_command) {
            ret = run_init_process(execute_command);
            if (!ret)
                return 0;
            pr_err("Failed to execute %s (error %d).  Attempting defaults...\n",
                execute_command, ret);
        }
        if (!try_to_run_init_process("/sbin/init") ||
            !try_to_run_init_process("/etc/init") ||
            !try_to_run_init_process("/bin/init") ||
            !try_to_run_init_process("/bin/sh"))
            return 0;
     
        panic("No working init found.  Try passing init= option to kernel. "
              "See Linux Documentation/init.txt for guidance.");
    }
    static int __ref kernel_init(void *unused)
    {
        int ret;
     
        kernel_init_freeable();                 //该函数中完成smp开启  驱动初始化 共享内存初始化等工作
        /* need to finish all async __init code before freeing the memory */
        async_synchronize_full();
        free_initmem();                         //初始化尾声,清除内存无用数据
        mark_rodata_ro();
        system_state = SYSTEM_RUNNING;
        numa_default_policy();
     
        flush_delayed_fput();
     
        if (ramdisk_execute_command) {
            ret = run_init_process(ramdisk_execute_command);
            if (!ret)
                return 0;
            pr_err("Failed to execute %s (error %d)\n",
                   ramdisk_execute_command, ret);
        }
     
        /*
         * We try each of these until one succeeds.
         *
         * The Bourne shell can be used instead of init if we are
         * trying to recover a really broken machine.
                                                              *寻找init函数,创建一号进程_init (第一个用户空间进程)*/
        if (execute_command) {
            ret = run_init_process(execute_command);
            if (!ret)
                return 0;
            pr_err("Failed to execute %s (error %d).  Attempting defaults...\n",
                execute_command, ret);
        }
        if (!try_to_run_init_process("/sbin/init") ||
            !try_to_run_init_process("/etc/init") ||
            !try_to_run_init_process("/bin/init") ||
            !try_to_run_init_process("/bin/sh"))
            return 0;
     
        panic("No working init found.  Try passing init= option to kernel. "
              "See Linux Documentation/init.txt for guidance.");
    }

    到此,内核初始化已经接近尾声,所有的初始化函数都已经调用,因此free_initmem函数可以舍弃内存的__init_begin至__init_end之间的数据。

    当内核被引导并进行初始化后,内核启动了自己的第一个用户空间应用程序_init,这是调用的第一个使用标准C库编译的程序,其进程编号始终为1.

    _init负责触发其他必须的进程,以使系统进入整体可用的状态。

    以下为内核启动流程图:

  • 相关阅读:
    2021/9/20 开始排序算法
    快速排序(自己版本)
    2021/9/17(栈实现+中后缀表达式求值)
    2021/9/18+19(中缀转后缀 + 递归 迷宫 + 八皇后)
    20212021/9/13 稀疏数组
    2021/9/12 线性表之ArrayList
    开发环境重整
    Nginx入门
    《财富的帝国》读书笔记
    Linux入门
  • 原文地址:https://www.cnblogs.com/god-of-death/p/14971565.html
Copyright © 2011-2022 走看看