zoukankan      html  css  js  c++  java
  • linux用户栈内核栈的设置---进程的创建: fork/execve【转】

    转自:http://blog.csdn.net/u011279649/article/details/18795547

    版权声明:本文为博主原创文章,未经博主允许不得转载。
    
    目录(?)[-]
    应用层怎样使用fork and execve
    fork的返回值怎样区分0/pid
    fork系统调用的入口参数来自哪里
    how to implement do_fork
    copy_process
    How to check the kernel stack correctivity
    How to set the new process entry
    new process entry point
    sys_execve
    对elf 格式文件而言
    应用层怎样使用fork and execve
    /**************************************************************************/
    
    main()
    {
        int ret_from_fork,mypid;
        mypid = getPid();
        printf("before:my pid is d%
    ",mypid);
        ret_from_fork = fork();
        /*该方法返回生成的子进程的进程id号。用于复制出一个进程后,他们都运行到同样的地方,
         *所以父进程中的ret_from_fork的值是id值,而不时初值0,
         *而子进程的ret_from_fork却没有获得值,还是0.通过这样就可以区别两个进程改变两个进程的走向
             **/
        switch(ret_from_fork){
            case -1:
            perror(" fork failed");
            exit(1);
            /*以下就是子进程要执行的代码,他调用exec载入用户输入的命令指定的程序,
             *清除进程空间执行用户指定的程序
             **/
            case 0:
            execvp(arglist[0],arglist);//arglist[0]中指定用户想执行的命令名。
            perror("execvp failed");
            exit(1);
    
            default:
            while(wait(&exitstatus)!=ret_from_fork);
            /*shell程序,等待子进程运行结束后,再接受用户输入*/
        }
    
    }
    
    fork的返回值怎样区分0/pid
    /*
     *用户空间fork函数返回的0并不是内核的do_fork返回的,do_fork只会(在父进程中)返回新进程的pid。
     *在x86上,子进程的0返回值是内核在ret_from_fork之后、进入用户空间前执行RESTORE_ALL时pop到eax中的,
     *然后库实现的fork将eax作为返回值;
     *在本文分析的ARM上,对应的是copy_thread中的childregs->ARM_r0 = 0:返回用户空间时该值被恢复到r0。
     *实际上,fork的子进程在进入用户空间前从来不经过do_fork这条路:它的thread中保存的pc(x86上为eip)是ret_from_fork,
     *也就是只要开始运行子进程,就会在switch_to中执行ret_from_fork,而从ret_from_fork顺着看,
     *一直就到了RESTORE_ALL(ARM上为ret_slow_syscall),从而返回用户空间
     **/
    
    fork系统调用的入口,参数来自哪里?
    入口参数保存在当前的内核栈中:结构为struct pt_regs
    系统调用的入口:
    arch/arm/kernel/entry-common.S
    sys_fork_wrapper:
        add    r0, sp, #S_OFF
        b    sys_fork
    ENDPROC(sys_fork_wrapper)
    
    crash> dis sys_fork_wrapper
    0xc000e800 <sys_fork_wrapper>:      add     r0, sp, #8
    0xc000e804 <sys_fork_wrapper+4>:        b       0xc0011d28 <sys_fork>
    
    arch/arm/kernel/sys_arm.c
    /* Fork a new task - this creates a new program thread.
     * This is called indirectly via a small wrapper
     */
    asmlinkage int sys_fork(struct pt_regs *regs)
    {
    #ifndef CONFIG_MMU
        /* fork is not offered on no-MMU builds */
        return -EINVAL;
    #else
        /*
         * regs is the saved register frame passed in r0 by sys_fork_wrapper
         * (sp + S_OFF). The child starts on the same user stack pointer as
         * the caller (regs->ARM_sp) and signals SIGCHLD to the parent on exit.
         */
        return do_fork(SIGCHLD, regs->ARM_sp, regs, 0, NULL, NULL);
    #endif
    }
    
    crash> dis sys_fork
    0xc0011d28 <sys_fork>:  mov     r12, sp
    0xc0011d2c <sys_fork+4>:        push    {r11, r12, lr, pc}
    0xc0011d30 <sys_fork+8>:        sub     r11, r12, #4
    0xc0011d34 <sys_fork+12>:       sub     sp, sp, #8
    0xc0011d38 <sys_fork+16>:       mov     r12, #0
    0xc0011d3c <sys_fork+20>:       mov     r1, r0
    0xc0011d40 <sys_fork+24>:       ldr     r1, [r1, #52]   ; 0x34
    0xc0011d44 <sys_fork+28>:       mov     r2, r0
    0xc0011d48 <sys_fork+32>:       mov     r3, r12
    0xc0011d4c <sys_fork+36>:       mov     r0, #17
    0xc0011d50 <sys_fork+40>:       str     r12, [sp]
    0xc0011d54 <sys_fork+44>:       str     r12, [sp, #4]
    0xc0011d58 <sys_fork+48>:       bl      0xc0027550 <do_fork>
    0xc0011d5c <sys_fork+52>:       sub     sp, r11, #12
    0xc0011d60 <sys_fork+56>:       ldm     sp, {r11, sp, pc}
    
    /**************************************************************/
    arch/arm/kernel/entry-header.S
    @
    @ Most of the stack format comes from struct pt_regs, but with
    @ the addition of 8 bytes for storing syscall args 5 and 6.
    @ This _must_ remain a multiple of 8 for EABI.
    @
    #define S_OFF        8
    
    /**************************************************************/
    /arch/arm/include/asm/ptrace.h
    /*
     * This struct defines the way the registers are stored on the
     * stack during a system call.  Note that sizeof(struct pt_regs)
     * has to be a multiple of 8.
     */
    
    struct pt_regs {
        unsigned long uregs[18];
    };
    
    
    #define ARM_cpsr    uregs[16]
    #define ARM_pc        uregs[15]
    #define ARM_lr        uregs[14]
    #define ARM_sp        uregs[13]
    #define ARM_ip        uregs[12]/*?*/
    #define ARM_fp        uregs[11]/*frame point*/
    #define ARM_r10        uregs[10]
    #define ARM_r9        uregs[9]
    #define ARM_r8        uregs[8]
    #define ARM_r7        uregs[7]
    #define ARM_r6        uregs[6]
    #define ARM_r5        uregs[5]
    #define ARM_r4        uregs[4]
    #define ARM_r3        uregs[3]
    #define ARM_r2        uregs[2]
    #define ARM_r1        uregs[1]
    #define ARM_r0        uregs[0]
    #define ARM_ORIG_r0    uregs[17]
    
    how to implement do_fork
    
    /**************************************************************/
    do_fork(SIGCHLD, regs->ARM_sp, regs, 0, NULL, NULL);
    /*
     *  Ok, this is the main fork-routine.
     *
     * It copies the process, and if successful kick-starts
     * it and waits for it to finish using the VM if required.
     *
     * Returns the new task's pid as seen from the caller's pid namespace,
     * or the negative errno encoded in the error pointer returned by
     * copy_process(). parent_tidptr is accepted for interface compatibility
     * but is not used by this abridged version.
     */
    long do_fork(unsigned long clone_flags,
              unsigned long stack_start,
              struct pt_regs *regs,
              unsigned long stack_size,
              int __user *parent_tidptr,
              int __user *child_tidptr)
    {
        struct task_struct *p;
        int trace = 0;
        long nr;

        p = copy_process(clone_flags, stack_start, regs, stack_size,
                 child_tidptr, NULL, trace);

        /* Without this branch nr would be returned uninitialised on failure. */
        if (IS_ERR(p))
            return PTR_ERR(p);

        /*
         * Do this prior waking up the new thread - the thread pointer
         * might get invalid after that point, if the thread exits quickly.
         */
        nr = task_pid_vnr(p);
        wake_up_new_task(p);

        return nr;
    }
    
    copy_process
    /*
     * This creates a new process as a copy of the old one,
     * but does not actually start it yet.
     *
     * It copies the registers, and all the appropriate
     * parts of the process environment (as per the clone
     * flags). The actual kick-off is left to the caller.
     */
    /*
     * NOTE: abridged excerpt. `retval` is never declared here, the
     * bad_fork_cleanup_* labels are not shown, and nothing is returned;
     * the "----" line marks code the article left out.
     */
    static struct task_struct *copy_process(unsigned long clone_flags,
                        unsigned long stack_start,
                        struct pt_regs *regs,
                        unsigned long stack_size,
                        int __user *child_tidptr,
                        struct pid *pid,
                        int trace)
    {/* allocate the new task's structures and initialise them from the current task */
        struct task_struct *p;
        p = dup_task_struct(current);
        ----
        /* Perform scheduler related setup. Assign this task to a CPU. */
        sched_fork(p);
    
        retval = perf_event_init_task(p);
        if (retval)
            goto bad_fork_cleanup_policy;
        retval = audit_alloc(p);
        if (retval)
            goto bad_fork_cleanup_policy;
        /* copy all the process information */
        /* Each step below bails out to a cleanup label named after the
         * previously completed step. */
        retval = copy_semundo(clone_flags, p);
        if (retval)
            goto bad_fork_cleanup_audit;
        retval = copy_files(clone_flags, p);
        if (retval)
            goto bad_fork_cleanup_semundo;
        retval = copy_fs(clone_flags, p);
        if (retval)
            goto bad_fork_cleanup_files;
        retval = copy_sighand(clone_flags, p);
        if (retval)
            goto bad_fork_cleanup_fs;
        retval = copy_signal(clone_flags, p);
        if (retval)
            goto bad_fork_cleanup_sighand;
        retval = copy_mm(clone_flags, p);
        if (retval)
            goto bad_fork_cleanup_signal;
        retval = copy_namespaces(clone_flags, p);
        if (retval)
            goto bad_fork_cleanup_mm;
        retval = copy_io(clone_flags, p);
        if (retval)
            goto bad_fork_cleanup_namespaces;
        /* Last step shown: copy_thread() sets up the child's saved register
         * frame and its kernel resume point. */
        retval = copy_thread(clone_flags, stack_start, stack_size, p, regs);
    }
    
    
    /*
     * Allocate a task_struct and a thread_info for a new task and initialise
     * them from @orig. Returns the new task_struct, or NULL when either
     * allocation fails or arch_dup_task_struct() reports an error.
     */
    static struct task_struct *dup_task_struct(struct task_struct *orig)
    {
        struct task_struct *tsk;
        struct thread_info *ti;
        unsigned long *stackend;
        int node = tsk_fork_get_node(orig); /* presumably the NUMA node to allocate on -- confirm */
        int err;
    
        /* allocate memory for the task_struct and the thread_info */
        tsk = alloc_task_struct_node(node);
        if (!tsk)
            return NULL;
    
        ti = alloc_thread_info_node(tsk, node);
        if (!ti) {
            free_task_struct(tsk);
            return NULL;
        }
        /*
         * For reference, the arch_dup_task_struct() quoted by the article is
         * a plain structure copy:
         *
        int arch_dup_task_struct(struct task_struct *dst,
                  struct task_struct *src)
        {
            *dst = *src;
            return 0;
        }
        */
        err = arch_dup_task_struct(tsk, orig);
        if (err)
            goto out;
    
        /* The structure copy left ->stack pointing at the parent's
         * thread_info; redirect it to the one allocated above. */
        tsk->stack = ti;
    
        /* presumably seeds the new thread_info from the parent's -- confirm */
        setup_thread_stack(tsk, orig);
        clear_user_return_notifier(tsk);
        clear_tsk_need_resched(tsk);
        /* Plant a magic word at the location end_of_stack() reports. */
        stackend = end_of_stack(tsk);
        *stackend = STACK_END_MAGIC;    /* for overflow detection */
    
    
        /*
         * One for us, one for whoever does the "release_task()" (usually
         * parent)
         */
        atomic_set(&tsk->usage, 2);
    
        tsk->splice_pipe = NULL;
    
        account_kernel_stack(ti, 1);
    
        return tsk;
    
    /* Error path: release both allocations made above. */
    out:
        free_thread_info(ti);
        free_task_struct(tsk);
        return NULL;
    }
    
    How to check the kernel stack correctivity
    static inline unsigned long *end_of_stack(struct task_struct *p)
    {
        return (unsigned long *)(task_thread_info(p) + 1);
    }
    #define STACK_END_MAGIC        0x57AC6E9D
    
    COMMAND: "dwc_otg"
       TASK: ee1a3420  [THREAD_INFO: ee1c6000]
        CPU: 0
      STATE: TASK_INTERRUPTIBLE 
    crash> thread_info ee1c6000
    struct thread_info {
      flags = 0, 
      preempt_count = 1, 
      addr_limit = 0, 
      task = 0xee1a3420,
    
    crash> struct task_struct.stack 0xee1a3420
      stack = 0xee1c6000
    
    crash> bt -r
    PID: 760    TASK: ee1a3420  CPU: 0   COMMAND: "dwc_otg"
    ee1c6000:  00000000 00000001 00000000 ee1a3420 
    ee1c6010:  default_exec_domain 00000000 00000015 ee1a3420 
    ee1c6020:  c0f88420 init_task ee1c6000 00000000 
    ee1c6030:  00000001 init_mm  ee1c7f5c ee1c7f18 
    ee1c6040:  __schedule+1412 00000000 00000000 00000000 
    ee1c6050:  00000000 00000000 00000000 00000000 
    ee1c6060:  00000000 00000000 00000000 00000000 
    ee1c6070:  00000000 00000000 00000000 00000000 
    ee1c6080:  00000000 00000000 00000000 00000000 
    ee1c6090:  00000000 00000000 00000000 00000000 
    ee1c60a0:  00000000 00000000 00000000 00000000 
    ee1c60b0:  00000000 00000000 00000000 00000000 
    ee1c60c0:  00000000 00000000 00000000 00000000 
    ee1c60d0:  00000000 00000000 00000000 00000000 
    ee1c60e0:  00000000 00000000 00000000 00000000 
    ee1c60f0:  00000000 00000000 00000000 00000000 
    ee1c6100:  00000000 00000000 00000000 00000000 
    ee1c6110:  00000000 00000000 00000000 00000000 
    ee1c6120:  00000000 00000000 00000000 00000000 
    ee1c6130:  00000000 00000000 00000000 00000000 
    ee1c6140:  00000000 00000000 00000000 00000000 
    ee1c6150:  00000000 00000000 00000000 00000000 
    ee1c6160:  00000000 00000000 00000000 00000000 
    ee1c6170:  00000000 00000000 00000000 00000000 
    ee1c6180:  00000000 00000000 00000000 00000000 
    ee1c6190:  00000000 00000000 00000000 00000000 
    ee1c61a0:  00000000 00000000 00000000 00000000 
    ee1c61b0:  00000000 00000000 00000000 00000000 
    ee1c61c0:  00000000 00000000 00000000 00000000 
    ee1c61d0:  00000000 00000000 00000000 00000000 
    ee1c61e0:  00000000 00000000 00000000 00000000 
    ee1c61f0:  00000000 00000000 00000000 00000000 
    ee1c6200:  00000000 00000000 00000000 00000000 
    ee1c6210:  00000000 00000000 00000000 00000000 
    ee1c6220:  00000000 00000000 00000000 00000000 
    ee1c6230:  00000000 00000000 00000000 00000000 
    ee1c6240:  00000000 00000000 00000000 00000000 
    ee1c6250:  00000000 00000000 00000000 00000000 
    ee1c6260:  00000000 00000000 00000000 00000000 
    ee1c6270:  00000000 00000000 00000000 00000000 
    ee1c6280:  00000000 00000000 00000000 00000000 
    ee1c6290:  00000000 00000000 00000000 00000000 
    ee1c62a0:  00000000 00000000 00000000 00000000 
    ee1c62b0:  00000000 00000000 00000000 00000000 
    ee1c62c0:  00000000 00000000 do_no_restart_syscall 00000000 
    ee1c62d0:  00000000 00000000 00000000 00000000 
    ee1c62e0:  00000000 00000000 00000000 00000000 
    ee1c62f0:  57ac6e9d/*STACK_END_MAGIC*/
    
    asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");
    
    How to set the new process entry
    /*
     * Prepare the child's saved user register frame and its kernel resume
     * context.
     *
     * The child's frame starts as a copy of the parent's @regs, except that
     * r0 is forced to 0 and sp is set to @stack_start. The kernel-side
     * context is zeroed and then pointed at that frame (sp) and at
     * ret_from_fork (pc). @stk_sz is not used. Always returns 0.
     */
    int copy_thread(unsigned long clone_flags, unsigned long stack_start,
                    unsigned long stk_sz, struct task_struct *p,
                    struct pt_regs *regs)
    {
        struct pt_regs *child_frame = task_pt_regs(p);
        struct thread_info *ti = task_thread_info(p);

        /* User-visible state: the parent's registers with two overrides. */
        *child_frame = *regs;
        child_frame->ARM_sp = stack_start;
        child_frame->ARM_r0 = 0;

        /* Kernel-side resume state for the child. */
        memset(&ti->cpu_context, 0, sizeof(ti->cpu_context));
        ti->cpu_context.pc = (unsigned long)ret_from_fork;
        ti->cpu_context.sp = (unsigned long)child_frame;

        clear_ptrace_hw_breakpoint(p);

        /* With CLONE_SETTLS the TLS value is taken from the caller's r3. */
        if (clone_flags & CLONE_SETTLS) {
            ti->tp_value = regs->ARM_r3;
        }

        thread_notify(THREAD_NOTIFY_COPY, ti);

        return 0;
    }
    
    /*
     * The struct pt_regs frame sits at the very end of the (8K) kernel stack:
     * one struct pt_regs below task_stack_page(p) + THREAD_START_SP.
     * copy_thread() fills it in with the stack pointer and return state to
     * use once the task returns to user space.
     *
     * The trailing backslash is required: without it the macro expands to
     * nothing and the expression below becomes a stray statement.
     */
    #define task_pt_regs(p) \
        ((struct pt_regs *)(THREAD_START_SP + task_stack_page(p)) - 1)
    
    
    /*
     * low level task data that entry.S needs immediate access to.
     * __switch_to() assumes cpu_context follows immediately after cpu_domain.
     */
    crash> struct thread_info -o
    struct thread_info {
        [0] unsigned long flags;
        [4] int preempt_count;
        [8] mm_segment_t addr_limit;
       [12] struct task_struct *task;
       [16] struct exec_domain *exec_domain;
       [20] __u32 cpu;
       [24] __u32 cpu_domain;
       [28] struct cpu_context_save cpu_context;
       [76] __u32 syscall;
       [80] __u8 used_cp[16];
       [96] unsigned long tp_value;
      [100] struct crunch_state crunchstate;
      [288] union fp_state fpstate;
      [432] union vfp_state vfpstate;
      [712] struct restart_block restart_block;
    }
    
    new process entry point
    /*
     * This is how we return from a fork.
     */
    @ copy_thread() stores this address in the child's cpu_context.pc and the
    @ address of the child's pt_regs frame in cpu_context.sp.
    ENTRY(ret_from_fork)
        bl    schedule_tail
        get_thread_info tsk
        ldr    r1, [tsk, #TI_FLAGS]        @ check for syscall tracing
        mov    why, #1                     @ "why" is a register alias defined outside this excerpt
        tst    r1, #_TIF_SYSCALL_WORK        @ are we tracing syscalls?
        beq    ret_slow_syscall            @ not traced: go straight to the syscall return path
        mov    r1, sp                      @ second argument for syscall_trace: current sp
        mov    r0, #1                @ trace exit [IP = 1]
        bl    syscall_trace
        b    ret_slow_syscall
    ENDPROC(ret_from_fork)
    
    
    sys_execve
    
    /**************************************************************/
    arch/arm/kernel/sys_arm.c
    
    /* sys_execve() executes a new program.
     * This is called indirectly via a small wrapper
     */
    /*
     * Copy the user-supplied path in with getname(), run do_execve() on it
     * and release the copy again. Returns the error encoded by getname() if
     * that fails, otherwise the result of do_execve().
     */
    asmlinkage int sys_execve(const char __user *filenamei,
                  const char __user *const __user *argv,
                  const char __user *const __user *envp, struct pt_regs *regs)
    {
        char *path = getname(filenamei);
        int rc;

        if (IS_ERR(path))
            return PTR_ERR(path);

        rc = do_execve(path, argv, envp, regs);
        putname(path);

        return rc;
    }
    
    /*
     * Wrap the native user-space argv/envp pointers in struct user_arg_ptr
     * and forward everything to do_execve_common(), whose result is returned
     * unchanged.
     */
    int do_execve(const char *filename,
        const char __user *const __user *__argv,
        const char __user *const __user *__envp,
        struct pt_regs *regs)
    {
        /* Designated initialisers: any member not named here is zeroed. */
        struct user_arg_ptr argv = { .ptr.native = __argv };
        struct user_arg_ptr envp = { .ptr.native = __envp };
        return do_execve_common(filename, argv, envp, regs);
    }
    
    /**************************************************************/
    
    /*
     * sys_execve() executes a new program.
     */
    /*
     * NOTE: abridged excerpt showing only the main sequence of calls. None of
     * the helper results are checked here and no value is returned.
     */
    static int do_execve_common(const char *filename,
                    struct user_arg_ptr argv,
                    struct user_arg_ptr envp,
                    struct pt_regs *regs)
    {
        struct linux_binprm *bprm;
        struct file *file;
        /* Zero-filled descriptor for the program about to be loaded. */
        bprm = kzalloc(sizeof(*bprm), GFP_KERNEL);
        file = open_exec(filename);
        sched_exec();
    
        bprm->file = file;
        bprm->filename = filename;
        bprm->interp = filename;
    
        /* New mm_struct for the program (see bprm_mm_init()). */
        bprm_mm_init(bprm);
    
        /* Count the argument and environment strings. */
        bprm->argc = count(argv, MAX_ARG_STRINGS);
    
        bprm->envc = count(envp, MAX_ARG_STRINGS);
        prepare_binprm(bprm);
        /* Hand the image to the binary-format handlers. */
        search_binary_handler(bprm,regs);
    }
    
    /*
     * Create a new mm_struct and populate it with a temporary stack
     * vm_area_struct.  We don't have enough context at this point to set the stack
     * flags, permissions, and offset, so we use temporary values.  We'll update
     * them later in setup_arg_pages().
     */
    /*
     * Allocate the new mm_struct for @bprm and let __bprm_mm_init() set up
     * its initial vma.
     *
     * Returns 0 on success, -ENOMEM if the mm_struct cannot be allocated, or
     * the error reported by __bprm_mm_init(). On failure bprm->mm is left
     * NULL and the freshly allocated mm is dropped again.
     */
    int bprm_mm_init(struct linux_binprm *bprm)
    {
        struct mm_struct *mm;
        int err;

        /* mm_struct */
        bprm->mm = mm = mm_alloc();
        if (!mm)
            return -ENOMEM;

        /* vma_struct */
        err = __bprm_mm_init(bprm);
        if (err) {
            /* Do not leave a half-initialised mm attached to bprm. */
            bprm->mm = NULL;
            mmdrop(mm);
            return err;
        }

        return 0;
    }
    
    /*
     * cycle the list of binary formats handler, until one recognizes the image
     */
    int search_binary_handler(struct linux_binprm *bprm,struct pt_regs *regs)
    {
        struct linux_binfmt *fmt;
        list_for_each_entry(fmt, &formats, lh)
        int (*fn)(struct linux_binprm *, struct pt_regs *) = fmt->load_binary;
        fn(bprm, regs);
    }
    
    对elf 格式文件而言
    fs/binfmt_elf.c
    /*
     * Binary-format descriptor for ELF. search_binary_handler() calls the
     * .load_binary hook through fmt->load_binary; how this descriptor gets
     * onto the formats list is not shown here.
     */
    static struct linux_binfmt elf_format = {
        .module        = THIS_MODULE,
        .load_binary    = load_elf_binary,
        .load_shlib    = load_elf_library,
        .core_dump    = elf_core_dump,
        .min_coredump    = ELF_EXEC_PAGESIZE,
    };
    
    /*
     * NOTE: abridged excerpt ("----" marks omitted code; no value is
     * returned). The part the article cares about is the final call:
     * start_thread() receives elf_entry as the new pc and bprm->p as the
     * new user stack pointer.
     */
    static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
    {
        ----
        kernel_read();
        start_thread(regs, elf_entry, bprm->p);
    }
    
    /*
     * Reset the saved user register frame @regs so that the return to user
     * space starts the freshly loaded program:
     *   - every register is cleared,
     *   - cpsr selects user mode (32-bit or 26-bit depending on the task's
     *     personality), plus the Thumb bit when the CPU supports Thumb and
     *     bit 0 of @pc is set,
     *   - pc is @pc with bit 0 cleared, sp is @sp,
     *   - r0/r1/r2 are loaded from the first three words at @sp
     *     (argc, argv, envp).
     *
     * Every line of a multi-line macro must end in a backslash; without them
     * only the first line belongs to the macro.
     */
    #define start_thread(regs,pc,sp)                                    \
    ({                                                                  \
        unsigned long *stack = (unsigned long *)sp;                     \
        memset(regs->uregs, 0, sizeof(regs->uregs));                    \
        if (current->personality & ADDR_LIMIT_32BIT)                    \
            regs->ARM_cpsr = USR_MODE;                                  \
        else                                                            \
            regs->ARM_cpsr = USR26_MODE;                                \
        if (elf_hwcap & HWCAP_THUMB && pc & 1)                          \
            regs->ARM_cpsr |= PSR_T_BIT;                                \
        regs->ARM_cpsr |= PSR_ENDSTATE;                                 \
        regs->ARM_pc = pc & ~1;        /* pc */                         \
        regs->ARM_sp = sp;        /* sp */                              \
        regs->ARM_r2 = stack[2];    /* r2 (envp) */                     \
        regs->ARM_r1 = stack[1];    /* r1 (argv) */                     \
        regs->ARM_r0 = stack[0];    /* r0 (argc) */                     \
    })
    
    总结:当运行execve时已经运行新创建的进程,不是说在old进程中加载后,再运行新进程的。
     
     

    应用层怎样使用fork and execve

    /**************************************************************************/

    main()
    {
        int ret_from_fork,mypid;
        mypid = getPid();
        printf("before:my pid is d% ",mypid);
        ret_from_fork = fork();
        /*该方法返回生成的子进程的进程id号。用于复制出一个进程后,他们都运行到同样的地方,
         *所以父进程中的ret_from_fork的值是id值,而不时初值0,
         *而子进程的ret_from_fork却没有获得值,还是0.通过这样就可以区别两个进程改变两个进程的走向
             **/

        switch(ret_from_fork){
            case -1:
            perror(" fork failed");
            exit(1);
            /*以下就是子进程要执行的代码,他调用exec载入用户输入的命令指定的程序,
             *清除进程空间执行用户指定的程序
             **/

            case 0:
            execvp(arglist[0],arglist);//arglist[0]中指定用户想执行的命令名
            perror("execvp failed");
            exit(1);

            default:
            while(wait(&exitstatus)!=ret_from_fork);
            /*shell程序,等待子进程运行结束后,再接受用户输入*/
        }

    }

    fork的返回值怎样区分0/pid

    /*
     *用户空间fork函数返回的0并不是内核的do_fork返回的,do_fork只会(在父进程中)返回新进程的pid。
     *在x86上,子进程的0返回值是内核在ret_from_fork之后、进入用户空间前执行RESTORE_ALL时pop到eax中的,
     *然后库实现的fork将eax作为返回值;
     *在本文分析的ARM上,对应的是copy_thread中的childregs->ARM_r0 = 0:返回用户空间时该值被恢复到r0。
     *实际上,fork的子进程在进入用户空间前从来不经过do_fork这条路:它的thread中保存的pc(x86上为eip)是ret_from_fork,
     *也就是只要开始运行子进程,就会在switch_to中执行ret_from_fork,而从ret_from_fork顺着看,
     *一直就到了RESTORE_ALL(ARM上为ret_slow_syscall),从而返回用户空间
     */

    fork系统调用的入口,参数来自哪里?

    入口参数保存在当前的内核栈中:结构为struct pt_regs
    系统调用的入口:
    arch/arm/kernel/entry-common.S
    sys_fork_wrapper:
        add    r0, sp, #S_OFF
        b    sys_fork
    ENDPROC(sys_fork_wrapper)

    crash> dis sys_fork_wrapper
    0xc000e800 <sys_fork_wrapper>:      add     r0, sp, #8
    0xc000e804 <sys_fork_wrapper+4>:        b       0xc0011d28 <sys_fork>

    arch/arm/kernel/sys_arm.c
    /* Fork a new task - this creates a new program thread.
     * This is called indirectly via a small wrapper
     */
    asmlinkage int sys_fork(struct pt_regs *regs)
    {
    #ifndef CONFIG_MMU
        /* fork is not offered on no-MMU builds */
        return -EINVAL;
    #else
        /*
         * regs is the saved register frame passed in r0 by sys_fork_wrapper
         * (sp + S_OFF). The child starts on the same user stack pointer as
         * the caller (regs->ARM_sp) and signals SIGCHLD to the parent on exit.
         */
        return do_fork(SIGCHLD, regs->ARM_sp, regs, 0, NULL, NULL);
    #endif
    }

    crash> dis sys_fork
    0xc0011d28 <sys_fork>:  mov     r12, sp
    0xc0011d2c <sys_fork+4>:        push    {r11, r12, lr, pc}
    0xc0011d30 <sys_fork+8>:        sub     r11, r12, #4
    0xc0011d34 <sys_fork+12>:       sub     sp, sp, #8
    0xc0011d38 <sys_fork+16>:       mov     r12, #0
    0xc0011d3c <sys_fork+20>:       mov     r1, r0
    0xc0011d40 <sys_fork+24>:       ldr     r1, [r1, #52]   ; 0x34
    0xc0011d44 <sys_fork+28>:       mov     r2, r0
    0xc0011d48 <sys_fork+32>:       mov     r3, r12
    0xc0011d4c <sys_fork+36>:       mov     r0, #17
    0xc0011d50 <sys_fork+40>:       str     r12, [sp]
    0xc0011d54 <sys_fork+44>:       str     r12, [sp, #4]
    0xc0011d58 <sys_fork+48>:       bl      0xc0027550 <do_fork>
    0xc0011d5c <sys_fork+52>:       sub     sp, r11, #12
    0xc0011d60 <sys_fork+56>:       ldm     sp, {r11, sp, pc}

    /**************************************************************/
    arch/arm/kernel/entry-header.S
    @
    @ Most of the stack format comes from struct pt_regs, but with
    @ the addition of 8 bytes for storing syscall args 5 and 6.
    @ This _must_ remain a multiple of 8 for EABI.
    @
    #define S_OFF        8

    /**************************************************************/
    /arch/arm/include/asm/ptrace.h
    /*
     * This struct defines the way the registers are stored on the
     * stack during a system call.  Note that sizeof(struct pt_regs)
     * has to be a multiple of 8.
     */

    /*
     * Saved register frame: 18 words, addressed through the ARM_* names
     * below (uregs[0..15] = r0..r15, uregs[16] = cpsr, uregs[17] = ORIG_r0).
     */
    struct pt_regs {
        unsigned long uregs[18];
    };


    /* Symbolic names for the individual slots of pt_regs.uregs[]. */
    #define ARM_cpsr    uregs[16]
    #define ARM_pc        uregs[15]
    #define ARM_lr        uregs[14]
    #define ARM_sp        uregs[13]
    #define ARM_ip        uregs[12]/* r12 ("ip"): intra-procedure-call scratch register in the ARM procedure call standard */
    #define ARM_fp        uregs[11]/* frame pointer */
    #define ARM_r10        uregs[10]
    #define ARM_r9        uregs[9]
    #define ARM_r8        uregs[8]
    #define ARM_r7        uregs[7]
    #define ARM_r6        uregs[6]
    #define ARM_r5        uregs[5]
    #define ARM_r4        uregs[4]
    #define ARM_r3        uregs[3]
    #define ARM_r2        uregs[2]
    #define ARM_r1        uregs[1]
    #define ARM_r0        uregs[0]
    /* Extra slot after cpsr; presumably r0 as it was on syscall entry -- confirm against the entry code. */
    #define ARM_ORIG_r0    uregs[17]

    how to implement do_fork


    /**************************************************************/
    do_fork(SIGCHLD, regs->ARM_sp, regs, 0, NULL, NULL);
    /*
     *  Ok, this is the main fork-routine.
     *
     * It copies the process, and if successful kick-starts
     * it and waits for it to finish using the VM if required.
     *
    /
    long do_fork(unsigned long clone_flags,
              unsigned long stack_start,
              struct pt_regs *regs,
              unsigned long stack_size,
              int __user *parent_tidptr,
              int __user *child_tidptr)
    {
        struct task_struct *p;
        int trace = 0;
        long nr;

        p = copy_process(clone_flags, stack_start, regs, stack_size,
                 child_tidptr, NULL, trace);

        /*
         * Do this prior waking up the new thread - the thread pointer
         * might get invalid after that point, if the thread exits quickly.
         */
        if (!IS_ERR(p)) {
            nr = task_pid_vnr(p);
            wake_up_new_task(p);
        }
        return nr;
    }

    copy_process

    /*
     * This creates a new process as a copy of the old one,
     * but does not actually start it yet.
     *
     * It copies the registers, and all the appropriate
     * parts of the process environment (as per the clone
     * flags). The actual kick-off is left to the caller.
     */
    /*
     * NOTE: abridged excerpt. `retval` is never declared here, the
     * bad_fork_cleanup_* labels are not shown, and nothing is returned;
     * the "----" line marks code the article left out.
     */
    static struct task_struct *copy_process(unsigned long clone_flags,
                        unsigned long stack_start,
                        struct pt_regs *regs,
                        unsigned long stack_size,
                        int __user *child_tidptr,
                        struct pid *pid,
                        int trace)
    {/* allocate the new task's structures and initialise them from the current task */
        struct task_struct *p;
        p = dup_task_struct(current);
        ----
        /* Perform scheduler related setup. Assign this task to a CPU. */
        sched_fork(p);

        retval = perf_event_init_task(p);
        if (retval)
            goto bad_fork_cleanup_policy;
        retval = audit_alloc(p);
        if (retval)
            goto bad_fork_cleanup_policy;
        /* copy all the process information */
        /* Each step below bails out to a cleanup label named after the
         * previously completed step. */
        retval = copy_semundo(clone_flags, p);
        if (retval)
            goto bad_fork_cleanup_audit;
        retval = copy_files(clone_flags, p);
        if (retval)
            goto bad_fork_cleanup_semundo;
        retval = copy_fs(clone_flags, p);
        if (retval)
            goto bad_fork_cleanup_files;
        retval = copy_sighand(clone_flags, p);
        if (retval)
            goto bad_fork_cleanup_fs;
        retval = copy_signal(clone_flags, p);
        if (retval)
            goto bad_fork_cleanup_sighand;
        retval = copy_mm(clone_flags, p);
        if (retval)
            goto bad_fork_cleanup_signal;
        retval = copy_namespaces(clone_flags, p);
        if (retval)
            goto bad_fork_cleanup_mm;
        retval = copy_io(clone_flags, p);
        if (retval)
            goto bad_fork_cleanup_namespaces;
        /* Last step shown: copy_thread() sets up the child's saved register
         * frame and its kernel resume point. */
        retval = copy_thread(clone_flags, stack_start, stack_size, p, regs);
    }


    /*
     * Allocate a task_struct and a thread_info for a new task and initialise
     * them from @orig. Returns the new task_struct, or NULL when either
     * allocation fails or arch_dup_task_struct() reports an error.
     */
    static struct task_struct *dup_task_struct(struct task_struct *orig)
    {
        struct task_struct *tsk;
        struct thread_info *ti;
        unsigned long *stackend;
        int node = tsk_fork_get_node(orig); /* presumably the NUMA node to allocate on -- confirm */
        int err;

        /* allocate memory for the task_struct and the thread_info */
        tsk = alloc_task_struct_node(node);
        if (!tsk)
            return NULL;

        ti = alloc_thread_info_node(tsk, node);
        if (!ti) {
            free_task_struct(tsk);
            return NULL;
        }
        /*
         * For reference, the arch_dup_task_struct() quoted by the article is
         * a plain structure copy:
         *
        int arch_dup_task_struct(struct task_struct *dst,
                  struct task_struct *src)
        {
            *dst = *src;
            return 0;
        }
        */
        err = arch_dup_task_struct(tsk, orig);
        if (err)
            goto out;

        /* The structure copy left ->stack pointing at the parent's
         * thread_info; redirect it to the one allocated above. */
        tsk->stack = ti;

        /* presumably seeds the new thread_info from the parent's -- confirm */
        setup_thread_stack(tsk, orig);
        clear_user_return_notifier(tsk);
        clear_tsk_need_resched(tsk);
        /* Plant a magic word at the location end_of_stack() reports. */
        stackend = end_of_stack(tsk);
        *stackend = STACK_END_MAGIC;    /* for overflow detection */


        /*
         * One for us, one for whoever does the "release_task()" (usually
         * parent)
         */
        atomic_set(&tsk->usage, 2);

        tsk->splice_pipe = NULL;

        account_kernel_stack(ti, 1);

        return tsk;

    /* Error path: release both allocations made above. */
    out:
        free_thread_info(ti);
        free_task_struct(tsk);
        return NULL;
    }

    How to check the kernel stack correctivity

    static inline unsigned long *end_of_stack(struct task_struct *p)
    {
        return (unsigned long *)(task_thread_info(p) + 1);
    }
    #define STACK_END_MAGIC        0x57AC6E9D

    COMMAND: "dwc_otg"
       TASK: ee1a3420  [THREAD_INFO: ee1c6000]
        CPU: 0
      STATE: TASK_INTERRUPTIBLE 
    crash> thread_info ee1c6000
    struct thread_info {
      flags = 0, 
      preempt_count = 1, 
      addr_limit = 0, 
      task = 0xee1a3420,

    crash> struct task_struct.stack 0xee1a3420
      stack = 0xee1c6000

    crash> bt -r
    PID: 760    TASK: ee1a3420  CPU: 0   COMMAND: "dwc_otg"
    ee1c6000:  00000000 00000001 00000000 ee1a3420 
    ee1c6010:  default_exec_domain 00000000 00000015 ee1a3420 
    ee1c6020:  c0f88420 init_task ee1c6000 00000000 
    ee1c6030:  00000001 init_mm  ee1c7f5c ee1c7f18 
    ee1c6040:  __schedule+1412 00000000 00000000 00000000 
    ee1c6050:  00000000 00000000 00000000 00000000 
    ee1c6060:  00000000 00000000 00000000 00000000 
    ee1c6070:  00000000 00000000 00000000 00000000 
    ee1c6080:  00000000 00000000 00000000 00000000 
    ee1c6090:  00000000 00000000 00000000 00000000 
    ee1c60a0:  00000000 00000000 00000000 00000000 
    ee1c60b0:  00000000 00000000 00000000 00000000 
    ee1c60c0:  00000000 00000000 00000000 00000000 
    ee1c60d0:  00000000 00000000 00000000 00000000 
    ee1c60e0:  00000000 00000000 00000000 00000000 
    ee1c60f0:  00000000 00000000 00000000 00000000 
    ee1c6100:  00000000 00000000 00000000 00000000 
    ee1c6110:  00000000 00000000 00000000 00000000 
    ee1c6120:  00000000 00000000 00000000 00000000 
    ee1c6130:  00000000 00000000 00000000 00000000 
    ee1c6140:  00000000 00000000 00000000 00000000 
    ee1c6150:  00000000 00000000 00000000 00000000 
    ee1c6160:  00000000 00000000 00000000 00000000 
    ee1c6170:  00000000 00000000 00000000 00000000 
    ee1c6180:  00000000 00000000 00000000 00000000 
    ee1c6190:  00000000 00000000 00000000 00000000 
    ee1c61a0:  00000000 00000000 00000000 00000000 
    ee1c61b0:  00000000 00000000 00000000 00000000 
    ee1c61c0:  00000000 00000000 00000000 00000000 
    ee1c61d0:  00000000 00000000 00000000 00000000 
    ee1c61e0:  00000000 00000000 00000000 00000000 
    ee1c61f0:  00000000 00000000 00000000 00000000 
    ee1c6200:  00000000 00000000 00000000 00000000 
    ee1c6210:  00000000 00000000 00000000 00000000 
    ee1c6220:  00000000 00000000 00000000 00000000 
    ee1c6230:  00000000 00000000 00000000 00000000 
    ee1c6240:  00000000 00000000 00000000 00000000 
    ee1c6250:  00000000 00000000 00000000 00000000 
    ee1c6260:  00000000 00000000 00000000 00000000 
    ee1c6270:  00000000 00000000 00000000 00000000 
    ee1c6280:  00000000 00000000 00000000 00000000 
    ee1c6290:  00000000 00000000 00000000 00000000 
    ee1c62a0:  00000000 00000000 00000000 00000000 
    ee1c62b0:  00000000 00000000 00000000 00000000 
    ee1c62c0:  00000000 00000000 do_no_restart_syscall 00000000 
    ee1c62d0:  00000000 00000000 00000000 00000000 
    ee1c62e0:  00000000 00000000 00000000 00000000 
    ee1c62f0:  57ac6e9d/*STACK_END_MAGIC*/

    asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");

    How to set the new process entry

    int

    copy_thread(unsigned long clone_flags, unsigned long stack_start,
            unsigned long stk_sz, struct task_struct *p, struct pt_regs *regs)
    {
        struct thread_info *thread = task_thread_info(p);
        struct pt_regs *childregs = task_pt_regs(p);

        *childregs = *regs;
        childregs->ARM_r0 = 0;
        childregs->ARM_sp = stack_start;

        memset(&thread->cpu_context, 0, sizeof(struct cpu_context_save));
        thread->cpu_context.sp = (unsigned long)childregs;
        thread->cpu_context.pc = (unsigned long)ret_from_fork;

        clear_ptrace_hw_breakpoint(p);

        if (clone_flags & CLONE_SETTLS)
            thread->tp_value = regs->ARM_r3;

        thread_notify(THREAD_NOTIFY_COPY, thread);

        return 0;
    }

    /*
     * The tail of the 8K kernel stack holds a struct pt_regs.  copy_thread()
     * fills it in with the stack and the return address that are used once
     * the task is back in user space.
     *
     * task_pt_regs(p) evaluates to a pointer to the struct pt_regs that ends
     * THREAD_START_SP bytes into the stack page of task p.
     *
     * The trailing backslash is required: without it the #define is empty and
     * the expression below is no longer part of the macro.
     */
    #define task_pt_regs(p) \
        ((struct pt_regs *)(THREAD_START_SP + task_stack_page(p)) - 1)


    /*
     * low level task data that entry.S needs immediate access to.
     * __switch_to() assumes cpu_context follows immediately after cpu_domain.
     */
    crash> struct thread_info -o
    struct thread_info {
        [0] unsigned long flags;
        [4] int preempt_count;
        [8] mm_segment_t addr_limit;
       [12] struct task_struct *task;
       [16] struct exec_domain *exec_domain;
       [20] __u32 cpu;
       [24] __u32 cpu_domain;
       [28] struct cpu_context_save cpu_context;
       [76] __u32 syscall;
       [80] __u8 used_cp[16];
       [96] unsigned long tp_value;
      [100] struct crunch_state crunchstate;
      [288] union fp_state fpstate;
      [432] union vfp_state vfpstate;
      [712] struct restart_block restart_block;
    }

    new process entry point

    /*
     * This is how we return from a fork.
     */
    @ First code run by a newly created task: copy_thread() stores the address
    @ of this routine as the task's saved pc.
    ENTRY(ret_from_fork)
        @ Call schedule_tail before anything else.
        bl    schedule_tail
        @ NOTE(review): get_thread_info is a macro defined elsewhere; presumably
        @ it leaves the current thread_info pointer in tsk - confirm.
        get_thread_info tsk
        ldr    r1, [tsk, #TI_FLAGS]        @ check for syscall tracing
        @ NOTE(review): `why` is a register alias defined elsewhere; its meaning
        @ is not visible in this excerpt.
        mov    why, #1
        tst    r1, #_TIF_SYSCALL_WORK        @ are we tracing syscalls?
        @ No syscall-work flag set: take the normal slow return path directly.
        beq    ret_slow_syscall
        @ Otherwise call syscall_trace(r0 = 1, r1 = sp) first, then return the
        @ same way.
        mov    r1, sp
        mov    r0, #1                @ trace exit [IP = 1]
        bl    syscall_trace
        b    ret_slow_syscall

    ENDPROC(ret_from_fork)

    sys_execve


    /**************************************************************/
    arch/arm/kernel/sys_arm.c

    /* sys_execve() executes a new program.
     * This is called indirectly via a small wrapper
     */
    /*
     * execve system call entry.
     *
     * @filenamei: user-space pointer to the path of the program to run.
     * @argv:      user-space argument vector.
     * @envp:      user-space environment vector.
     * @regs:      register frame, handed through to do_execve().
     *
     * Returns the error encoded in the getname() result if the path could not
     * be fetched, otherwise whatever do_execve() returns.
     */
    asmlinkage int sys_execve(const char __user *filenamei,
                  const char __user *const __user *argv,
                  const char __user *const __user *envp, struct pt_regs *regs)
    {
        char *path = getname(filenamei);
        int rc;

        /* getname() reports failure as an error pointer. */
        if (IS_ERR(path))
            return PTR_ERR(path);

        rc = do_execve(path, argv, envp, regs);
        /* Release the name obtained from getname() on success and failure. */
        putname(path);

        return rc;
    }

    /*
     * Wrap the native user-space argv/envp pointers in struct user_arg_ptr
     * and forward everything to do_execve_common().
     *
     * @filename: path of the program to execute.
     * @__argv:   user-space argument vector.
     * @__envp:   user-space environment vector.
     * @regs:     register frame, passed through unchanged.
     *
     * Returns the result of do_execve_common().
     */
    int do_execve(const char *filename,
        const char __user *const __user *__argv,
        const char __user *const __user *__envp,
        struct pt_regs *regs)
    {
        return do_execve_common(filename,
                    (struct user_arg_ptr){ .ptr.native = __argv },
                    (struct user_arg_ptr){ .ptr.native = __envp },
                    regs);
    }

    /**************************************************************/

    /*
     * sys_execve() executes a new program.
     */
    static int do_execve_common(const char *filename,
                    struct user_arg_ptr argv,
                    struct user_arg_ptr envp,
                    struct pt_regs *regs)
    {
        struct linux_binprm *bprm;
        struct file *file;
        bprm = kzalloc(sizeof(*bprm), GFP_KERNEL);
        file = open_exec(filename);
        sched_exec();

        bprm->file = file;
        bprm->filename = filename;
        bprm->interp = filename;

        bprm_mm_init(bprm);

        bprm->argc = count(argv, MAX_ARG_STRINGS);

        bprm->envc = count(envp, MAX_ARG_STRINGS);
        prepare_binprm(bprm);
        search_binary_handler(bprm,regs);
    }

    /*
     * Create a new mm_struct and populate it with a temporary stack
     * vm_area_struct.  We don't have enough context at this point to set the stack
     * flags, permissions, and offset, so we use temporary values.  We'll update
     * them later in setup_arg_pages().
     */
    int bprm_mm_init(struct linux_binprm *bprm)
    {
        int err;
        struct mm_struct *mm = NULL;
        /*mm_struct*/
        bprm->mm = mm = mm_alloc();
        /*vma_struct*/
        err = __bprm_mm_init(bprm);


        return 0;
    }

    /*
     * cycle the list of binary formats handler, until one recognizes the image
     */
    int search_binary_handler(struct linux_binprm *bprm,struct pt_regs *regs)
    {
        struct linux_binfmt *fmt;
        list_for_each_entry(fmt, &formats, lh)
        int (*fn)(struct linux_binprm *, struct pt_regs *) = fmt->load_binary;
        fn(bprm, regs);
    }

    对elf 格式文件而言

    fs/binfmt_elf.c
    /*
     * Binary-format descriptor for ELF images.  search_binary_handler()
     * reaches load_elf_binary() through the .load_binary member.
     */
    static struct linux_binfmt elf_format = {
        .module = THIS_MODULE,
        /* loader for ELF executables */
        .load_binary = load_elf_binary,
        /* loader for ELF shared libraries */
        .load_shlib = load_elf_library,
        /* core dump writer */
        .core_dump = elf_core_dump,
        .min_coredump = ELF_EXEC_PAGESIZE,
    };

    /*
     * Abridged sketch of the ELF loader that elf_format registers as
     * .load_binary.  NOTE(review): the "----" marker and the argument-less
     * kernel_read() call presumably stand for code the article leaves out, so
     * this is not compilable as shown.
     */
    static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
    {
        ----
        kernel_read();
        /*
         * Last step shown: hand the entry address (elf_entry) and the stack
         * pointer (bprm->p) to start_thread(), which writes them into regs.
         */
        start_thread(regs, elf_entry, bprm->p);
    }

    /*
     * start_thread(regs, pc, sp) - reset the saved user registers so the task
     * starts a freshly loaded image.
     *
     * @regs: struct pt_regs * to rewrite; all of regs->uregs is cleared first.
     * @pc:   entry address; bit 0 requests Thumb state when the CPU reports
     *        HWCAP_THUMB, and is masked off before being stored.
     * @sp:   initial user stack pointer; the three words it points at are
     *        loaded into r0 (argc), r1 (argv) and r2 (envp).
     *
     * The mode is USR_MODE when the personality has ADDR_LIMIT_32BIT set and
     * USR26_MODE otherwise.
     *
     * Every line of the body must end in a backslash, otherwise the #define
     * is empty and the statement expression is stranded at file scope.
     * Arguments are evaluated more than once, so pass side-effect-free
     * expressions.
     */
    #define start_thread(regs,pc,sp)                    \
    ({                                    \
        unsigned long *stack = (unsigned long *)(sp);            \
        memset((regs)->uregs, 0, sizeof((regs)->uregs));        \
        if (current->personality & ADDR_LIMIT_32BIT)            \
            (regs)->ARM_cpsr = USR_MODE;                \
        else                                \
            (regs)->ARM_cpsr = USR26_MODE;                \
        if (elf_hwcap & HWCAP_THUMB && (pc) & 1)            \
            (regs)->ARM_cpsr |= PSR_T_BIT;                \
        (regs)->ARM_cpsr |= PSR_ENDSTATE;                \
        (regs)->ARM_pc = (pc) & ~1;    /* pc */            \
        (regs)->ARM_sp = (sp);        /* sp */            \
        (regs)->ARM_r2 = stack[2];    /* r2 (envp) */            \
        (regs)->ARM_r1 = stack[1];    /* r1 (argv) */            \
        (regs)->ARM_r0 = stack[0];    /* r0 (argc) */            \
    })

    总结:execve 执行时,代码已经运行在 fork 新创建的进程之中;并不是先在旧进程中把程序加载好,然后再去运行一个新进程。

  • 相关阅读:
    centos 8 安装zabbix4.5
    python调用钉钉机器人发送消息
    基于docker-compose部署Prometheus + Grafana监控系统
    在阿里云上单机部署k8s1.18
    python3使用email模块发送邮件
    mongodb mysql es数据迁移
    Dockerfile详解
    docker-compose部署zabbix5.0
    centos8.1部署gitlab+jenkins
    基于docker-compose部署jumpserver
  • 原文地址:https://www.cnblogs.com/sky-heaven/p/5800297.html
Copyright © 2011-2022 走看看