zoukankan      html  css  js  c++  java
  • Linux内核进程创建do_fork()解析

    本文分析内核创建进程所进行的工作,阅读的内核代码为Linux kernel 2.6。

      进程创建的大部分工作由do_fork这个函数完成,函数原型如下:

    long do_fork(unsigned long clone_flags,
              unsigned long stack_start,
              struct pt_regs *regs,
              unsigned long stack_size,
              int __user *parent_tidptr,
              int __user *child_tidptr)
    View Code

    1、首先进行一些参数及权限的检查。

        if (clone_flags & CLONE_NEWUSER) {
            if (clone_flags & CLONE_THREAD)
                return -EINVAL;
            /* hopefully this check will go away when userns support is
             * complete
             */
            if (!capable(CAP_SYS_ADMIN) || !capable(CAP_SETUID) ||
                    !capable(CAP_SETGID))
                return -EPERM;
        }
    View Code

    2、进行状态的检查。这里主要是检查clone_flags中是否带有已废弃的CLONE_STOPPED标志,若带有该标志,则打印一条提示信息(受计数器count和printk_ratelimit()的限制,不会无限打印)。

    /*
     * CLONE_STOPPED is deprecated: log a notice naming the calling process.
     * The notice is throttled twice over - by the static budget `count`
     * (starts at 100, decremented per message) and by printk_ratelimit().
     */
    if (unlikely(clone_flags & CLONE_STOPPED)) {
        static int __read_mostly count = 100;

        if (count > 0 && printk_ratelimit()) {
            char comm[TASK_COMM_LEN];

            count--;
            /* The format string must end in the "\n" escape, not a raw line break. */
            printk(KERN_INFO "fork(): process `%s' used deprecated "
                    "clone flags 0x%lx\n",
                get_task_comm(comm, current),
                clone_flags & CLONE_STOPPED);
        }
    }
    View Code

    3、用户空间检查,下面这段代码比较有用。主要是先完成复制前的准备工作,然后复制当前进程。

    /*   * When called from kernel_thread, don't do user tracing stuff.   */

     if (likely(user_mode(regs)))   trace = tracehook_prepare_clone(clone_flags);

     p = copy_process(clone_flags, stack_start, regs, stack_size,     child_tidptr, NULL, trace);

    首先是用户空间压栈操作,保存寄存器。其中regs是这么一个参数。

    /*
     * Register state saved for a task; do_fork() receives a pointer to one
     * of these as its `regs` argument.
     * NOTE(review): this layout is architecture-specific; it looks like the
     * 64-bit SuperH (SH-5) variant - confirm against the arch headers.
     */
    struct pt_regs {
     unsigned long long pc;		/* program counter */
     unsigned long long sr;		/* presumably the status register - TODO confirm */
     long long syscall_nr;		/* system call number */
     unsigned long long regs[63];	/* 63 register slots; presumably general-purpose registers */
     unsigned long long tregs[8];	/* 8 register slots; presumably branch-target registers - TODO confirm */
     unsigned long long pad[2];	/* padding */
    };

    从结构体中的成员可以看到,包含

    pc:程序计数器

    sr:状态寄存器(status register)

    syscall_nr:系统调用

    总之,这里是包含进程在退出cpu时所需的最小信息。

     p = copy_process(clone_flags, stack_start, regs, stack_size,     child_tidptr, NULL, trace);

    这里即复制一个进程。返回值为task_struct的结构体,该结构描述了一个进程的基本状态。这里并不进行详细的介绍。

    4、对创建的进程进行一些错误检查。这里发生错误的可能性不大,可以暂时先不管,把握住我们的主线。

    if (!IS_ERR(p)) {   struct completion vfork;

      trace_sched_process_fork(current, p);

      nr = task_pid_vnr(p);

      if (clone_flags & CLONE_PARENT_SETTID)    put_user(nr, parent_tidptr);

      if (clone_flags & CLONE_VFORK) {    p->vfork_done = &vfork;    init_completion(&vfork);  

     }

    5、然后接着下面两个函数。

      audit_finish_fork(p);//主要是检查完成的进程的状态。
      tracehook_report_clone(regs, clone_flags, nr, p);//主要是阻塞刚刚创建的子进程,因为现在还是在父进程进程中,子进程并未开始执行,暂时挂起子进程。下面为函数解释。

    /**
     * tracehook_report_clone - in parent, new child is about to start running
     * @regs:  parent's user register state
     * @clone_flags: flags from parent's system call
     * @pid:  new child's PID in the parent's namespace
     * @child:  new child task
     *
     * Called after a child is set up, but before it has been started running.
     * This is not a good place to block, because the child has not started
     * yet.  Suspend the child here if desired, and then block in
     * tracehook_report_clone_complete().  This must prevent the child from
     * self-reaping if tracehook_report_clone_complete() uses the @child
     * pointer; otherwise it might have died and been released by the time
     * tracehook_report_clone_complete() is called.
     *
     * Called with no locks held, but the child cannot run until this returns.
     */

     6、设置进程标志位。

      /*
       * We set PF_STARTING at creation in case tracing wants to
       * use this to distinguish a fully live task from one that
       * hasn't gotten to tracehook_report_clone() yet.  Now we
       * clear it and set the child going.
       */
      p->flags &= ~PF_STARTING;

    (PF_STARTING宏定义解释为:#define PF_STARTING 0x00000002 /* being created */,表明该进程正在创建中;这里清除该标志位,表示进程已创建完毕)

    7、唤醒进程。这里先判断复制标志是否为 CLONE_STOPPED状态,但是大多数情形下,并不为CLONE_STOPPED状态。

     if (unlikely(clone_flags & CLONE_STOPPED)) {
       /*
        * We'll start up with an immediate SIGSTOP.
        */
       sigaddset(&p->pending.signal, SIGSTOP);
       set_tsk_thread_flag(p, TIF_SIGPENDING);
       __set_task_state(p, TASK_STOPPED);
      } else {
       wake_up_new_task(p, clone_flags);
      }

      tracehook_report_clone_complete(trace, regs,
          clone_flags, nr, p);/*这个函数主要是报告当前之前阻塞的子进程已经开始运行*/

    下面是wake_up_new_task函数功能解释。主要功能是首次唤醒新创建的进程,同时完成调度器所需的一些初始化统计工作,并将进程放入运行队列中。

    关于一个进程如何添加到队列中去,可以从这里进行研究。本文先不对此进行研究,还是放在进程的创建上来。

    /*
     * wake_up_new_task - wake up a newly created task for the first time.
     *
     * This function will do some initial scheduler statistics housekeeping
     * that must be done for every newly created context, then puts the task
     * on the runqueue and wakes it.
     */

    8、检查clone_flags标志位。若当前标志位为 CLONE_VFORK(#define CLONE_VFORK 0x00004000 /* set if the parent wants the child to wake it up on mm_release */),该标志位表明父进程希望子进程在释放内存空间(mm_release)时将自己唤醒。wait_for_completion(&vfork);内部有自旋锁的操作(用于保护等待队列),其主要作用是让父进程在此睡眠等待,直到子进程通过p->vfork_done完成该completion(即子进程释放了父进程的内存空间)后,父进程才继续执行。

    if (clone_flags & CLONE_VFORK) {
       freezer_do_not_count();
       wait_for_completion(&vfork);
       freezer_count();
       tracehook_report_vfork_done(p, nr);
      }

     9、完成所有操作,返回。其中返回值为新的进程的pid。


     } else {
      nr = PTR_ERR(p);
     }
     return nr;

    10、下面是完整的程序。

    /*
     *  Ok, this is the main fork-routine.
     *
     * It copies the process, and if successful kick-starts
     * it and waits for it to finish using the VM if required.
     *
     * @clone_flags:   CLONE_* flags selecting what is shared and how the child
     *                 is started (CLONE_NEWUSER, CLONE_THREAD, CLONE_STOPPED,
     *                 CLONE_PARENT_SETTID and CLONE_VFORK are examined here).
     * @stack_start:   passed through to copy_process().
     * @regs:          caller's register state; also used to tell user-mode
     *                 callers from kernel_thread callers.
     * @stack_size:    passed through to copy_process().
     * @parent_tidptr: user pointer that receives the child's pid when
     *                 CLONE_PARENT_SETTID is set.
     * @child_tidptr:  passed through to copy_process().
     *
     * Returns the new task's pid (as seen via task_pid_vnr()) on success,
     * -EINVAL / -EPERM when the CLONE_NEWUSER checks fail, or the error
     * encoded in the pointer returned by copy_process().
     */
    long do_fork(unsigned long clone_flags,
              unsigned long stack_start,
              struct pt_regs *regs,
              unsigned long stack_size,
              int __user *parent_tidptr,
              int __user *child_tidptr)
    {
        struct task_struct *p;
        int trace = 0;
        long nr;

        /*
         * Do some preliminary argument and permissions checking before we
         * actually start allocating stuff
         */
        if (clone_flags & CLONE_NEWUSER) {
            /* CLONE_NEWUSER cannot be combined with CLONE_THREAD. */
            if (clone_flags & CLONE_THREAD)
                return -EINVAL;
            /* hopefully this check will go away when userns support is
             * complete
             */
            if (!capable(CAP_SYS_ADMIN) || !capable(CAP_SETUID) ||
                    !capable(CAP_SETGID))
                return -EPERM;
        }

        /*
         * We hope to recycle these flags after 2.6.26
         *
         * Until then, log a throttled notice about the deprecated flag:
         * limited by the static budget `count` and by printk_ratelimit().
         */
        if (unlikely(clone_flags & CLONE_STOPPED)) {
            static int __read_mostly count = 100;

            if (count > 0 && printk_ratelimit()) {
                char comm[TASK_COMM_LEN];

                count--;
                /* Format string ends in the "\n" escape, not a raw line break. */
                printk(KERN_INFO "fork(): process `%s' used deprecated "
                        "clone flags 0x%lx\n",
                    get_task_comm(comm, current),
                    clone_flags & CLONE_STOPPED);
            }
        }

        /*
         * When called from kernel_thread, don't do user tracing stuff.
         */
        if (likely(user_mode(regs)))
            trace = tracehook_prepare_clone(clone_flags);

        /* Create the child; on failure p is an error-encoded pointer. */
        p = copy_process(clone_flags, stack_start, regs, stack_size,
                 child_tidptr, NULL, trace);
        /*
         * Do this prior waking up the new thread - the thread pointer
         * might get invalid after that point, if the thread exits quickly.
         */
        if (!IS_ERR(p)) {
            /* Lives on the parent's stack; only waited on for CLONE_VFORK. */
            struct completion vfork;

            trace_sched_process_fork(current, p);

            /* Capture the pid now, while p is still safe to dereference. */
            nr = task_pid_vnr(p);

            if (clone_flags & CLONE_PARENT_SETTID)
                put_user(nr, parent_tidptr);

            /* Hook up the completion before the child gets a chance to run. */
            if (clone_flags & CLONE_VFORK) {
                p->vfork_done = &vfork;
                init_completion(&vfork);
            }

            audit_finish_fork(p);
            tracehook_report_clone(regs, clone_flags, nr, p);

            /*
             * We set PF_STARTING at creation in case tracing wants to
             * use this to distinguish a fully live task from one that
             * hasn't gotten to tracehook_report_clone() yet.  Now we
             * clear it and set the child going.
             */
            p->flags &= ~PF_STARTING;

            if (unlikely(clone_flags & CLONE_STOPPED)) {
                /*
                 * We'll start up with an immediate SIGSTOP.
                 */
                sigaddset(&p->pending.signal, SIGSTOP);
                set_tsk_thread_flag(p, TIF_SIGPENDING);
                __set_task_state(p, TASK_STOPPED);
            } else {
                wake_up_new_task(p, clone_flags);
            }

            tracehook_report_clone_complete(trace, regs,
                            clone_flags, nr, p);

            /*
             * vfork semantics: the parent waits here until the completion
             * attached to p->vfork_done above is signalled.
             */
            if (clone_flags & CLONE_VFORK) {
                freezer_do_not_count();
                wait_for_completion(&vfork);
                freezer_count();
                tracehook_report_vfork_done(p, nr);
            }
        } else {
            /* copy_process() failed: hand its error code back to the caller. */
            nr = PTR_ERR(p);
        }
        return nr;
    }
    View Code
  • 相关阅读:
    解决调用未定义 swoole_async_readfile函数问题
    7000字 Redis 超详细总结笔记总 | 收藏必备!
    C/C++语言编程修养
    glib 队列
    sprintf 详解
    json 需替换 特殊字符串
    glib 关系
    glib 简介
    gprof 代码效率测量
    glib 树
  • 原文地址:https://www.cnblogs.com/farbeyond/p/4550325.html
Copyright © 2011-2022 走看看