Linux内核进程创建do_fork()解析

zoukankan html css js c++ java

Linux内核进程创建do_fork()解析
本文分析内核创建进程时所进行的工作，阅读的内核代码版本为Linux kernel 2.6。

　　进程创建的大部分工作由do_fork这个函数完成，函数原型如下：
long do_fork(unsigned long clone_flags, unsigned long stack_start, struct pt_regs *regs, unsigned long stack_size, int __user *parent_tidptr, int __user *child_tidptr)

View Code
1、首先进行一些参数及权限的检查。
if (clone_flags & CLONE_NEWUSER) { if (clone_flags & CLONE_THREAD) return -EINVAL; /* hopefully this check will go away when userns support is * complete */ if (!capable(CAP_SYS_ADMIN) || !capable(CAP_SETUID) || !capable(CAP_SETGID)) return -EPERM; }

View Code
2、进行状态的检查。这里主要检查调用者是否使用了已废弃的CLONE_STOPPED标志；若使用了，则打印一条警告信息（最多打印100次，并受printk_ratelimit()限速）。
if (unlikely(clone_flags & CLONE_STOPPED)) { static int __read_mostly count = 100; if (count > 0 && printk_ratelimit()) { char comm[TASK_COMM_LEN]; count--; printk(KERN_INFO "fork(): process `%s' used deprecated " "clone flags 0x%lx ", get_task_comm(comm, current), clone_flags & CLONE_STOPPED); } }

View Code
3、用户空间检查，下面这段代码比较有用。主要是先做复制前的准备工作，然后复制当前进程。

/* * When called from kernel_thread, don't do user tracing stuff. */

if (likely(user_mode(regs)))   trace = tracehook_prepare_clone(clone_flags);

p = copy_process(clone_flags, stack_start, regs, stack_size,     child_tidptr, NULL, trace);

首先是用户空间压栈操作，保存寄存器。其中regs是这么一个参数。

/*
 * Saved register state that do_fork() receives through its regs argument.
 *
 * NOTE(review): the layout of struct pt_regs is architecture-specific. This
 * variant (63 general registers plus 8 "tregs") looks like the SuperH sh64
 * definition -- confirm against the kernel tree being read.
 */
struct pt_regs {
	unsigned long long pc;		/* program counter */
	unsigned long long sr;		/* NOTE(review): presumably the status register -- confirm */
	long long syscall_nr;		/* system call number */
	unsigned long long regs[63];	/* presumably the general-purpose registers -- confirm */
	unsigned long long tregs[8];	/* presumably the target (branch) registers -- confirm */
	unsigned long long pad[2];	/* presumably padding only -- confirm */
};

从结构体中的成员可以看到，包含

pc：程序计数器

sr：状态寄存器（status register）

syscall_nr：系统调用号

总之，这里包含了进程让出CPU时需要保存的最小信息。

p = copy_process(clone_flags, stack_start, regs, stack_size,     child_tidptr, NULL, trace);

这里即复制一个进程。返回值为指向task_struct结构体的指针，该结构描述了一个进程的基本状态。这里并不进行详细的介绍。

4、对创建的进程进行错误检查（IS_ERR(p)）。出错的可能性不大，可以暂时先不管，把握住我们的主线。

if (!IS_ERR(p)) {   struct completion vfork;

  trace_sched_process_fork(current, p);

  nr = task_pid_vnr(p);

  if (clone_flags & CLONE_PARENT_SETTID)    put_user(nr, parent_tidptr);

  if (clone_flags & CLONE_VFORK) {    p->vfork_done = &vfork;    init_completion(&vfork);

}

5、然后接着下面两个函数。

audit_finish_fork(p);//主要是检查完成的进程的状态。
  tracehook_report_clone(regs, clone_flags, nr, p);//在父进程上下文中报告子进程即将开始运行；此时子进程尚未开始执行，如有需要可以在这里先将其挂起（但这里不适合让父进程阻塞）。下面为函数解释。

/**
* tracehook_report_clone - in parent, new child is about to start running
* @regs:  parent's user register state
* @clone_flags: flags from parent's system call
* @pid:  new child's PID in the parent's namespace
* @child:  new child task
*
* Called after a child is set up, but before it has been started running.
* This is not a good place to block, because the child has not started
* yet. Suspend the child here if desired, and then block in
* tracehook_report_clone_complete(). This must prevent the child from
* self-reaping if tracehook_report_clone_complete() uses the @child
* pointer; otherwise it might have died and been released by the time
* tracehook_report_clone_complete() is called.
*
* Called with no locks held, but the child cannot run until this returns.
*/

6、设置进程标志位。

  /*
   * We set PF_STARTING at creation in case tracing wants to
   * use this to distinguish a fully live task from one that
   * hasn't gotten to tracehook_report_clone() yet. Now we
   * clear it and set the child going.
   */
  p->flags &= ~PF_STARTING;

（PF_STARTING宏定义解释为：#define PF_STARTING 0x00000002 /* being created */，表明该进程正在创建中；这里清除该标志，即表示进程已经创建完成）

7、唤醒进程。这里先判断clone_flags中是否设置了CLONE_STOPPED标志，但在大多数情形下并不会设置该标志。

if (unlikely(clone_flags & CLONE_STOPPED)) {
   /*
    * We'll start up with an immediate SIGSTOP.
    */
   sigaddset(&p->pending.signal, SIGSTOP);
   set_tsk_thread_flag(p, TIF_SIGPENDING);
   __set_task_state(p, TASK_STOPPED);
  } else {
   wake_up_new_task(p, clone_flags);
  }

  tracehook_report_clone_complete(trace, regs,
      clone_flags, nr, p);/*这个函数主要是报告之前被挂起的子进程已经开始运行*/

下面是wake_up_new_task函数功能解释。主要功能是首次唤醒新创建的进程，同时完成一些调度器所需的初始化统计工作，并将进程放入运行队列中。

关于一个进程如何添加到队列中去，可以从这里进行研究。本文先不对此进行研究，还是放在进程的创建上来。

/*
* wake_up_new_task - wake up a newly created task for the first time.
*
* This function will do some initial scheduler statistics housekeeping
* that must be done for every newly created context, then puts the task
* on the runqueue and wakes it.
*/

8、检查clone_flags标志位。若设置了CLONE_VFORK（#define CLONE_VFORK 0x00004000 /* set if the parent wants the child to wake it up on mm_release */），表明父进程希望子进程在释放其内存空间（mm_release，即子进程执行exec或退出）时将自己唤醒。wait_for_completion(&vfork)会让父进程睡眠等待（其内部使用自旋锁保护等待队列），直到子进程通过前面设置的p->vfork_done完成该completion后，父进程才继续执行。

if (clone_flags & CLONE_VFORK) {
   freezer_do_not_count();
   wait_for_completion(&vfork);
   freezer_count();
   tracehook_report_vfork_done(p, nr);
  }

9、完成所有操作，返回。成功时返回值为新进程的pid，失败时返回错误码PTR_ERR(p)。

} else {
  nr = PTR_ERR(p);
}
return nr;

10、下面是完整的程序。
/*
 *  Ok, this is the main fork-routine.
 *
 * It copies the process, and if successful kick-starts
 * it and waits for it to finish using the VM if required.
 *
 * Parameters:
 *   clone_flags   - CLONE_* bits selecting the behaviour below.
 *   stack_start,
 *   regs,
 *   stack_size    - forwarded unchanged to copy_process(); regs is also
 *                   tested with user_mode() and handed to the tracehooks.
 *   parent_tidptr - user pointer that receives the new id when
 *                   CLONE_PARENT_SETTID is set.
 *   child_tidptr  - forwarded unchanged to copy_process().
 *
 * Returns the value of task_pid_vnr() for the new task on success,
 * PTR_ERR() of copy_process()'s result on failure, -EINVAL when
 * CLONE_NEWUSER is combined with CLONE_THREAD, or -EPERM when
 * CLONE_NEWUSER is requested without the required capabilities.
 */
long do_fork(unsigned long clone_flags,
	      unsigned long stack_start,
	      struct pt_regs *regs,
	      unsigned long stack_size,
	      int __user *parent_tidptr,
	      int __user *child_tidptr)
{
	struct task_struct *p;
	int trace = 0;	/* stays 0 unless the caller came from user mode */
	long nr;

	/*
	 * Do some preliminary argument and permissions checking before we
	 * actually start allocating stuff
	 */
	if (clone_flags & CLONE_NEWUSER) {
		if (clone_flags & CLONE_THREAD)
			return -EINVAL;
		/* hopefully this check will go away when userns support is
		 * complete
		 */
		if (!capable(CAP_SYS_ADMIN) || !capable(CAP_SETUID) ||
				!capable(CAP_SETGID))
			return -EPERM;
	}

	/*
	 * We hope to recycle these flags after 2.6.26
	 */
	if (unlikely(clone_flags & CLONE_STOPPED)) {
		/* Function-static budget: at most 100 warnings in total. */
		static int __read_mostly count = 100;

		if (count > 0 && printk_ratelimit()) {
			char comm[TASK_COMM_LEN];

			count--;
			/*
			 * NOTE(review): the format string ends in a plain
			 * space here; a trailing newline escape was probably
			 * lost when the listing was extracted -- confirm
			 * against the kernel tree.
			 */
			printk(KERN_INFO "fork(): process `%s' used deprecated "
					"clone flags 0x%lx ",
				get_task_comm(comm, current),
				clone_flags & CLONE_STOPPED);
		}
	}

	/*
	 * When called from kernel_thread, don't do user tracing stuff.
	 */
	if (likely(user_mode(regs)))
		trace = tracehook_prepare_clone(clone_flags);

	p = copy_process(clone_flags, stack_start, regs, stack_size,
			 child_tidptr, NULL, trace);
	/*
	 * Do this prior waking up the new thread - the thread pointer
	 * might get invalid after that point, if the thread exits quickly.
	 */
	if (!IS_ERR(p)) {
		/*
		 * Lives on this stack frame; only initialised and waited on
		 * when CLONE_VFORK is set.
		 */
		struct completion vfork;

		trace_sched_process_fork(current, p);

		/* Capture the id now; it is also the success return value. */
		nr = task_pid_vnr(p);

		/* The return value of put_user() is not checked here. */
		if (clone_flags & CLONE_PARENT_SETTID)
			put_user(nr, parent_tidptr);

		/* Publish the completion in the child before it can run. */
		if (clone_flags & CLONE_VFORK) {
			p->vfork_done = &vfork;
			init_completion(&vfork);
		}

		audit_finish_fork(p);
		tracehook_report_clone(regs, clone_flags, nr, p);

		/*
		 * We set PF_STARTING at creation in case tracing wants to
		 * use this to distinguish a fully live task from one that
		 * hasn't gotten to tracehook_report_clone() yet.  Now we
		 * clear it and set the child going.
		 */
		p->flags &= ~PF_STARTING;

		if (unlikely(clone_flags & CLONE_STOPPED)) {
			/*
			 * We'll start up with an immediate SIGSTOP.
			 */
			sigaddset(&p->pending.signal, SIGSTOP);
			set_tsk_thread_flag(p, TIF_SIGPENDING);
			__set_task_state(p, TASK_STOPPED);
		} else {
			wake_up_new_task(p, clone_flags);
		}

		tracehook_report_clone_complete(trace, regs,
						clone_flags, nr, p);

		/*
		 * vfork: wait on the completion published through
		 * p->vfork_done above, bracketed by the freezer accounting
		 * calls.
		 */
		if (clone_flags & CLONE_VFORK) {
			freezer_do_not_count();
			wait_for_completion(&vfork);
			freezer_count();
			tracehook_report_vfork_done(p, nr);
		}
	} else {
		/* copy_process() failed: return its encoded error value. */
		nr = PTR_ERR(p);
	}
	return nr;
}

View Code
查看全文

相关阅读:
Python的数据类型--数字--字符串
 python基本--数据类型
 系统分区，硬盘格式化，
linux 用户创建，权限，分组
 协程
 进程
 线程与进程--线程三把锁
 线程
 socket网络编程-字典
 socket网络编程

原文地址：https://www.cnblogs.com/farbeyond/p/4550325.html