进程的描述和进程的创建
操作系统内核实现的操作系统三大管理功能:
- 进程管理
- 内存管理
- 文件系统
进程描述符
linux中用一个数据结构struct task_struct来描述进程,称为进程描述符,具体结构示意图如下:
其中
- state 运行状态
- stack 进程堆栈
- struct list_head tasks 进程链表
- 把所有进程都用双向链表链起来,如下图:
操作系统原理进程状态
操作系统原理进程状态主要有:‘就绪态’,‘运行态’,‘阻塞态’三种。具体状态转换图如下:
0号进程的初始化
init_task为第一个进程(0号进程)的进程描述符结构体变量,其初始化是通过硬编码方式固定下来的。除此之外,其他所有进程的初始化都是通过do_fork复制父进程的方式初始化的。INIT_TASK宏定义摘录如下:
/*
 * INIT_TASK(tsk): initializer for the task descriptor named by `tsk`,
 * written as a list of designated field initializers.
 *
 * Visible in the listing below:
 *  - .state is 0, and all three priority fields are MAX_PRIO-20 with
 *    policy SCHED_NORMAL.
 *  - .mm is NULL while .active_mm points at init_mm.
 *  - .real_parent, .parent and .group_leader all point back at tsk itself.
 *  - List heads such as tasks, children and sibling are initialized with
 *    LIST_HEAD_INIT on the same field of tsk.
 *  - fs, files, signal, sighand and nsproxy point at the init_* objects.
 *
 * NOTE(review): this listing carries line-number prefixes (174-240) and
 * the macro lines have no trailing backslashes, so it is presumably a copy
 * from a source browser rather than compilable text - confirm against the
 * kernel tree before reusing it.
 */
#define INIT_TASK(tsk)
174{
175 .state = 0,
176 .stack = &init_thread_info,
177 .usage = ATOMIC_INIT(2),
178 .flags = PF_KTHREAD,
179 .prio = MAX_PRIO-20,
180 .static_prio = MAX_PRIO-20,
181 .normal_prio = MAX_PRIO-20,
182 .policy = SCHED_NORMAL,
183 .cpus_allowed = CPU_MASK_ALL,
184 .nr_cpus_allowed= NR_CPUS,
185 .mm = NULL,
186 .active_mm = &init_mm,
187 .se = {
188 .group_node = LIST_HEAD_INIT(tsk.se.group_node),
189 },
190 .rt = {
191 .run_list = LIST_HEAD_INIT(tsk.rt.run_list),
192 .time_slice = RR_TIMESLICE,
193 },
194 .tasks = LIST_HEAD_INIT(tsk.tasks),
195 INIT_PUSHABLE_TASKS(tsk)
196 INIT_CGROUP_SCHED(tsk)
197 .ptraced = LIST_HEAD_INIT(tsk.ptraced),
198 .ptrace_entry = LIST_HEAD_INIT(tsk.ptrace_entry),
199 .real_parent = &tsk,
200 .parent = &tsk,
201 .children = LIST_HEAD_INIT(tsk.children),
202 .sibling = LIST_HEAD_INIT(tsk.sibling),
203 .group_leader = &tsk,
204 RCU_POINTER_INITIALIZER(real_cred, &init_cred),
205 RCU_POINTER_INITIALIZER(cred, &init_cred),
206 .comm = INIT_TASK_COMM,
207 .thread = INIT_THREAD,
208 .fs = &init_fs,
209 .files = &init_files,
210 .signal = &init_signals,
211 .sighand = &init_sighand,
212 .nsproxy = &init_nsproxy,
213 .pending = {
214 .list = LIST_HEAD_INIT(tsk.pending.list),
215 .signal = {{0}}},
216 .blocked = {{0}},
217 .alloc_lock = __SPIN_LOCK_UNLOCKED(tsk.alloc_lock),
218 .journal_info = NULL,
219 .cpu_timers = INIT_CPU_TIMERS(tsk.cpu_timers),
220 .pi_lock = __RAW_SPIN_LOCK_UNLOCKED(tsk.pi_lock),
221 .timer_slack_ns = 50000, /* 50 usec default slack */
222 .pids = {
223 [PIDTYPE_PID] = INIT_PID_LINK(PIDTYPE_PID),
224 [PIDTYPE_PGID] = INIT_PID_LINK(PIDTYPE_PGID),
225 [PIDTYPE_SID] = INIT_PID_LINK(PIDTYPE_SID),
226 },
227 .thread_group = LIST_HEAD_INIT(tsk.thread_group),
228 .thread_node = LIST_HEAD_INIT(init_signals.thread_head),
229 INIT_IDS
230 INIT_PERF_EVENTS(tsk)
231 INIT_TRACE_IRQFLAGS
232 INIT_LOCKDEP
233 INIT_FTRACE_GRAPH
234 INIT_TRACE_RECURSION
235 INIT_TASK_RCU_PREEMPT(tsk)
236 INIT_TASK_RCU_TASKS(tsk)
237 INIT_CPUSET_SEQ(tsk)
238 INIT_RT_MUTEXES(tsk)
239 INIT_VTIME(tsk)
240}
进程之间父子兄弟关系
进程的描述符数据结构中记录了当前进程的父进程real_parent,记录当前进程的子进程的是双向链表struct list_head children ,记录当前进程的兄弟进程的是双向链表struct list_head sibling。如下图为父子、兄弟关系示意图:
图中,P0有P1、P2、P3三个儿子,P1有两个兄弟(P2和P3),P3还有一个儿子,彼此之间用指针或双向链表相连。
进程的创建过程分析
fork系统调用具体过程:
举例演示怎样在用户态下创建一个子进程。
#include <stdio.h>
#include <stdlib.h>
#include <sys/wait.h>
#include <unistd.h>
/*
 * Demonstrates creating a child process from user space with fork().
 *
 * fork() is called once but returns in two processes: a negative value
 * means no child was created, 0 is what the child sees, and a positive
 * value (the child's pid) is what the parent sees. The parent waits for
 * the child to terminate before printing its final message.
 *
 * Returns 0 on success; exits with status -1 if fork() fails.
 */
int main(void)
{
    pid_t pid = fork();

    if (pid < 0) {
        /* fork() failed: only the original process exists. */
        fprintf(stderr, "Fork Failed!");
        exit(-1);
    } else if (pid == 0) {
        /* Must be a comparison: `pid = 0` would assign and always be false. */
        printf("This is Child Process!\n");
    } else {
        printf("This is Parent Process!\n");
        /* Block until the child exits so its status is collected. */
        wait(NULL);
        printf("Child Complete!\n");
    }

    return 0;
}
运行截图如下:
此时会出现一个疑问:上述代码中,else if 分支与 else 分支都被执行了,看起来 fork 的返回值竟然有两个。实际上,fork 系统调用把当前进程复制出了一个子进程,父进程和子进程都从 fork 返回,只是返回值不同:在父进程中返回子进程的 pid,在子进程中返回 0。
fork、vfork、clone创建新进程
以下代码为fork、vfork、clone三个系统调用的内核处理函数,以及用于创建内核线程的kernel_thread函数。
/*
1694 * Create a kernel thread.
 *
 * Forwards to do_fork() with CLONE_VM and CLONE_UNTRACED OR-ed into the
 * caller's flags. fn and arg are cast to unsigned long and passed in
 * do_fork()'s stack_start and stack_size positions; both tid pointers
 * are NULL. The do_fork() result is returned unchanged.
1695 */
1696pid_t kernel_thread(int (*fn)(void *), void *arg, unsigned long flags)
1697{
1698 return do_fork(flags|CLONE_VM|CLONE_UNTRACED, (unsigned long)fn,
1699 (unsigned long)arg, NULL, NULL);
1700}
1701
/*
 * fork system call, compiled only when __ARCH_WANT_SYS_FORK is defined.
 * With CONFIG_MMU it calls do_fork() with SIGCHLD as the only flag and
 * zero/NULL for every other argument; without CONFIG_MMU it returns
 * -EINVAL.
 */
1702#ifdef __ARCH_WANT_SYS_FORK
1703SYSCALL_DEFINE0(fork)
1704{
1705#ifdef CONFIG_MMU
1706 return do_fork(SIGCHLD, 0, 0, NULL, NULL);
1707#else
1708 /* can not support in nommu mode */
1709 return -EINVAL;
1710#endif
1711}
1712#endif
1713
/*
 * vfork system call, compiled only when __ARCH_WANT_SYS_VFORK is defined.
 * Same do_fork() call as fork above except that CLONE_VFORK and CLONE_VM
 * are added to SIGCHLD in the flags argument.
 */
1714#ifdef __ARCH_WANT_SYS_VFORK
1715SYSCALL_DEFINE0(vfork)
1716{
1717 return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, 0,
1718 0, NULL, NULL);
1719}
1720#endif
1721
/*
 * clone system call, compiled only when __ARCH_WANT_SYS_CLONE is defined.
 *
 * The CONFIG_CLONE_BACKWARDS* options select between argument orders for
 * the same parameters (the BACKWARDS3 variant additionally takes
 * stack_size, giving six arguments). All variants share the single body
 * below, which passes clone_flags, newsp and the two tid pointers to
 * do_fork() with 0 in the stack_size position; tls_val (and stack_size,
 * where present) are not forwarded by this body.
 */
1722#ifdef __ARCH_WANT_SYS_CLONE
1723#ifdef CONFIG_CLONE_BACKWARDS
1724SYSCALL_DEFINE5(clone, unsigned long, clone_flags, unsigned long, newsp,
1725 int __user *, parent_tidptr,
1726 int, tls_val,
1727 int __user *, child_tidptr)
1728#elif defined(CONFIG_CLONE_BACKWARDS2)
1729SYSCALL_DEFINE5(clone, unsigned long, newsp, unsigned long, clone_flags,
1730 int __user *, parent_tidptr,
1731 int __user *, child_tidptr,
1732 int, tls_val)
1733#elif defined(CONFIG_CLONE_BACKWARDS3)
1734SYSCALL_DEFINE6(clone, unsigned long, clone_flags, unsigned long, newsp,
1735 int, stack_size,
1736 int __user *, parent_tidptr,
1737 int __user *, child_tidptr,
1738 int, tls_val)
1739#else
1740SYSCALL_DEFINE5(clone, unsigned long, clone_flags, unsigned long, newsp,
1741 int __user *, parent_tidptr,
1742 int __user *, child_tidptr,
1743 int, tls_val)
1744#endif
1745{
1746 return do_fork(clone_flags, newsp, 0, parent_tidptr, child_tidptr);
1747}
1748#endif
通过上述代码可看出,三个系统调用都可以创建一个新进程,而且都是通过do_fork函数来创建进程的,只不过传递参数不同。
do_fork函数
源码:
/*
1618 * Ok, this is the main fork-routine.
1619 *
1620 * It copies the process, and if successful kick-starts
1621 * it and waits for it to finish using the VM if required.
 *
 * Parameters (as used in the body below):
 *   clone_flags    - flag bits; tested here for CLONE_UNTRACED,
 *                    CLONE_VFORK, CLONE_PARENT_SETTID and the CSIGNAL
 *                    field, and passed on to copy_process().
 *   stack_start,
 *   stack_size     - passed through to copy_process() unchanged.
 *   parent_tidptr  - user pointer that receives the new pid via
 *                    put_user() when CLONE_PARENT_SETTID is set.
 *   child_tidptr   - user pointer passed through to copy_process().
 *
 * Returns the value of pid_vnr() for the new task on success, or the
 * error code extracted with PTR_ERR() when copy_process() fails.
1622 */
1623long do_fork(unsigned long clone_flags,
1624 unsigned long stack_start,
1625 unsigned long stack_size,
1626 int __user *parent_tidptr,
1627 int __user *child_tidptr)
1628{
1629 struct task_struct *p;
1630 int trace = 0;
1631 long nr;
1632
1633 /*
1634 * Determine whether and which event to report to ptracer. When
1635 * called from kernel_thread or CLONE_UNTRACED is explicitly
1636 * requested, no event is reported; otherwise, report if the event
1637 * for the type of forking is enabled.
1638 */
1639 if (!(clone_flags & CLONE_UNTRACED)) {
1640 if (clone_flags & CLONE_VFORK)
1641 trace = PTRACE_EVENT_VFORK;
1642 else if ((clone_flags & CSIGNAL) != SIGCHLD)
1643 trace = PTRACE_EVENT_CLONE;
1644 else
1645 trace = PTRACE_EVENT_FORK;
1646
1647 if (likely(!ptrace_event_enabled(current, trace)))
1648 trace = 0;
1649 }
1650
1651 p = copy_process(clone_flags, stack_start, stack_size,
1652 child_tidptr, NULL, trace);
1653 /*
1654 * Do this prior waking up the new thread - the thread pointer
1655 * might get invalid after that point, if the thread exits quickly.
1656 */
1657 if (!IS_ERR(p)) {
1658 struct completion vfork;
1659 struct pid *pid;
1660
1661 trace_sched_process_fork(current, p);
1662
1663 pid = get_task_pid(p, PIDTYPE_PID);
1664 nr = pid_vnr(pid);
1665
1666 if (clone_flags & CLONE_PARENT_SETTID)
1667 put_user(nr, parent_tidptr);
1668
 /*
  * Prepare the on-stack completion that is handed to
  * wait_for_vfork_done() further down, before the new task is woken.
  */
1669 if (clone_flags & CLONE_VFORK) {
1670 p->vfork_done = &vfork;
1671 init_completion(&vfork);
1672 get_task_struct(p);
1673 }
1674
1675 wake_up_new_task(p);
1676
1677 /* forking complete and child started to run, tell ptracer */
1678 if (unlikely(trace))
1679 ptrace_event_pid(trace, pid);
1680
1681 if (clone_flags & CLONE_VFORK) {
1682 if (!wait_for_vfork_done(p, &vfork))
1683 ptrace_event_pid(PTRACE_EVENT_VFORK_DONE, pid);
1684 }
1685
1686 put_pid(pid);
1687 } else {
 /* copy_process() returned an error pointer; hand back its error code. */
1688 nr = PTR_ERR(p);
1689 }
1690 return nr;
1691}
do_fork函数参数
- clone_flags:子进程创建相关标志,通过此标志可对父进程的资源进行有选择的复制。
- stack_start:子进程用户态堆栈地址。
- regs:指向pt_regs结构体的指针(这是旧版本内核中do_fork的参数;上面列出的do_fork源码中已经没有该参数)。
- stack_size:用户堆栈的大小,通常设为零。
- parent_tidptr和child_tidptr:指向用户态变量的指针,分别用于在父进程、子进程的用户空间中保存新进程的pid(例如设置了CLONE_PARENT_SETTID标志时,do_fork通过put_user把新进程的pid写入parent_tidptr指向的地址)。
实验跟踪进程创建过程
更换menu文件,进入gdb,分别在sys_clone、do_fork、dup_task_struct、copy_process、copy_thread、ret_from_fork处设置断点,跟踪fork进程创建过程,实验截图如下: