zoukankan      html  css  js  c++  java
  • 《Linux内核分析》 week6作业-Linux内核fork()系统调用的创建过程

      一.进程控制块PCB-task_struct

     进程在操作系统中都有一个结构,用于表示这个进程。这就是进程控制块(PCB),在Linux中具体实现是task_struct数据结构,它主要记录了以下信息:

    •  状态信息,例如可执行状态、就绪状态、阻塞状态等。
    •  性质,由于unix有很多变种,进程有自己独特的性质。
    •  资源,资源的链接比如内存,还有资源的限制和权限等。
    •  组织,例如按照家族关系建立起来的树(父进程、子进程等)。

    task_struct结构体内容非常庞大,暂时没有去分析源代码,以后有时间再去研究。

    二.Linux fork执行的过程

       在menu中添加一个fork的系统调用,然后用gdb开始调试.执行以下命令

    qemu -kernel linux-3.18.6/arch/x86/boot/bzImage -initrd rootfs.img -s -S
    gdb
    file linux-3.18.6/vmlinux
    target remote:1234

      然后在sys_fork、sys_clone处设置断点,再逐步调试,观察fork系统调用的执行过程。

       

    具体分析fork系统调用执行过程.

     1.fork、vfork和clone三个系统调用都可以创建一个新进程,而且它们都是通过调用do_fork来实现进程的创建,do_fork通过传递不同的clone_flags来实现fork、clone、vfork。

    long do_fork(unsigned long clone_flags,
    1624          unsigned long stack_start,
    1625          unsigned long stack_size,
    1626          int __user *parent_tidptr,
    1627          int __user *child_tidptr)
    1628{
    1629    struct task_struct *p;
    1630    int trace = 0;
    1631    long nr;
    1632
    1633    /*
    1634     * Determine whether and which event to report to ptracer.  When
    1635     * called from kernel_thread or CLONE_UNTRACED is explicitly
    1636     * requested, no event is reported; otherwise, report if the event
    1637     * for the type of forking is enabled.
    1638     */
    1639    if (!(clone_flags & CLONE_UNTRACED)) {
    1640        if (clone_flags & CLONE_VFORK)
    1641            trace = PTRACE_EVENT_VFORK;
    1642        else if ((clone_flags & CSIGNAL) != SIGCHLD)
    1643            trace = PTRACE_EVENT_CLONE;
    1644        else
    1645            trace = PTRACE_EVENT_FORK;
    1646
    1647        if (likely(!ptrace_event_enabled(current, trace)))
    1648            trace = 0;
    1649    }
    1650    
    1651    p = copy_process(clone_flags, stack_start, stack_size,
    1652             child_tidptr, NULL, trace);    #进程复制,核心函数
    1653    /*
    1654     * Do this prior waking up the new thread - the thread pointer
    1655     * might get invalid after that point, if the thread exits quickly.
    1656     */
    1657    if (!IS_ERR(p)) {
    1658        struct completion vfork;
    1659        struct pid *pid;
    1660
    1661        trace_sched_process_fork(current, p);
    1662
    1663        pid = get_task_pid(p, PIDTYPE_PID);
    1664        nr = pid_vnr(pid);
    1665
    1666        if (clone_flags & CLONE_PARENT_SETTID)
    1667            put_user(nr, parent_tidptr);
    1668
    1669        if (clone_flags & CLONE_VFORK) {
    1670            p->vfork_done = &vfork;
    1671            init_completion(&vfork);
    1672            get_task_struct(p);
    1673        }
    1674
    1675        wake_up_new_task(p);
    1676
    1677        /* forking complete and child started to run, tell ptracer */
    1678        if (unlikely(trace))
    1679            ptrace_event_pid(trace, pid);
    1680
    1681        if (clone_flags & CLONE_VFORK) {
    1682            if (!wait_for_vfork_done(p, &vfork))
    1683                ptrace_event_pid(PTRACE_EVENT_VFORK_DONE, pid);
    1684        }
    1685
    1686        put_pid(pid);
    1687    } else {
    1688        nr = PTR_ERR(p);
    1689    }
    1690    return nr;
    1691}
    1692

    do_fork()函数的核心是copy_process(),该函数完成了进程创建的绝大部分。

    /*
    1175 * This creates a new process as a copy of the old one,
    1176 * but does not actually start it yet.
    1177 *
    1178 * It copies the registers, and all the appropriate
    1179 * parts of the process environment (as per the clone
    1180 * flags). The actual kick-off is left to the caller.
    1181 */
    1182static struct task_struct *copy_process(unsigned long clone_flags,
    1183                    unsigned long stack_start,
    1184                    unsigned long stack_size,
    1185                    int __user *child_tidptr,
    1186                    struct pid *pid,
    1187                    int trace)
    1188{
    1189    int retval;
    1190    struct task_struct *p;
    1191
    1192    if ((clone_flags & (CLONE_NEWNS|CLONE_FS)) == (CLONE_NEWNS|CLONE_FS))
    1193        return ERR_PTR(-EINVAL);
    1194
    1195    if ((clone_flags & (CLONE_NEWUSER|CLONE_FS)) == (CLONE_NEWUSER|CLONE_FS))
    1196        return ERR_PTR(-EINVAL);
    1197
    1198    /*
    1199     * Thread groups must share signals as well, and detached threads
    1200     * can only be started up within the thread group.
    1201     */
    1202    if ((clone_flags & CLONE_THREAD) && !(clone_flags & CLONE_SIGHAND))
    1203        return ERR_PTR(-EINVAL);
    1204
    1205    /*
    1206     * Shared signal handlers imply shared VM. By way of the above,
    1207     * thread groups also imply shared VM. Blocking this case allows
    1208     * for various simplifications in other code.
    1209     */
    1210    if ((clone_flags & CLONE_SIGHAND) && !(clone_flags & CLONE_VM))
    1211        return ERR_PTR(-EINVAL);
    1212
    1213    /*
    1214     * Siblings of global init remain as zombies on exit since they are
    1215     * not reaped by their parent (swapper). To solve this and to avoid
    1216     * multi-rooted process trees, prevent global and container-inits
    1217     * from creating siblings.
    1218     */
    1219    if ((clone_flags & CLONE_PARENT) &&
    1220                current->signal->flags & SIGNAL_UNKILLABLE)
    1221        return ERR_PTR(-EINVAL);
    1222
    1223    /*
    1224     * If the new process will be in a different pid or user namespace
    1225     * do not allow it to share a thread group or signal handlers or
    1226     * parent with the forking task.
    1227     */
    1228    if (clone_flags & CLONE_SIGHAND) {
    1229        if ((clone_flags & (CLONE_NEWUSER | CLONE_NEWPID)) ||
    1230            (task_active_pid_ns(current) !=
    1231                current->nsproxy->pid_ns_for_children))
    1232            return ERR_PTR(-EINVAL);
    1233    }
    1234
    1235    retval = security_task_create(clone_flags);
    1236    if (retval)
    1237        goto fork_out;
    1238
    1239    retval = -ENOMEM;
    1240    p = dup_task_struct(current);  #为子进程创建一个新的内核栈,复制task_struct和thread_info结构,此时子进程的进程控制块和父进程完全一致。
    1241    if (!p)
    1242        goto fork_out;
    1243
    1244    ftrace_graph_init_task(p);
    1245
    1246    rt_mutex_init_task(p);
    1247
    1248#ifdef CONFIG_PROVE_LOCKING
    1249    DEBUG_LOCKS_WARN_ON(!p->hardirqs_enabled);
    1250    DEBUG_LOCKS_WARN_ON(!p->softirqs_enabled);
    1251#endif
    1252    retval = -EAGAIN;
    1253    if (atomic_read(&p->real_cred->user->processes) >=
    1254            task_rlimit(p, RLIMIT_NPROC)) {
    1255        if (p->real_cred->user != INIT_USER &&
    1256            !capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN))
    1257            goto bad_fork_free;
    1258    }
    1259    current->flags &= ~PF_NPROC_EXCEEDED;
    1260
    1261    retval = copy_creds(p, clone_flags);
    1262    if (retval < 0)
    1263        goto bad_fork_free;
    1264
    1265    /*
    1266     * If multiple threads are within copy_process(), then this check
    1267     * triggers too late. This doesn't hurt, the check is only there
    1268     * to stop root fork bombs.
    1269     */
    1270    retval = -EAGAIN;
    1271    if (nr_threads >= max_threads)
    1272        goto bad_fork_cleanup_count;
    1273
    1274    if (!try_module_get(task_thread_info(p)->exec_domain->module))
    1275        goto bad_fork_cleanup_count;
    1276
    1277    delayacct_tsk_init(p);    /* Must remain after dup_task_struct() */
    1278    p->flags &= ~(PF_SUPERPRIV | PF_WQ_WORKER);
    1279    p->flags |= PF_FORKNOEXEC;
    1280    INIT_LIST_HEAD(&p->children);
    1281    INIT_LIST_HEAD(&p->sibling);
    1282    rcu_copy_process(p);
    1283    p->vfork_done = NULL;
    1284    spin_lock_init(&p->alloc_lock);
    1285
    1286    init_sigpending(&p->pending);
    1287
    1288    p->utime = p->stime = p->gtime = 0;
    ....

    通过dup_task_struct()函数,为子进程创建一个新的内核栈,复制task_struct和thread_info结构。 

    ti=alloc_thread_info_node(task,node);
    tsk->stack=ti;
    setup_thread_stack(tsk,orig); //这里只是复制了thread_info

    重点关注下,fork()创建子进程后,父进程从系统调用中返回,而子进程从哪开始返回.

    这主要取决于copy_process()中调用的copy_thread()的代码.

    int copy_thread(unsigned long clone_flags, unsigned long sp,
    133    unsigned long arg, struct task_struct *p)
    134{
    135    struct pt_regs *childregs = task_pt_regs(p);
    136    struct task_struct *tsk;
    137    int err;
    138
    139    p->thread.sp = (unsigned long) childregs; #记录进程切换时的堆栈指针
    140    p->thread.sp0 = (unsigned long) (childregs+1);
    141    memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps));
    142
    143    if (unlikely(p->flags & PF_KTHREAD)) {
    144        /* kernel thread */
    145        memset(childregs, 0, sizeof(struct pt_regs));
    146        p->thread.ip = (unsigned long) ret_from_kernel_thread;
    147        task_user_gs(p) = __KERNEL_STACK_CANARY;
    148        childregs->ds = __USER_DS;
    149        childregs->es = __USER_DS;
    150        childregs->fs = __KERNEL_PERCPU;
    151        childregs->bx = sp;    /* function */
    152        childregs->bp = arg;
    153        childregs->orig_ax = -1;
    154        childregs->cs = __KERNEL_CS | get_kernel_rpl();
    155        childregs->flags = X86_EFLAGS_IF | X86_EFLAGS_FIXED;
    156        p->thread.io_bitmap_ptr = NULL;
    157        return 0;
    158    }
    159    *childregs = *current_pt_regs();#复制内核堆栈
    160    childregs->ax = 0; #这也是为什么子进程的fork返回0
    161    if (sp)
    162        childregs->sp = sp; 
    163
    164    p->thread.ip = (unsigned long) ret_from_fork; #子进程开始执行处
    165    task_user_gs(p) = get_user_gs(current_pt_regs());
    166
    167    p->thread.io_bitmap_ptr = NULL;
    168    tsk = current;
    169    err = -ENOMEM;
    170
    171    if (unlikely(test_tsk_thread_flag(tsk, TIF_IO_BITMAP))) {
    172        p->thread.io_bitmap_ptr = kmemdup(tsk->thread.io_bitmap_ptr,
    173                        IO_BITMAP_BYTES, GFP_KERNEL);
    174        if (!p->thread.io_bitmap_ptr) {
    175            p->thread.io_bitmap_max = 0;
    176            return -ENOMEM;
    177        }
    178        set_tsk_thread_flag(p, TIF_IO_BITMAP);
    179    }
    180
    181    err = 0;
    182
    183    /*
    184     * Set a new TLS for the child thread?
    185     */
    186    if (clone_flags & CLONE_SETTLS)
    187        err = do_set_thread_area(p, -1,
    188            (struct user_desc __user *)childregs->si, 0);
    189
    190    if (err && p->thread.io_bitmap_ptr) {
    191        kfree(p->thread.io_bitmap_ptr);
    192        p->thread.io_bitmap_max = 0;
    193    }
    194    return err;
       } 

       然后回到do_fork()函数中,唤醒子进程并开始运行。至此,一个进程创建就完成了。

    三.实验总结

     虽然中间的很多细节还不是很清楚,但是对Linux创建子进程的大体流程有了一个宏观的认识,更加深刻地理解了底层Linux内核进程运行的机制。

  • 相关阅读:
    Java.Util.List(List接口)
    在VMware安装Centos7
    java中原生的发送http请求(无任何的jar包导入)
    二叉树算法的收集
    javascript将list转换成树状结构
    CSS实现鼠标悬浮无限向下级展示的简单代码
    Jquery的框架解析
    mybaits插入时的一些总结
    苹果手机在有滚动条的情况下,滑动不顺畅的原因
    tomcat下jndi的三种配置方式
  • 原文地址:https://www.cnblogs.com/sixue/p/4469177.html
Copyright © 2011-2022 走看看