  • Linux Process Management (1): The Birth of a Process

    Series: Linux Process Management

    Contents:

    Linux Process Management (1): The Birth of a Process

    Linux Process Management (2): The CFS Scheduler

    Linux Process Management (3): SMP Load Balancing

    Linux Process Management (4): The HMP Scheduler

    Linux Process Management (5): The NUMA Scheduler

    Linux Process Management (6): The EAS Energy-Aware Scheduler

    Linux Process Management (7): Real-Time Scheduling

    Linux Process Management (8): Latest Updates and Outlook

    Linux Process Management (Extra): Kernel Threads

    Keywords: swapper, init_task, fork

    The Linux kernel usually refers to a process as a task; the process control block (PCB, Process Control Block) is represented by struct task_struct.

    A thread is a lightweight process and the smallest scheduling unit of the operating system; one process can own multiple threads.

    Threads are called lightweight because they share their process's resource space. Threads and processes use the same PCB data structure.

    The kernel creates threads with the clone call, which is similar to fork but specifies which resources are shared with the parent and which are private to the new thread.
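    To make that sharing concrete, here is a minimal userspace sketch (an illustration, not code from this series) that uses the glibc clone() wrapper to run a function on its own stack while sharing the address space with the parent; the flag combination and stack size are illustrative assumptions:

    #define _GNU_SOURCE
    #include <sched.h>
    #include <signal.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include <sys/wait.h>

    #define CHILD_STACK_SIZE (64 * 1024)    /* illustrative stack size */

    static int child_fn(void *arg)
    {
        /* Runs in the parent's address space because of CLONE_VM. */
        printf("child sees arg = %d\n", *(int *)arg);
        return 0;
    }

    int main(void)
    {
        int value = 42;
        char *stack = malloc(CHILD_STACK_SIZE);
        if (!stack)
            return 1;

        /* Share VM, filesystem info, open files and signal handlers with the
         * parent -- roughly what a thread library requests -- and ask for
         * SIGCHLD on exit so the parent can wait for the child. */
        int flags = CLONE_VM | CLONE_FS | CLONE_FILES | CLONE_SIGHAND | SIGCHLD;
        pid_t pid = clone(child_fn, stack + CHILD_STACK_SIZE, flags, &value);
        if (pid == -1)
            return 1;

        waitpid(pid, NULL, 0);    /* reap the child */
        free(stack);
        return 0;
    }

    Dropping CLONE_VM from the flag set would instead give the child its own copy-on-write address space, which is essentially the difference between a thread-like clone and a fork-like one.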

    1. The init process

    The init process, also called the swapper or idle process, is the first process when Linux boots.

    The idle process is created statically during kernel startup (start_kernel()), and all of its core data structures are assigned statically.

    When no process needs to be scheduled, the scheduler runs the idle process.

    start_kernel
      ->rest_init
        ->cpu_startup_entry
          ->cpu_idle_loop
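
    For reference, the body of the idle loop looks roughly like the sketch below (heavily simplified from kernel/sched/idle.c of a 4.x kernel; NO_HZ, cpuidle and RCU details are omitted, and the exact code differs between versions):

    static void cpu_idle_loop(void)
    {
        while (1) {
            tick_nohz_idle_enter();              /* stop the periodic tick while idle */

            while (!need_resched()) {
                local_irq_disable();
                arch_cpu_idle();                 /* low-power wait, e.g. WFI on ARM */
            }

            tick_nohz_idle_exit();               /* restart the tick */
            schedule_preempt_disabled();         /* pick the next runnable task */
        }
    }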

      

    1.1 init_task

    The task_struct of the init_task process is initialized through the INIT_TASK macro.

    /* Initial task structure */
    struct task_struct init_task = INIT_TASK(init_task);
    EXPORT_SYMBOL(init_task);

    INIT_TASK fills in the init_task data structure.

    #define INIT_TASK(tsk)    
    {                                    
        .state        = 0,                        
        .stack        = &init_thread_info,                -------#define init_thread_info (init_thread_union.thread_info)
        .usage        = ATOMIC_INIT(2),                
        .flags        = PF_KTHREAD,                    ----------indicates this is a kernel thread
        .prio        = MAX_PRIO-20,                    ----------MAX_PRIO is 140, so prio here is 120, i.e. nice 0 (see the prio/nice note after this macro)
        .static_prio    = MAX_PRIO-20,                    
        .normal_prio    = MAX_PRIO-20,                    
        .policy        = SCHED_NORMAL,                    -------the scheduling policy is SCHED_NORMAL
        .cpus_allowed    = CPU_MASK_ALL,                    
        .nr_cpus_allowed= NR_CPUS,                    
        .mm        = NULL,                        
        .active_mm    = &init_mm,                    ------------the idle process's memory-management structure
        .restart_block = {                        
            .fn = do_no_restart_syscall,                
        },                                
        .se        = {                        
            .group_node     = LIST_HEAD_INIT(tsk.se.group_node),    
        },                                
        .rt        = {                        
            .run_list    = LIST_HEAD_INIT(tsk.rt.run_list),    
            .time_slice    = RR_TIMESLICE,                
        },                                
        .tasks        = LIST_HEAD_INIT(tsk.tasks),            
        INIT_PUSHABLE_TASKS(tsk)                    
        INIT_CGROUP_SCHED(tsk)                        
        .ptraced    = LIST_HEAD_INIT(tsk.ptraced),            
        .ptrace_entry    = LIST_HEAD_INIT(tsk.ptrace_entry),        
        .real_parent    = &tsk,                        
        .parent        = &tsk,                        
        .children    = LIST_HEAD_INIT(tsk.children),            
        .sibling    = LIST_HEAD_INIT(tsk.sibling),            
        .group_leader    = &tsk,                        
        RCU_POINTER_INITIALIZER(real_cred, &init_cred),            
        RCU_POINTER_INITIALIZER(cred, &init_cred),            
        .comm        = INIT_TASK_COMM,                
        .thread        = INIT_THREAD,                    
        .fs        = &init_fs,                    
        .files        = &init_files,                    
        .signal        = &init_signals,                
        .sighand    = &init_sighand,                
        .nsproxy    = &init_nsproxy,                
        .pending    = {                        
            .list = LIST_HEAD_INIT(tsk.pending.list),        
            .signal = {{0}}},                    
        .blocked    = {{0}},                    
        .alloc_lock    = __SPIN_LOCK_UNLOCKED(tsk.alloc_lock),        
        .journal_info    = NULL,                        
        .cpu_timers    = INIT_CPU_TIMERS(tsk.cpu_timers),        
        .pi_lock    = __RAW_SPIN_LOCK_UNLOCKED(tsk.pi_lock),    
        .timer_slack_ns = 50000, /* 50 usec default slack */        
        .pids = {                            
            [PIDTYPE_PID]  = INIT_PID_LINK(PIDTYPE_PID),        
            [PIDTYPE_PGID] = INIT_PID_LINK(PIDTYPE_PGID),        
            [PIDTYPE_SID]  = INIT_PID_LINK(PIDTYPE_SID),        
        },                                
        .thread_group    = LIST_HEAD_INIT(tsk.thread_group),        
        .thread_node    = LIST_HEAD_INIT(init_signals.thread_head),    
        INIT_IDS                            
        INIT_PERF_EVENTS(tsk)                        
        INIT_TRACE_IRQFLAGS                        
        INIT_LOCKDEP                            
        INIT_FTRACE_GRAPH                        
        INIT_TRACE_RECURSION                        
        INIT_TASK_RCU_PREEMPT(tsk)                    
        INIT_TASK_RCU_TASKS(tsk)                    
        INIT_CPUSET_SEQ(tsk)                        
        INIT_RT_MUTEXES(tsk)                        
        INIT_PREV_CPUTIME(tsk)                        
        INIT_VTIME(tsk)                            
        INIT_NUMA_BALANCING(tsk)                    
        INIT_KASAN(tsk)                            
    }
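
    On the prio/nice relationship mentioned in the annotation above: nice values (-20 to 19) map onto priorities 100 to 139, with nice 0 corresponding to priority 120. The mainline conversion macros look roughly like this (paraphrased from include/linux/sched/prio.h; the exact header varies by kernel version):

    #define MAX_NICE        19
    #define MIN_NICE        -20
    #define NICE_WIDTH      (MAX_NICE - MIN_NICE + 1)        /* 40 nice levels          */
    #define MAX_RT_PRIO     100                              /* RT priorities are 0..99 */
    #define MAX_PRIO        (MAX_RT_PRIO + NICE_WIDTH)       /* 140                     */
    #define DEFAULT_PRIO    (MAX_RT_PRIO + NICE_WIDTH / 2)   /* 120, i.e. nice 0        */

    #define NICE_TO_PRIO(nice)    ((nice) + DEFAULT_PRIO)    /* nice 0   -> prio 120 */
    #define PRIO_TO_NICE(prio)    ((prio) - DEFAULT_PRIO)    /* prio 120 -> nice 0   */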

    1.2 Relationship between thread_info, thread_union and task_struct

    thread_union contains the thread_info and the kernel stack;

    task_struct's stack member points to init_thread_union.thread_info.

    (Figure: kernel stack layout diagram)

    1.2.1 init_thread_info

    init_thread_union is marked with __init_task_data, so it is placed in the .data..init_task section.

    /*
     * Initial thread structure. Alignment of this is handled by a special
     * linker map entry.
     */
    union thread_union init_thread_union __init_task_data =
        { INIT_THREAD_INFO(init_task) };
    
    
    #define __init_task_data __attribute__((__section__(".data..init_task")))

    Now look at the .data..init_task section; its size and placement are defined in the vmlinux.lds.S linker script.

    We can see that a two-page area is reserved at the start of _data to hold init_thread_union.

    SECTIONS
    {
    ...
        .data : AT(__data_loc) {
            _data = .;        /* address in memory */
            _sdata = .;
    
            /*
             * first, the init task union, aligned
             * to an 8192 byte boundary.
             */
            INIT_TASK_DATA(THREAD_SIZE)------------------------------placed at the start of _data, two pages in size, i.e. 8KB.
    ...
            _edata = .;
        }
        _edata_loc = __data_loc + SIZEOF(.data);
    ...
    }
    
    #define INIT_TASK_DATA(align)                        
        . = ALIGN(align);                        
        *(.data..init_task)
    
    
    #define THREAD_SIZE_ORDER    1
    #define THREAD_SIZE        (PAGE_SIZE << THREAD_SIZE_ORDER)
    #define THREAD_START_SP        (THREAD_SIZE - 8)
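
    With the usual 4KB page size this works out to THREAD_SIZE = PAGE_SIZE << 1 = 4096 << 1 = 8192 bytes (two pages), and THREAD_START_SP = 8192 - 8 = 8184, so the initial stack pointer sits 8 bytes below the top of the 8KB area.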

    init_thread_union is a thread_union union, fixed at 8KB in size.

    union thread_union {
        struct thread_info thread_info;
        unsigned long stack[THREAD_SIZE/sizeof(long)];
    };

     

    init_thread_union contains a struct thread_info member (init_thread_info), which is initialized by INIT_THREAD_INFO.

    struct thread_info {
        unsigned long        flags;        /* low level flags */
        int            preempt_count;    /* 0 => preemptable, <0 => bug */
        mm_segment_t        addr_limit;    /* address limit */
        struct task_struct    *task;        /* main task structure */
        struct exec_domain    *exec_domain;    /* execution domain */
        __u32            cpu;        /* cpu */
        __u32            cpu_domain;    /* cpu domain */
        struct cpu_context_save    cpu_context;    /* cpu context */
        __u32            syscall;    /* syscall number */
        __u8            used_cp[16];    /* thread used copro */
        unsigned long        tp_value[2];    /* TLS registers */
    #ifdef CONFIG_CRUNCH
        struct crunch_state    crunchstate;
    #endif
        union fp_state        fpstate __attribute__((aligned(8)));
        union vfp_state        vfpstate;
    #ifdef CONFIG_ARM_THUMBEE
        unsigned long        thumbee_state;    /* ThumbEE Handler Base register */
    #endif
    };
    
    #define INIT_THREAD_INFO(tsk)                        
    {                                    
        .task        = &tsk,                        
        .exec_domain    = &default_exec_domain,                
        .flags        = 0,                        
        .preempt_count    = INIT_PREEMPT_COUNT,                
        .addr_limit    = KERNEL_DS,                    
        .cpu_domain    = domain_val(DOMAIN_USER, DOMAIN_MANAGER) |    
                  domain_val(DOMAIN_KERNEL, DOMAIN_MANAGER) |    
                  domain_val(DOMAIN_IO, DOMAIN_CLIENT),        
    }

    1.2.2 The init_task kernel stack

    Before the ARM32 processor jumps from assembly into the C entry point start_kernel(), the SP register is set to point at the top of the 8KB kernel stack, where an 8-byte hole is reserved at the very top.

    /*
     * The following fragment of code is executed with the MMU on in MMU mode,
     * and uses absolute addresses; this is not position independent.
     *
     *  r0  = cp#15 control register
     *  r1  = machine ID
     *  r2  = atags/dtb pointer
     *  r9  = processor ID
     */
        __INIT
    __mmap_switched:
        adr    r3, __mmap_switched_data
    
        ldmia    r3!, {r4, r5, r6, r7}
    ...
     ARM(    ldmia    r3, {r4, r5, r6, r7, sp})
     THUMB(    ldmia    r3, {r4, r5, r6, r7}    )
     THUMB(    ldr    sp, [r3, #16]        )
    ...
        b    start_kernel------------------------------------------------jump to start_kernel
    ENDPROC(__mmap_switched)
    
        .align    2
        .type    __mmap_switched_data, %object
    __mmap_switched_data:
        .long    __data_loc            @ r4
        .long    _sdata                @ r5
        .long    __bss_start            @ r6
        .long    _end                @ r7
        .long    processor_id            @ r4
        .long    __machine_arch_type        @ r5
        .long    __atags_pointer            @ r6
    #ifdef CONFIG_CPU_CP15
        .long    cr_alignment            @ r7
    #else
        .long    0                @ r7
    #endif
        .long    init_thread_union + THREAD_START_SP @ sp-----------------sets the SP register to point at the top of the 8KB stack area.
        .size    __mmap_switched_data, . - __mmap_switched_data

    1.2.3 Working back from sp to current

    In the kernel, the current macro returns the current process's task_struct. The path from sp to current is:

    1. Read the current kernel stack pointer from the SP register.
    2. Mask (align) the stack pointer down to THREAD_SIZE to get the struct thread_info pointer.
    3. Follow thread_info->task to reach the task_struct.

    This can be read together with the kernel stack diagram above.

    #define get_current() (current_thread_info()->task)
    #define current get_current()
    
    /*
     * how to get the current stack pointer in C
     */
    register unsigned long current_stack_pointer asm ("sp");
    
    /*
     * how to get the thread information struct from C
     */
    static inline struct thread_info *current_thread_info(void) __attribute_const__;
    
    static inline struct thread_info *current_thread_info(void)
    {
        return (struct thread_info *)
            (current_stack_pointer & ~(THREAD_SIZE - 1));
    }
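
    As a quick numeric illustration of the masking (the sp value below is made up, and THREAD_SIZE = 8KB is assumed as defined earlier):

    #include <stdio.h>

    #define THREAD_SIZE 8192UL    /* two 4KB pages, as on ARM32 above */

    int main(void)
    {
        unsigned long sp = 0xC1234ABCUL;                /* hypothetical kernel stack pointer */
        unsigned long ti = sp & ~(THREAD_SIZE - 1);     /* round down to an 8KB boundary     */

        printf("sp = 0x%lx -> thread_info at 0x%lx\n", sp, ti);
        /* prints: sp = 0xc1234abc -> thread_info at 0xc1234000 */
        return 0;
    }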

    2. fork

    Linux creates processes and threads through the fork, vfork and clone system calls; inside the kernel all three are implemented by one function, do_fork(), which kernel_thread() also uses to create kernel threads.

    do_fork is defined in fork.c; the four wrappers below differ only in the parameters they pass.

    /*
     * Create a kernel thread.
     */
    pid_t kernel_thread(int (*fn)(void *), void *arg, unsigned long flags)
    {
        return do_fork(flags|CLONE_VM|CLONE_UNTRACED, (unsigned long)fn,
            (unsigned long)arg, NULL, NULL);
    }
    
    SYSCALL_DEFINE0(fork)
    {
        return do_fork(SIGCHLD, 0, 0, NULL, NULL);
    }
    
    SYSCALL_DEFINE0(vfork)
    {
        return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, 0,
                0, NULL, NULL);
    }
    
    SYSCALL_DEFINE5(clone, unsigned long, clone_flags, unsigned long, newsp,
             int __user *, parent_tidptr,
             int, tls_val,
             int __user *, child_tidptr)
    {
        return do_fork(clone_flags, newsp, 0, parent_tidptr, child_tidptr);
    }
    

    fork only passes the SIGCHLD flag, so a SIGCHLD signal is sent to notify the parent when the child terminates. fork is the heavyweight call: it builds a complete copy of the parent for the child, which then runs on that copy.

    However, copy-on-write (COW) is used: the child only copies the parent's page tables, not the page contents. Only when the child writes to a page does the copy-on-write mechanism create a private copy for it.

    vfork adds two flags compared with fork: CLONE_VFORK means the parent is suspended until the child releases its virtual memory resources; CLONE_VM means parent and child run in the same address space.

    Since fork gained COW, vfork has little remaining value.

    clone is used to create threads; its arguments are passed down from user space in registers, and a new stack address newsp is usually supplied. Through clone_flags, clone gives the user the widest choice: it can behave like fork or vfork, or share resources with the parent.

    kernel_thread creates kernel threads: CLONE_VM means sharing the parent's memory, and CLONE_UNTRACED means a tracing process cannot force CLONE_PTRACE on this thread.

    In short: fork is heavy, vfork is becoming obsolete, clone is light, and kernel_thread is for the kernel.

    2.1 do_fork and its parameters

    do_fork takes five parameters:

    • clone_flags: the set of flags controlling process creation
    • stack_start: start address of the user-space stack
    • stack_size: size of the user-space stack
    • parent_tidptr and child_tidptr: two pointers to user-space addresses, used to store the new task's ID for the parent and the child respectively.

    Among these, clone_flags is the key parameter shaping do_fork's behaviour:

    /*
     * cloning flags:
     */
    #define CSIGNAL        0x000000ff    /* signal mask to be sent at exit */
    #define CLONE_VM    0x00000100    /* set if VM shared between processes */-------------------------parent and child run in the same virtual address space
    #define CLONE_FS    0x00000200    /* set if fs info shared between processes */--------------------parent and child share filesystem information
    #define CLONE_FILES    0x00000400    /* set if open files shared between processes */--------------parent and child share the file descriptor table
    #define CLONE_SIGHAND    0x00000800    /* set if signal handlers and blocked signals shared */-----parent and child share the signal handler table
    #define CLONE_PTRACE    0x00002000    /* set if we want to let tracing continue on the child too */---------if the parent is being ptraced, the child is traced too.
    #define CLONE_VFORK    0x00004000    /* set if the parent wants the child to wake it up on mm_release */----a completion is set up when the child is created; wait_for_completion() puts the parent to sleep until the child calls execve() or exit() and releases its virtual memory.
    #define CLONE_PARENT    0x00008000    /* set if we want to have the same parent as the cloner */------------the new process is a sibling of the caller, not its child.
    #define CLONE_THREAD    0x00010000    /* Same thread group? */
    #define CLONE_NEWNS    0x00020000    /* New mount namespace group */------------parent and child do not share the mount namespace
    #define CLONE_SYSVSEM    0x00040000    /* share system V SEM_UNDO semantics */--
    #define CLONE_SETTLS    0x00080000    /* create a new TLS for the child */
    #define CLONE_PARENT_SETTID    0x00100000    /* set the TID in the parent */
    #define CLONE_CHILD_CLEARTID    0x00200000    /* clear the TID in the child */
    #define CLONE_DETACHED        0x00400000    /* Unused, ignored */
    #define CLONE_UNTRACED        0x00800000    /* set if the tracing process can't force CLONE_PTRACE on this clone */
    #define CLONE_CHILD_SETTID    0x01000000    /* set the TID in the child */
    /* 0x02000000 was previously the unused CLONE_STOPPED (Start in stopped state)
       and is now available for re-use. */
    #define CLONE_NEWUTS        0x04000000    /* New utsname namespace */
    #define CLONE_NEWIPC        0x08000000    /* New ipc namespace */
    #define CLONE_NEWUSER        0x10000000    /* New user namespace */----------create a new user namespace for the child.
    #define CLONE_NEWPID        0x20000000    /* New pid namespace */------------create a new PID namespace.
    #define CLONE_NEWNET        0x40000000    /* New network namespace */
    #define CLONE_IO        0x80000000    /* Clone io context */

    The main call path is as follows:

    do_fork------------------------------------------
      ->copy_process---------------------------------
        ->dup_task_struct----------------------------
        ->sched_fork---------------------------------
        ->copy_files
        ->copy_fs
        ->copy_sighand
        ->copy_signal
        ->copy_mm------------------------------------
          ->dup_mm-----------------------------------
        ->copy_namespaces
        ->copy_io
        ->copy_thread--------------------------------

    do_fork() first performs a simple check around CLONE_UNTRACED, hands the bulk of the work to copy_process(), and finally wakes up the newly created process.

    /*
     *  Ok, this is the main fork-routine.
     *
     * It copies the process, and if successful kick-starts
     * it and waits for it to finish using the VM if required.
     */
    long do_fork(unsigned long clone_flags,
              unsigned long stack_start,
              unsigned long stack_size,
              int __user *parent_tidptr,
              int __user *child_tidptr)
    {
        struct task_struct *p;
        int trace = 0;
        long nr;
    
        /*
         * Determine whether and which event to report to ptracer.  When
         * called from kernel_thread or CLONE_UNTRACED is explicitly
         * requested, no event is reported; otherwise, report if the event
         * for the type of forking is enabled.
         */
        if (!(clone_flags & CLONE_UNTRACED)) {
            if (clone_flags & CLONE_VFORK)
                trace = PTRACE_EVENT_VFORK;
            else if ((clone_flags & CSIGNAL) != SIGCHLD)
                trace = PTRACE_EVENT_CLONE;
            else
                trace = PTRACE_EVENT_FORK;
    
            if (likely(!ptrace_event_enabled(current, trace)))
                trace = 0;
        }
    
        p = copy_process(clone_flags, stack_start, stack_size,
                 child_tidptr, NULL, trace);
        /*
         * Do this prior waking up the new thread - the thread pointer
         * might get invalid after that point, if the thread exits quickly.
         */
        if (!IS_ERR(p)) {
            struct completion vfork;
            struct pid *pid;
    
            trace_sched_process_fork(current, p);
    
            pid = get_task_pid(p, PIDTYPE_PID);
            nr = pid_vnr(pid);
    
            if (clone_flags & CLONE_PARENT_SETTID)
                put_user(nr, parent_tidptr);
    
            if (clone_flags & CLONE_VFORK) {------------------for CLONE_VFORK, initialize the vfork completion
                p->vfork_done = &vfork;
                init_completion(&vfork);
                get_task_struct(p);
            }
    
            wake_up_new_task(p);------------------------------wake up the new process p, i.e. put it on a runqueue so the scheduler can run it.
    
            /* forking complete and child started to run, tell ptracer */
            if (unlikely(trace))
                ptrace_event_pid(trace, pid);
    
            if (clone_flags & CLONE_VFORK) {
                if (!wait_for_vfork_done(p, &vfork))---------wait for the child to signal the p->vfork_done completion
                    ptrace_event_pid(PTRACE_EVENT_VFORK_DONE, pid);
            }
    
            put_pid(pid);
        } else {
            nr = PTR_ERR(p);
        }
        return nr;
    }

    2.2 copy_process

    Per-process flags are defined in include/linux/sched.h:

    /*
     * Per process flags
     */
    #define PF_EXITING    0x00000004    /* getting shut down */
    #define PF_EXITPIDONE    0x00000008    /* pi exit done on shut down */
    #define PF_VCPU        0x00000010    /* I'm a virtual CPU */
    #define PF_WQ_WORKER    0x00000020    /* I'm a workqueue worker */
    #define PF_FORKNOEXEC    0x00000040    /* forked but didn't exec */
    #define PF_MCE_PROCESS  0x00000080      /* process policy on mce errors */
    #define PF_SUPERPRIV    0x00000100    /* used super-user privileges */
    #define PF_DUMPCORE    0x00000200    /* dumped core */
    #define PF_SIGNALED    0x00000400    /* killed by a signal */
    #define PF_MEMALLOC    0x00000800    /* Allocating memory */
    #define PF_NPROC_EXCEEDED 0x00001000    /* set_user noticed that RLIMIT_NPROC was exceeded */
    #define PF_USED_MATH    0x00002000    /* if unset the fpu must be initialized before use */
    #define PF_USED_ASYNC    0x00004000    /* used async_schedule*(), used by module init */
    #define PF_NOFREEZE    0x00008000    /* this thread should not be frozen */
    #define PF_FROZEN    0x00010000    /* frozen for system suspend */
    #define PF_FSTRANS    0x00020000    /* inside a filesystem transaction */
    #define PF_KSWAPD    0x00040000    /* I am kswapd */
    #define PF_MEMALLOC_NOIO 0x00080000    /* Allocating memory without IO involved */
    #define PF_LESS_THROTTLE 0x00100000    /* Throttle me less: I clean memory */
    #define PF_KTHREAD    0x00200000    /* I am a kernel thread */
    #define PF_RANDOMIZE    0x00400000    /* randomize virtual address space */
    #define PF_SWAPWRITE    0x00800000    /* Allowed to write to swap */
    #define PF_NO_SETAFFINITY 0x04000000    /* Userland is not allowed to meddle with cpus_allowed */
    #define PF_MCE_EARLY    0x08000000      /* Early kill for mce process policy */
    #define PF_MUTEX_TESTER    0x20000000    /* Thread belongs to the rt mutex tester */
    #define PF_FREEZER_SKIP    0x40000000    /* Freezer should not count it as freezable */
    #define PF_SUSPEND_TASK 0x80000000      /* this thread called freeze_processes and should not be frozen */

    copy_process uses current to get the calling process's task_struct, allocates a new task_struct and copies the parent's contents into it, then goes on to initialize the main parts of the new process: memory space, file handles, filesystem information, I/O context and so on.

    /*
     * This creates a new process as a copy of the old one,
     * but does not actually start it yet.
     *
     * It copies the registers, and all the appropriate
     * parts of the process environment (as per the clone
     * flags). The actual kick-off is left to the caller.
     */
    static struct task_struct *copy_process(unsigned long clone_flags,
                        unsigned long stack_start,
                        unsigned long stack_size,
                        int __user *child_tidptr,
                        struct pid *pid,
                        int trace)
    {
        int retval;
        struct task_struct *p;
    
        if ((clone_flags & (CLONE_NEWNS|CLONE_FS)) == (CLONE_NEWNS|CLONE_FS))
            return ERR_PTR(-EINVAL);
    
        if ((clone_flags & (CLONE_NEWUSER|CLONE_FS)) == (CLONE_NEWUSER|CLONE_FS))---------------CLONE_FS (share filesystem info) conflicts with CLONE_NEWNS/CLONE_NEWUSER (new mount/user namespace).
            return ERR_PTR(-EINVAL);
    
        /*
         * Thread groups must share signals as well, and detached threads
         * can only be started up within the thread group.
         */
        if ((clone_flags & CLONE_THREAD) && !(clone_flags & CLONE_SIGHAND))--------------------a thread group must share signal handlers
            return ERR_PTR(-EINVAL);
    
        /*
         * Shared signal handlers imply shared VM. By way of the above,
         * thread groups also imply shared VM. Blocking this case allows
         * for various simplifications in other code.
         */
        if ((clone_flags & CLONE_SIGHAND) && !(clone_flags & CLONE_VM))----------------------sharing signal handlers requires sharing the address space
            return ERR_PTR(-EINVAL);
    
        /*
         * Siblings of global init remain as zombies on exit since they are
         * not reaped by their parent (swapper). To solve this and to avoid
         * multi-rooted process trees, prevent global and container-inits
         * from creating siblings.
         */
        if ((clone_flags & CLONE_PARENT) &&
                    current->signal->flags & SIGNAL_UNKILLABLE)-----------------------------init is the parent of all user-space processes; a sibling of init could never be reaped and would become a zombie.
            return ERR_PTR(-EINVAL);
    
        /*
         * If the new process will be in a different pid or user namespace
         * do not allow it to share a thread group or signal handlers or
         * parent with the forking task.
         */
        if (clone_flags & CLONE_SIGHAND) {---------------------------------------------------a new pid or user namespace conflicts with sharing signal handlers or a thread group, because namespaces isolate access.
            if ((clone_flags & (CLONE_NEWUSER | CLONE_NEWPID)) ||
                (task_active_pid_ns(current) !=
                    current->nsproxy->pid_ns_for_children))
                return ERR_PTR(-EINVAL);
        }
    
        retval = security_task_create(clone_flags);
        if (retval)
            goto fork_out;
    
        retval = -ENOMEM;
        p = dup_task_struct(current);-------------------------------------------------------allocate a task_struct, using the current process as the template.
        if (!p)
            goto fork_out;
    
        ftrace_graph_init_task(p);
    
        rt_mutex_init_task(p);
    
    #ifdef CONFIG_PROVE_LOCKING
        DEBUG_LOCKS_WARN_ON(!p->hardirqs_enabled);
        DEBUG_LOCKS_WARN_ON(!p->softirqs_enabled);
    #endif
        retval = -EAGAIN;
        if (atomic_read(&p->real_cred->user->processes) >=
                task_rlimit(p, RLIMIT_NPROC)) {
            if (p->real_cred->user != INIT_USER &&
                !capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN))
                goto bad_fork_free;
        }
        current->flags &= ~PF_NPROC_EXCEEDED;
    
        retval = copy_creds(p, clone_flags);
        if (retval < 0)
            goto bad_fork_free;
    
        /*
         * If multiple threads are within copy_process(), then this check
         * triggers too late. This doesn't hurt, the check is only there
         * to stop root fork bombs.
         */
        retval = -EAGAIN;
        if (nr_threads >= max_threads)----------------------------------------------max_threads is the maximum number of threads the system allows; nr_threads is the current count.
            goto bad_fork_cleanup_count;
    
        if (!try_module_get(task_thread_info(p)->exec_domain->module))
            goto bad_fork_cleanup_count;
    
        delayacct_tsk_init(p);    /* Must remain after dup_task_struct() */
        p->flags &= ~(PF_SUPERPRIV | PF_WQ_WORKER);---------------------------------clear the super-user-privilege and workqueue-worker flags.
        p->flags |= PF_FORKNOEXEC;--------------------------------------------------mark as forked but not yet exec'ed
        INIT_LIST_HEAD(&p->children);-----------------------------------------------the new process's list of children
        INIT_LIST_HEAD(&p->sibling);------------------------------------------------the new process's sibling list node
        rcu_copy_process(p);
        p->vfork_done = NULL;
        spin_lock_init(&p->alloc_lock);
    
        init_sigpending(&p->pending);
    
        p->utime = p->stime = p->gtime = 0;
        p->utimescaled = p->stimescaled = 0;
    #ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
        p->prev_cputime.utime = p->prev_cputime.stime = 0;
    #endif
    #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
        seqlock_init(&p->vtime_seqlock);
        p->vtime_snap = 0;
        p->vtime_snap_whence = VTIME_SLEEPING;
    #endif
    
    #if defined(SPLIT_RSS_COUNTING)
        memset(&p->rss_stat, 0, sizeof(p->rss_stat));
    #endif
    
        p->default_timer_slack_ns = current->timer_slack_ns;
    
        task_io_accounting_init(&p->ioac);
        acct_clear_integrals(p);
    
        posix_cpu_timers_init(p);
    
        p->start_time = ktime_get_ns();
        p->real_start_time = ktime_get_boot_ns();
        p->io_context = NULL;
        p->audit_context = NULL;
        if (clone_flags & CLONE_THREAD)
            threadgroup_change_begin(current);
        cgroup_fork(p);
    #ifdef CONFIG_NUMA
        p->mempolicy = mpol_dup(p->mempolicy);
        if (IS_ERR(p->mempolicy)) {
            retval = PTR_ERR(p->mempolicy);
            p->mempolicy = NULL;
            goto bad_fork_cleanup_threadgroup_lock;
        }
    #endif...
    #ifdef CONFIG_BCACHE
        p->sequential_io    = 0;
        p->sequential_io_avg    = 0;
    #endif
    
        /* Perform scheduler related setup. Assign this task to a CPU. */
        retval = sched_fork(clone_flags, p);-----------------------------------------initialize scheduler-related data structures and assign the task to a CPU.
        if (retval)
            goto bad_fork_cleanup_policy;
    
        retval = perf_event_init_task(p);                                                                                         
        if (retval)
            goto bad_fork_cleanup_policy;
        retval = audit_alloc(p);
        if (retval)
            goto bad_fork_cleanup_perf;
        /* copy all the process information */
        shm_init_task(p);
        retval = copy_semundo(clone_flags, p);
        if (retval)
            goto bad_fork_cleanup_audit;
        retval = copy_files(clone_flags, p);-----------------------------------------copy the parent's open-file information
        if (retval)
            goto bad_fork_cleanup_semundo;
        retval = copy_fs(clone_flags, p);--------------------------------------------copy the parent's fs_struct information
        if (retval)
            goto bad_fork_cleanup_files;
        retval = copy_sighand(clone_flags, p);
        if (retval)
            goto bad_fork_cleanup_fs;
        retval = copy_signal(clone_flags, p);
        if (retval)
            goto bad_fork_cleanup_sighand;
        retval = copy_mm(clone_flags, p);--------------------------------------------copy the parent's memory-management information
        if (retval)
            goto bad_fork_cleanup_signal;
        retval = copy_namespaces(clone_flags, p);
        if (retval)
            goto bad_fork_cleanup_mm;
        retval = copy_io(clone_flags, p);--------------------------------------------copy the parent's io_context
        if (retval)
            goto bad_fork_cleanup_namespaces;
        retval = copy_thread(clone_flags, stack_start, stack_size, p);
        if (retval)
            goto bad_fork_cleanup_io;
    
        if (pid != &init_struct_pid) {
            retval = -ENOMEM;
            pid = alloc_pid(p->nsproxy->pid_ns_for_children);
            if (!pid)
                goto bad_fork_cleanup_io;
        }
    
        p->set_child_tid = (clone_flags & CLONE_CHILD_SETTID) ? child_tidptr : NULL;
        /*
         * Clear TID on mm_release()?
         */
        p->clear_child_tid = (clone_flags & CLONE_CHILD_CLEARTID) ? child_tidptr : NULL;
    #ifdef CONFIG_BLOCK
        p->plug = NULL;
    #endif
    #ifdef CONFIG_FUTEX
        p->robust_list = NULL;
    #ifdef CONFIG_COMPAT
        p->compat_robust_list = NULL;
    #endif
        INIT_LIST_HEAD(&p->pi_state_list);
        p->pi_state_cache = NULL;
    #endif
        /*
         * sigaltstack should be cleared when sharing the same VM
         */
        if ((clone_flags & (CLONE_VM|CLONE_VFORK)) == CLONE_VM)
            p->sas_ss_sp = p->sas_ss_size = 0;
    
        /*
         * Syscall tracing and stepping should be turned off in the
         * child regardless of CLONE_PTRACE.
         */
        user_disable_single_step(p);
        clear_tsk_thread_flag(p, TIF_SYSCALL_TRACE);
    #ifdef TIF_SYSCALL_EMU
        clear_tsk_thread_flag(p, TIF_SYSCALL_EMU);
    #endif
        clear_all_latency_tracing(p);
    
        /* ok, now we should be set up.. */
        p->pid = pid_nr(pid);-------------------------------------------------------get the new process's pid number
        if (clone_flags & CLONE_THREAD) {
            p->exit_signal = -1;
            p->group_leader = current->group_leader;
            p->tgid = current->tgid;
        } else {
            if (clone_flags & CLONE_PARENT)
                p->exit_signal = current->group_leader->exit_signal;
            else
                p->exit_signal = (clone_flags & CSIGNAL);
            p->group_leader = p;
            p->tgid = p->pid;
        }
    
        p->nr_dirtied = 0;
        p->nr_dirtied_pause = 128 >> (PAGE_SHIFT - 10);
        p->dirty_paused_when = 0;
    
        p->pdeath_signal = 0;
        INIT_LIST_HEAD(&p->thread_group);
        p->task_works = NULL;
    
        /*
         * Make it visible to the rest of the system, but dont wake it up yet.
         * Need tasklist lock for parent etc handling!
         */
        write_lock_irq(&tasklist_lock);
    
        /* CLONE_PARENT re-uses the old parent */
        if (clone_flags & (CLONE_PARENT|CLONE_THREAD)) {
            p->real_parent = current->real_parent;
            p->parent_exec_id = current->parent_exec_id;
        } else {
            p->real_parent = current;
            p->parent_exec_id = current->self_exec_id;
        }
    
        spin_lock(&current->sighand->siglock);
    
        /*
         * Copy seccomp details explicitly here, in case they were changed
         * before holding sighand lock.
         */
        copy_seccomp(p);
    
        /*
         * Process group and session signals need to be delivered to just the
         * parent before the fork or both the parent and the child after the
         * fork. Restart if a signal comes in before we add the new process to
         * it's process group.
         * A fatal signal pending means that current will exit, so the new
         * thread can't slip out of an OOM kill (or normal SIGKILL).
        */
        recalc_sigpending();
        if (signal_pending(current)) {
            spin_unlock(&current->sighand->siglock);
            write_unlock_irq(&tasklist_lock);
            retval = -ERESTARTNOINTR;
            goto bad_fork_free_pid;
        }
    
        if (likely(p->pid)) {
            ptrace_init_task(p, (clone_flags & CLONE_PTRACE) || trace);
    
            init_task_pid(p, PIDTYPE_PID, pid);
            if (thread_group_leader(p)) {
                init_task_pid(p, PIDTYPE_PGID, task_pgrp(current));
                init_task_pid(p, PIDTYPE_SID, task_session(current));
    
                if (is_child_reaper(pid)) {
                    ns_of_pid(pid)->child_reaper = p;
                    p->signal->flags |= SIGNAL_UNKILLABLE;
                }
    
                p->signal->leader_pid = pid;
                p->signal->tty = tty_kref_get(current->signal->tty);
                list_add_tail(&p->sibling, &p->real_parent->children);
                list_add_tail_rcu(&p->tasks, &init_task.tasks);
                attach_pid(p, PIDTYPE_PGID);
                attach_pid(p, PIDTYPE_SID);
                __this_cpu_inc(process_counts);
            } else {
                current->signal->nr_threads++;
                atomic_inc(&current->signal->live);
                atomic_inc(&current->signal->sigcnt);
                list_add_tail_rcu(&p->thread_group,
                          &p->group_leader->thread_group);
                list_add_tail_rcu(&p->thread_node,
                          &p->signal->thread_head);
            }
            attach_pid(p, PIDTYPE_PID);
            nr_threads++;---------------------------------------------------------increment the global thread count
        }
    
        total_forks++;
        spin_unlock(&current->sighand->siglock);
        syscall_tracepoint_update(p);
        write_unlock_irq(&tasklist_lock);
    
        proc_fork_connector(p);
        cgroup_post_fork(p);
        if (clone_flags & CLONE_THREAD)
            threadgroup_change_end(current);
        perf_event_fork(p);
    
        trace_task_newtask(p, clone_flags);
        uprobe_copy_process(p, clone_flags);
    
        return p;----------------------------------------------------------------on success, return the new process's task_struct.
    ...return ERR_PTR(retval);---------------------------------------------------error-handling paths
    }

    dup_task_struct copies the task_struct and thread_info from the parent.

    static struct task_struct *dup_task_struct(struct task_struct *orig)
    {
        struct task_struct *tsk;
        struct thread_info *ti;
        int node = tsk_fork_get_node(orig);
        int err;
    
        tsk = alloc_task_struct_node(node);-------------------------------------------------allocate a task_struct
        if (!tsk)
            return NULL;
    
        ti = alloc_thread_info_node(tsk, node);---------------------------------------------allocate a thread_info (the kernel stack area)
        if (!ti)
            goto free_tsk;
    
        err = arch_dup_task_struct(tsk, orig);----------------------------------------------copy the parent's task_struct into the new tsk
        if (err)
            goto free_ti;
    
        tsk->stack = ti;--------------------------------------------------------------------point the new task's stack at the newly allocated thread_info.
    #ifdef CONFIG_SECCOMP
        /*
         * We must handle setting up seccomp filters once we're under
         * the sighand lock in case orig has changed between now and
         * then. Until then, filter must be NULL to avoid messing up
         * the usage counts on the error path calling free_task.
         */
        tsk->seccomp.filter = NULL;
    #endif
    
        setup_thread_stack(tsk, orig);------------------------------------------------------copy the parent's thread_info into the child's and point the child's thread_info->task at the child
        clear_user_return_notifier(tsk);
        clear_tsk_need_resched(tsk);
        set_task_stack_end_magic(tsk);
    ...return tsk;
    ...
    }

    The main process run states are:

    #define TASK_RUNNING        0
    #define TASK_INTERRUPTIBLE    1
    #define TASK_UNINTERRUPTIBLE    2
    #define __TASK_STOPPED        4
    #define __TASK_TRACED        8
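
    As a side note, the usual way kernel code moves a task between these states is the prepare-to-sleep idiom below (a minimal sketch; `condition` is a placeholder for whatever the caller is waiting on):

    set_current_state(TASK_INTERRUPTIBLE);    /* mark ourselves as about to sleep       */
    if (!condition)                           /* re-check the condition after marking   */
            schedule();                       /* give up the CPU until someone wakes us */
    __set_current_state(TASK_RUNNING);        /* back to TASK_RUNNING after wakeup      */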

    sched_fork delegates most of its work to __sched_fork(), then selects a sched_class based on the priority and calls its task_fork method.

    Finally it sets the CPU the new process will run on; if that is not the current CPU, the task is migrated.

    /*
     * fork()/clone()-time setup:
     */
    int sched_fork(unsigned long clone_flags, struct task_struct *p)
    {
        unsigned long flags;
        int cpu = get_cpu();-------------------------------------------------------disable kernel preemption first, then get the current CPU id.
    
        __sched_fork(clone_flags, p);----------------------------------------------fill in the sched_entity and initialize scheduler-related fields.
        /*
         * We mark the process as running here. This guarantees that
         * nobody will actually run it, and a signal or other external
         * event cannot wake it up and insert it on the runqueue either.
         */
        p->state = TASK_RUNNING;---------------------------------------------------mark as TASK_RUNNING even though it is not actually running yet.
    
        /*
         * Make sure we do not leak PI boosting priority to the child.
         */
        p->prio = current->normal_prio;--------------------------------------------inherit the parent's normal_prio as the child's prio
    
        /*
         * Revert to default priority/policy on fork if requested.
         */
        if (unlikely(p->sched_reset_on_fork)) {
            if (task_has_dl_policy(p) || task_has_rt_policy(p)) {
                p->policy = SCHED_NORMAL;
                p->static_prio = NICE_TO_PRIO(0);
                p->rt_priority = 0;
            } else if (PRIO_TO_NICE(p->static_prio) < 0)
                p->static_prio = NICE_TO_PRIO(0);
    
            p->prio = p->normal_prio = __normal_prio(p);
            set_load_weight(p);
    
            /*
             * We don't need the reset flag anymore after the fork. It has
             * fulfilled its duty:
             */
            p->sched_reset_on_fork = 0;
        }
    
        if (dl_prio(p->prio)) {---------------------------------------------------SCHED_DEADLINE priorities are negative, i.e. below 0.
            put_cpu();
            return -EAGAIN;
        } else if (rt_prio(p->prio)) {--------------------------------------------real-time priorities are 0-99
            p->sched_class = &rt_sched_class;
        } else {------------------------------------------------------------------CFS (fair) priorities are 100-139
            p->sched_class = &fair_sched_class;
        }
    
        if (p->sched_class->task_fork)
            p->sched_class->task_fork(p);
    
        /*
         * The child is not yet in the pid-hash so no cgroup attach races,
         * and the cgroup is pinned to this child due to cgroup_fork()
         * is ran before sched_fork().
         *
         * Silence PROVE_RCU.
         */
        raw_spin_lock_irqsave(&p->pi_lock, flags);
        set_task_cpu(p, cpu);------------------------------------------------------check whether the task's recorded CPU is the current one; if not, migrate it using the previously chosen sched_class->migrate_task_rq.
        raw_spin_unlock_irqrestore(&p->pi_lock, flags);
    
    #if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT)
        if (likely(sched_info_on()))
            memset(&p->sched_info, 0, sizeof(p->sched_info));
    #endif
    #if defined(CONFIG_SMP)
        p->on_cpu = 0;
    #endif
        init_task_preempt_count(p);
    #ifdef CONFIG_SMP
        plist_node_init(&p->pushable_tasks, MAX_PRIO);
        RB_CLEAR_NODE(&p->pushable_dl_tasks);
    #endif
    
        put_cpu();-----------------------------------------------------------------re-enable kernel preemption.
        return 0;
    }

    copy_mm first sets up MM-related fields, then uses dup_mm to allocate an mm_struct and copy the parent's into the new process's.

    Finally the new mm_struct is assigned to task_struct->mm.

    static int copy_mm(unsigned long clone_flags, struct task_struct *tsk)
    {
        struct mm_struct *mm, *oldmm;
        int retval;
    
        tsk->min_flt = tsk->maj_flt = 0;
        tsk->nvcsw = tsk->nivcsw = 0;
    #ifdef CONFIG_DETECT_HUNG_TASK
        tsk->last_switch_count = tsk->nvcsw + tsk->nivcsw;
    #endif
    
        tsk->mm = NULL;
        tsk->active_mm = NULL;
    
        /*
         * Are we cloning a kernel thread?
         *
         * We need to steal a active VM for that..
         */
        oldmm = current->mm;
        if (!oldmm)-----------------------------------------------if current->mm is NULL, the caller is a kernel thread.
            return 0;
    
        /* initialize the new vmacache entries */
        vmacache_flush(tsk);
    
        if (clone_flags & CLONE_VM) {----------------------------CLONE_VM means parent and child share the address space, so there is no need to build a new one; just reuse oldmm.
            atomic_inc(&oldmm->mm_users);
            mm = oldmm;
            goto good_mm;
        }
    
        retval = -ENOMEM;
        mm = dup_mm(tsk);---------------------------------------create a separate new mm_struct for the child.
        if (!mm)
            goto fail_nomem;
    
    good_mm:
        tsk->mm = mm;-------------------------------------------assign the address space to the new process.
        tsk->active_mm = mm;
        return 0;
    
    fail_nomem:
        return retval;
    }

    dup_mm copies the mm_struct from the parent, initializes it, and returns the finished mm_struct to copy_mm.

    /*
     * Allocate a new mm structure and copy contents from the
     * mm structure of the passed in task structure.
     */
    static struct mm_struct *dup_mm(struct task_struct *tsk)
    {
        struct mm_struct *mm, *oldmm = current->mm;
        int err;
    
        mm = allocate_mm();-----------------------------------allocate an mm_struct
        if (!mm)
            goto fail_nomem;
    
        memcpy(mm, oldmm, sizeof(*mm));-----------------------copy the parent's mm_struct into the new one.
    
        if (!mm_init(mm, tsk))--------------------------------re-initialize the child's mm_struct members; the data was copied from the parent, but the child needs its own fresh initialization.
            goto fail_nomem;
    
        dup_mm_exe_file(oldmm, mm);
    
        err = dup_mmap(mm, oldmm);----------------------------copy the PTE entries of all the parent's VMAs into the child's corresponding page tables.
        if (err)
            goto free_pt;
    
        mm->hiwater_rss = get_mm_rss(mm);
        mm->hiwater_vm = mm->total_vm;
    
        if (mm->binfmt && !try_module_get(mm->binfmt->module))
            goto free_pt;
    
        return mm;
    ...
    }

    On the ARM architecture, a struct pt_regs holding the ARM general-purpose registers sits at the top of the kernel stack.

    struct pt_regs {
        unsigned long uregs[18];
    };
    
    #define ARM_cpsr    uregs[16]
    #define ARM_pc        uregs[15]
    #define ARM_lr        uregs[14]
    #define ARM_sp        uregs[13]
    #define ARM_ip        uregs[12]
    #define ARM_fp        uregs[11]
    #define ARM_r10        uregs[10]
    #define ARM_r9        uregs[9]
    #define ARM_r8        uregs[8]
    #define ARM_r7        uregs[7]
    #define ARM_r6        uregs[6]
    #define ARM_r5        uregs[5]
    #define ARM_r4        uregs[4]
    #define ARM_r3        uregs[3]
    #define ARM_r2        uregs[2]
    #define ARM_r1        uregs[1]
    #define ARM_r0        uregs[0]
    #define ARM_ORIG_r0    uregs[17]

    As for where pt_regs sits on the kernel stack: task_stack_page(p) first finds the start of the kernel stack, i.e. its bottom.

    Then THREAD_START_SP is added, i.e. THREAD_SIZE (two pages, 8KB) minus the 8-byte hole.

    So childregs points at the top of the stack, one struct pt_regs below THREAD_START_SP.

    #define task_pt_regs(p) 
        ((struct pt_regs *)(THREAD_START_SP + task_stack_page(p)) - 1)
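
    As a worked example with the numbers above: THREAD_START_SP = 8184 and sizeof(struct pt_regs) = 18 * 4 = 72 bytes on ARM32, so childregs ends up at task_stack_page(p) + 8184 - 72, i.e. 72 bytes below the initial stack pointer, right under the 8-byte hole at the top of the stack.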

    copy_thread first gets the pt_regs area at the top of the stack, then fills in the thread_info->cpu_context process context.

    asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");
    
    int
    copy_thread(unsigned long clone_flags, unsigned long stack_start,
            unsigned long stk_sz, struct task_struct *p)
    {
        struct thread_info *thread = task_thread_info(p);--------------------------get the new task's thread_info.
        struct pt_regs *childregs = task_pt_regs(p);-------------------------------get the new task's pt_regs
    
        memset(&thread->cpu_context, 0, sizeof(struct cpu_context_save));----------cpu_context holds the general-purpose registers saved across a context switch.
    
        if (likely(!(p->flags & PF_KTHREAD))) {------------------------------------not a kernel thread: user process/thread case
            *childregs = *current_pt_regs();
            childregs->ARM_r0 = 0;
            if (stack_start)
                childregs->ARM_sp = stack_start;
        } else {-------------------------------------------------------------------kernel thread case: r4 holds stk_sz (the thread argument) and r5 holds stack_start (the thread function).
            memset(childregs, 0, sizeof(struct pt_regs));
            thread->cpu_context.r4 = stk_sz;
            thread->cpu_context.r5 = stack_start;
            childregs->ARM_cpsr = SVC_MODE;
        }
        thread->cpu_context.pc = (unsigned long)ret_from_fork;---------------------cpu_context.pc points at ret_from_fork
        thread->cpu_context.sp = (unsigned long)childregs;-------------------------cpu_context.sp points at the new process's kernel stack (childregs)
    
        clear_ptrace_hw_breakpoint(p);
    
        if (clone_flags & CLONE_SETTLS)
            thread->tp_value[0] = childregs->ARM_r3;
        thread->tp_value[1] = get_tpuser();
    
        thread_notify(THREAD_NOTIFY_COPY, thread);
    
        return 0;
    }

    3. Testing fork(), vfork() and clone()

    3.1 Nested fork() printing

    3.1.1 Code

    #include <stdio.h>
    #include <unistd.h>
    #include <sys/wait.h>

    int main(void)
    {
      int i;

      for(i = 0; i < 2; i++) {
        fork();
        printf("_%d-%d-%d\n", getppid(), getpid(), i);
      }
      wait(NULL);
      wait(NULL);
      return 0;
    }

    3.1.2 Running the program and recording a log

    The command and its output are as follows:

    sudo trace-cmd record  -e all  ./fork
    /sys/kernel/tracing/events/*/filter
    Current:4293-i=0
    Current:4293-i=1
    Current:4294-i=0
    Current:4294-i=1
    Current:4295-i=1
    Current:4296-i=1

    The corresponding trace is recorded in trace.dat.

    3.1.3 Flow analysis

    Open trace.dat with kernelshark and filter on sched_process_fork/sys_enter_write/sys_enter_wait4; the result is shown below.

    Here sched_process_fork corresponds to fork, sys_enter_write to printf, sys_enter_wait4 to the start of wait, and sys_exit_wait4 to the end of wait.

    (Figure: per-process event flow, not reproduced here)

    The fork process relationships work out as follows: the loop calls fork() twice, so the original process and its first child each fork again, giving four processes in total; two lines are printed with i=0 and four with i=1, matching the six lines of output above. (Figure: fork process tree, not reproduced here)

     

    Reference: 《linux中fork()函数详解(原创!!实例讲解)》

    3.2 Comparing fork(), vfork() and clone()

    The differences between fork(), vfork() and clone() were described above; the examples below show them in practice.

    3.2.1 fork() vs vfork()

    #include "stdio.h"
    
    int main() {
      int count = 1;
      int child;
      printf("Father, initial count = %d, pid = %d
    ", count, getpid());
      if(!(child = fork())) {
        int i;
        for(i = 0; i < 2; i++) {
          printf("Son, count = %d pid = %d
    ", ++count, getpid());
        }
        exit(1);
      } else {
    sleep(1); printf(
    "Father, count = %d pid = %d child = %d ", count, getpid(), child); } } #include "stdio.h" int main() { int count = 1; int child; printf("Father, initial count = %d, pid = %d ", count, getpid()); if(!(child = vfork())) { int i; for(i = 0; i < 2; i++) { printf("Son, count = %d pid = %d ", ++count, getpid()); } exit(1); } else { printf("Father, count = %d pid = %d child = %d ", count, getpid(), child); } }

    The fork output is:

    Father, initial count = 1, pid = 4721
    Father, count = 1 pid = 4721 child = 4722
    Son, count = 2 pid = 4722
    Son, count = 3 pid = 4722

    The vfork output is:

    Father, initial count = 1, pid = 4726
    Son, count = 2 pid = 4727
    Son, count = 3 pid = 4727
    Father, count = 3 pid = 4726 child = 4727

    After adding sleep(1); to the fork version, the result is:

    Father, initial count = 1, pid = 4858
    Son, count = 2 pid = 4859
    Son, count = 3 pid = 4859
    Father, count = 1 pid = 4858 child = 4859

    1. With vfork, the parent waits for the child to finish before continuing.

    2. With vfork, parent and child share the address space, so the parent's count is modified by the child.

    3. With fork, even after delaying the parent's printf, the parent still prints count=1, showing that the parent's and child's address spaces are independent.

    3.2.2 Comparing different clone flags

    The clone flags determine clone's behaviour, e.g. whether the address space is shared and whether it behaves like vfork.

    #define _GNU_SOURCE

    #include "stdio.h"
    #include "stdlib.h"
    #include "unistd.h"
    #include "sched.h"
    #include "signal.h"
    #define FIBER_STACK 8192
    int count;
    void * stack;
    int do_something(void *arg){
      int i;
      for(i = 0; i < 2; i++) {
        printf("Son, pid = %d, count = %d\n", getpid(), ++count);
      }
      free(stack); // free the child's stack (the original author was unsure whether it would be released automatically after the child exits)
      exit(1);
    }

    int main() {
      count = 1;
      stack = malloc(FIBER_STACK); // allocate a stack for the child
      if(!stack) {
        printf("The stack failed\n");
        exit(0);
      }
      printf("Father, initial count = %d, pid = %d\n", count, getpid());
      clone(&do_something, (char *)stack + FIBER_STACK, CLONE_VM|CLONE_VFORK, 0); // create the child; vary the flags to get the results below
      printf("Father, pid = %d count = %d\n", getpid(), count);
      exit(1);
    }

    Below are the outputs for different flag combinations:

    1. CLONE_VM|CLONE_VFORK
    Parent and child share the address space, and the parent waits for the child to finish.
    So 4968 only continues after 4969 has exited, and count is 3.

    Father, initial count = 1, pid = 4968
    Son, pid = 4969, count = 2
    Son, pid = 4969, count = 3
    Father, pid = 4968 count = 3


    2. CLONE_VM
    Parent and child share the address space, but the parent does not wait; it exits first and the child never gets to print.

    Father, initial count = 1, pid = 5017
    Father, pid = 5017 count = 1

    Adding a sleep(1) before the parent's printf lets the child run first; the parent then prints count=3, confirming that the address space is shared.

    Father, initial count = 1, pid = 5065
    Son, pid = 5066, count = 2
    Son, pid = 5066, count = 3
    Father, pid = 5065 count = 3

    
    
    3. CLONE_VFORK
    The address space is not shared here, but the parent still waits for the child to finish.
    So the parent prints after the child, yet its count is still 1 because the memory is not shared.

    Father, initial count = 1, pid = 4998
    Son, pid = 4999, count = 2
    Son, pid = 4999, count = 3
    Father, pid = 4998 count = 1

    4. 0

    Parent and child do not share memory, and the parent does not wait for the child, so the parent prints first.

    This run alone does not show whether count is shared.

    Father, initial count = 1, pid = 5174
    Father, pid = 5174 count = 1
    Son, pid = 5175, count = 2
    Son, pid = 5175, count = 3

    Adding sleep(1) before the parent's printf gives the following result:

    As expected, the parent keeps its own copy of count and does not share it with the child.

    Father, initial count = 1, pid = 5257
    Son, pid = 5258, count = 2
    Son, pid = 5258, count = 3
    Father, pid = 5257 count = 1

    Reference: 《linux系统调用fork, vfork, clone》
