zoukankan html css js c++ java

struct pid & pid_namespace

alloc_pid() & task_struct插入pid struct tasks[] hash list

fork进程/线程时，copy_process()会给此线程alloc一个struct pid结构体。fork进程/线程时，copy_process()的pid参数是NULL，不等于&init_struct_pid，所以下面的判断成立，会call alloc_pid()

static __latent_entropy struct task_struct *copy_process(
                    unsigned long clone_flags,
                    unsigned long stack_start,
                    unsigned long stack_size,
                    int __user *child_tidptr,
                    struct pid *pid,
                    int trace,
                    unsigned long tls,
                    int node)
{
    if (pid != &init_struct_pid) {
        pid = alloc_pid(p->nsproxy->pid_ns_for_children);
        if (IS_ERR(pid)) {
            retval = PTR_ERR(pid);
            goto bad_fork_cleanup_thread;
        }
    }

看下alloc_pid干了些啥。首先它会alloc一个pid struct，然后设置这个pid struct：

调用idr_alloc_cyclic()，这个函数的返回值就是当前fork线程的pid；

设置pid里numbers成员（nr和ns）

for循环共执行ns->level + 1次（i从ns->level递减到0）。对于没有开CONFIG_PID_NS时，pid namespace将只有一个level，所以ns->level都会是0，所以此时只有一次循环，此时将只会设置pid numbers[0]，0 index即是全局的pid，在整个系统中唯一；

如果开启了CONFIG_PID_NS，此时ns->level将有可能不是0，此时pid->numbers[0]是全局的upid，pid->numbers[1]则是numbers[0]所在namespace的child namespace里的upid，pid->numbers[2]等依次类推。
alloc_pid()的参数ns在没有开启CONFIG_PID_NS的情况下，都是一样的，即指向init_pid_ns

设置完pid struct后，调用idr_replace将此pid struct和alloc的pid作为一对mapping值保存起来：

/*
 * alloc_pid - allocate a struct pid and give it a number in @ns and in
 * every ancestor namespace reached through ->parent.
 *
 * @ns: the pid namespace the new pid is created in; ns->level selects how
 *      many numbers[] slots are filled (ns->level + 1 of them).
 *
 * Returns the new struct pid on the success path shown here, or
 * ERR_PTR(-ENOMEM) when the slab allocation fails.  The other failure
 * paths jump to the out_free / out_unlock labels, which are not part of
 * this excerpt.
 */
struct pid *alloc_pid(struct pid_namespace *ns)
{
    struct pid *pid;
    enum pid_type type;
    int i, nr;
    struct pid_namespace *tmp;
    struct upid *upid;
    int retval = -ENOMEM;

    /* The struct pid itself comes from the namespace's own cache. */
    pid = kmem_cache_alloc(ns->pid_cachep, GFP_KERNEL);
    if (!pid)
        return ERR_PTR(retval);

    tmp = ns;
    pid->level = ns->level;

    /*
     * Walk from @ns up through its parents, one iteration per level
     * (ns->level down to 0), reserving one number in each namespace's idr.
     */
    for (i = ns->level; i >= 0; i--) {
        int pid_min = 1;

        idr_preload(GFP_KERNEL);
        spin_lock_irq(&pidmap_lock);

        /*
         * init really needs pid 1, but after reaching the maximum
         * wrap back to RESERVED_PIDS
         */
        if (idr_get_cursor(&tmp->idr) > RESERVED_PIDS)
            pid_min = RESERVED_PIDS;

        /*
         * Store a null pointer so find_pid_ns does not find
         * a partially initialized PID (see below).
         */
        nr = idr_alloc_cyclic(&tmp->idr, NULL, pid_min,
                      pid_max, GFP_ATOMIC);
        spin_unlock_irq(&pidmap_lock);
        idr_preload_end();

        if (nr < 0) {
            /* -ENOSPC from the idr is reported as -EAGAIN instead. */
            retval = (nr == -ENOSPC) ? -EAGAIN : nr;
            goto out_free;
        }

        /* Record the number and the namespace it belongs to for level i. */
        pid->numbers[i].nr = nr;
        pid->numbers[i].ns = tmp;
        tmp = tmp->parent;
    }

    /*
     * retval is not reassigned here, so a pid_ns_prepare_proc() failure
     * reaches out_free with retval still at its initial -ENOMEM.
     */
    if (unlikely(is_child_reaper(pid))) {
        if (pid_ns_prepare_proc(ns))
            goto out_free;
    }

    get_pid_ns(ns);
    atomic_set(&pid->count, 1);
    /* Start with empty task lists for every pid type. */
    for (type = 0; type < PIDTYPE_MAX; ++type)
        INIT_HLIST_HEAD(&pid->tasks[type]);

    /* Start at the slot for @ns itself and work back towards numbers[0]. */
    upid = pid->numbers + ns->level;
    spin_lock_irq(&pidmap_lock);
    /* Bail out if the PIDNS_ADDING bit is not set in ns->pid_allocated. */
    if (!(ns->pid_allocated & PIDNS_ADDING))
        goto out_unlock;
    for ( ; upid >= pid->numbers; --upid) {
        /* Make the PID visible to find_pid_ns. */
        idr_replace(&upid->ns->idr, pid, upid->nr);  
        upid->ns->pid_allocated++;
    }
    spin_unlock_irq(&pidmap_lock);

    return pid;

alloc_pid()后，会设置当前fork的task_struct的pid成员，此pid成员就是当前fork出的线程的pid，这个pid数值即是上面alloc_pid()里分配的pid结构体里的numbers[0].nr，即系统全局的线程的pid，具有唯一性

/*
 * pid_nr - return the number held in slot 0 of pid->numbers.
 *
 * A NULL @pid yields 0.
 */
static inline pid_t pid_nr(struct pid *pid)
{
    if (!pid)
        return 0;

    return pid->numbers[0].nr;
}

接下来则会将当前fork的task_struct和上面alloc的pid struct关联起来。如果当前fork的线程是进程的主线程（thread group leader），则会将主线程链接到上面alloc给它的struct pid的tasks[PIDTYPE_PID] & tasks[PIDTYPE_TGID] hash list上，以及将它链接到其父进程所链接到的tasks[PIDTYPE_PGID]和tasks[PIDTYPE_SID] hash list上；

如果不是主线程，则只会将此task_struct插入上面给它alloc的pid struct的tasks[PIDTYPE_PID] hash list。

copy_process()
    init_task_pid_links(p);
    if (likely(p->pid)) {
        ptrace_init_task(p, (clone_flags & CLONE_PTRACE) || trace);

        init_task_pid(p, PIDTYPE_PID, pid);
        if (thread_group_leader(p)) {
            init_task_pid(p, PIDTYPE_TGID, pid);
            init_task_pid(p, PIDTYPE_PGID, task_pgrp(current));
            init_task_pid(p, PIDTYPE_SID, task_session(current));

            if (is_child_reaper(pid)) {
                ns_of_pid(pid)->child_reaper = p;
                p->signal->flags |= SIGNAL_UNKILLABLE;
            }
            p->signal->shared_pending.signal = delayed.signal;
            p->signal->tty = tty_kref_get(current->signal->tty);
            /*
             * Inherit has_child_subreaper flag under the same
             * tasklist_lock with adding child to the process tree
             * for propagate_has_child_subreaper optimization.
             */
            p->signal->has_child_subreaper = p->real_parent->signal->has_child_subreaper ||
                             p->real_parent->signal->is_child_subreaper;
            list_add_tail(&p->sibling, &p->real_parent->children);
            list_add_tail_rcu(&p->tasks, &init_task.tasks);
            attach_pid(p, PIDTYPE_TGID);
            attach_pid(p, PIDTYPE_PGID);
            attach_pid(p, PIDTYPE_SID);
            __this_cpu_inc(process_counts);
        } else {
            current->signal->nr_threads++;
            atomic_inc(&current->signal->live);
            atomic_inc(&current->signal->sigcnt);
            task_join_group_stop(p);
            list_add_tail_rcu(&p->thread_group,
                      &p->group_leader->thread_group);
            list_add_tail_rcu(&p->thread_node,
                      &p->signal->thread_head);
        }
        attach_pid(p, PIDTYPE_PID);
        nr_threads++;
    }

setpgid创建进程组或者迁移某个进程到另外一个进程组

1. setpgid创建新的进程组

此时setpgid(pid_t pid, pid_t pgid) pid参数和pgid参数要相等，并且此pid要是thread group leader，比如user space调用setpgid(getpid(), getpid())或者setpgid(0, 0)或者setpgid(getpid(), 0)。此后此进程将脱离其父进程所在的进程组，自己创建了一个独立的进程组。

2. setpgid()迁移一个进程到另外的进程组

此时pgid参数不能为0，setpgid()的pgid参数是另外一个进程组的组长进程的pid，同时要迁移的进程所在的进程组和要迁往的进程组要在同一个session里，此后要迁移的进程将迁入目标进程组，其task_struct将链接到目标进程组组长进程的pid struct的tasks[PIDTYPE_PGID] hash list

/*
 * setpgid - set the process group of the task identified by @pid to @pgid.
 *
 * @pid:  target task; 0 means the caller's thread group leader.
 * @pgid: target process group; 0 means "same value as @pid".
 *
 * Returns 0 on success, or a negative errno:
 *   -EINVAL  @pgid is negative, or the target is not a thread group leader
 *   -ESRCH   no task for @pid, or the target is neither the caller's group
 *            leader nor a task whose real_parent is in the caller's
 *            thread group
 *   -EPERM   session mismatch, p->signal->leader is set, or @pgid does not
 *            name a usable group in the caller's session
 *   -EACCES  the target's real_parent is in the caller's thread group but
 *            PF_FORKNOEXEC is clear in the target's flags
 *   or whatever security_task_setpgid() returns when it is non-zero.
 *
 * Each "err = ..." is deliberately placed before the check it belongs to,
 * so a plain "goto out" reports the most recently assigned error.
 */
SYSCALL_DEFINE2(setpgid, pid_t, pid, pid_t, pgid)
{
    struct task_struct *p;
    struct task_struct *group_leader = current->group_leader;
    struct pid *pgrp;
    int err;

    /* Resolve the two "0 means default" shortcuts before validating. */
    if (!pid)
        pid = task_pid_vnr(group_leader);
    if (!pgid)
        pgid = pid;
    if (pgid < 0)
        return -EINVAL;
    rcu_read_lock();

    /* From this point forward we keep holding onto the tasklist lock
     * so that our parent does not change from under us. -DaveM
     */
    write_lock_irq(&tasklist_lock);

    err = -ESRCH;
    p = find_task_by_vpid(pid);
    if (!p)
        goto out;

    /* Only a thread group leader may be moved. */
    err = -EINVAL;
    if (!thread_group_leader(p))
        goto out;

    if (same_thread_group(p->real_parent, group_leader)) {
        /* Target's real_parent is in the caller's thread group. */
        err = -EPERM;
        if (task_session(p) != task_session(group_leader))
            goto out;
        /* NOTE(review): PF_FORKNOEXEC clear presumably means the child
         * has already exec'd - confirm against the flag's definition.
         */
        err = -EACCES;
        if (!(p->flags & PF_FORKNOEXEC))
            goto out;
    } else {
        /* Otherwise the only acceptable target is the caller itself. */
        err = -ESRCH;
        if (p != group_leader)
            goto out;
    }

    /* Tasks with signal->leader set are refused. */
    err = -EPERM;
    if (p->signal->leader)
        goto out;

    /* Default: the target's own struct pid becomes the group (pgid == pid). */
    pgrp = task_pid(p);
    if (pgid != pid) {
        struct task_struct *g;

        /*
         * Joining another group: it must have at least one task on its
         * PIDTYPE_PGID list, and that task must share the caller's
         * session.  err is still -EPERM if either check fails.
         */
        pgrp = find_vpid(pgid);
        g = pid_task(pgrp, PIDTYPE_PGID);
        if (!g || task_session(g) != task_session(group_leader))
            goto out;
    }

    err = security_task_setpgid(p, pgid);
    if (err)
        goto out;

    /* Re-link only when the group actually changes. */
    if (task_pgrp(p) != pgrp)
        change_pid(p, PIDTYPE_PGID, pgrp);

    err = 0;
out:
    /* All paths lead to here, thus we are safe. -DaveM */
    write_unlock_irq(&tasklist_lock);
    rcu_read_unlock();
    return err;
}

进程的task_struct所插入的pid struct tasks[] hash list

1. 如果进程没有调用setpgid系统调用，并且其父进程也没有执行此系统调用，则其链接关系如下图，task_struct通过其pid_links[PIDTYPE_PID]/[PIDTYPE_TGID]插入它自己的struct pid的tasks[PIDTYPE_PID]/[PIDTYPE_TGID] hash list，其它pid_links[PIDTYPE_PGID]/[PIDTYPE_SID]应该是插入了init_struct_pid的tasks[PIDTYPE_PGID]/[PIDTYPE_SID] hash list:

2. 如果进程有执行setpgid创建了进程组，则pid_links[]的链接关系如下图。

Struct pid是某个进程fork时分配的，后面通过setpgid(0,0)创建一个进程组，首先将自己的task_struct通过pid_links[PIDTYPE_PGID]链接到自己pid struct的tasks[PIDTYPE_PGID] hash list上。后面此进程创建子进程时子进程也都会类似这样将其task_struct链入此pid struct的tasks[PIDTYPE_PGID] hash list上，这样同一个进程组中的所有进程将会被链接到组长进程的pid struct的tasks[PIDTYPE_PGID] hash list上：

　　　　　　　　　　　　　　　　　　　　　　　　　　　　　　　　（1）进程组struct pid tasks[] hash list链接关系

* 进程组中的成员进程是以进程的主线程的task_struct/struct pid来表示

从上述两图可以看出，对于主线程，线程自己的pid struct里的tasks[PIDTYPE_PID]/[PIDTYPE_TGID] hash list长度只有1，即只有一个list node，即为自己本身的task_struct.pid_links[PIDTYPE_PID]/[PIDTYPE_TGID]。

3. 非主线程的struct pid.tasks[] hash list链接关系

如果是非主线程，则只会用到一个hash list，即tasks[PIDTYPE_PID] hash list，并且此hash list也只有一个node，即此非主线程的task_struct.pid_links[PIDTYPE_PID]，同时没有和所在进程内的其它线程以及其它进程有链接关系，所以非主线程的struct pid.tasks[]链接关系很简单

注意：

1. 不管是主线程还是非主线程，如果属于user space的，均会给它alloc一个struct pid；

2. 不管是主线程还是非主线程，因为struct pid.tasks[PIDTYPE_PID] hash list上只有一个node，所以find_task_by_vpid()在tasks[PIDTYPE_PID] hash list上取第一个node就得到了pid_t对应的task_struct

CONFIG_PID_NS开启条件下的多级pid_namespace

上述level 1是level 2的parent；level 0是level 1的parent.

一个level 2的线程fork时，会从level 2开始alloc pid，一直到level 0，所以这里它会alloc 3个pid，即会alloc3个pid namespace的pid number。

level 0是全局的，在通过pid_nr()设置task_struct pid_t成员时，其就是取的level 0 pid_namespace的pid number。

常用pid struct相关API

static inline pid_t task_pid_vnr(struct task_struct *tsk)：根据task_struct得到对应的pid
struct task_struct *find_task_by_vpid(pid_t vnr)：根据pid num得到对应的task_struct

查看全文

相关阅读:
算法与数据结构基础
 算法与数据结构基础
 算法与数据结构基础
 算法与数据结构基础
 算法与数据结构基础
 算法与数据结构基础
 最佳实践根据状态操作,这样能避免吃掉异常
 最佳实践状态设计
 Android HTTPS如何10分钟实现自签名SSL证书
 马桶选购

原文地址：https://www.cnblogs.com/aspirs/p/15753834.html