struct pid & pid_namespace
alloc_pid() & task_struct插入pid struct tasks[] hash list
fork进程/线程时,copy_process()会给此线程alloc一个struct pid结构体。当是fork进程/线程时,copy_process()的pid参数将是null,所以会call alloc_pid()
static __latent_entropy struct task_struct *copy_process( unsigned long clone_flags, unsigned long stack_start, unsigned long stack_size, int __user *child_tidptr, struct pid *pid, int trace, unsigned long tls, int node) { if (pid != &init_struct_pid) { pid = alloc_pid(p->nsproxy->pid_ns_for_children); if (IS_ERR(pid)) { retval = PTR_ERR(pid); goto bad_fork_cleanup_thread; } }
看下alloc_pid干了些啥。首先它会alloc一个pid struct,然后设置这个pid struct:
调用idr_alloc_cyclic(),这个函数的返回值就是当前fork线程的pid;
设置pid里numbers成员(nr和ns)
执行ns->level + 1次for循环(i从ns->level递减到0)。在没有开CONFIG_PID_NS时,pid namespace只有一个level,ns->level都是0,所以此时只有一次循环,只会设置pid->numbers[0];index 0即是全局的pid,在整个系统中唯一;
如果开启了CONFIG_PID_NS,此时ns->level将有可能不是0,此时pid->numbers[0]是全局的upid,pid->numbers[1]是numbers[0]所在namespace的child namespace里的upid,pid->numbers[2]等依次类推。
alloc_pid()的参数ns在没有开启CONFIG_PID_NS的情况下,都是一样的,即指向init_pid_ns
设置完pid struct后,调用idr_replace将此pid struct和alloc的pid作为一对mapping值保存起来:
struct pid *alloc_pid(struct pid_namespace *ns) { struct pid *pid; enum pid_type type; int i, nr; struct pid_namespace *tmp; struct upid *upid; int retval = -ENOMEM; pid = kmem_cache_alloc(ns->pid_cachep, GFP_KERNEL); if (!pid) return ERR_PTR(retval); tmp = ns; pid->level = ns->level; for (i = ns->level; i >= 0; i--) { int pid_min = 1; idr_preload(GFP_KERNEL); spin_lock_irq(&pidmap_lock); /* * init really needs pid 1, but after reaching the maximum * wrap back to RESERVED_PIDS */ if (idr_get_cursor(&tmp->idr) > RESERVED_PIDS) pid_min = RESERVED_PIDS; /* * Store a null pointer so find_pid_ns does not find * a partially initialized PID (see below). */ nr = idr_alloc_cyclic(&tmp->idr, NULL, pid_min, pid_max, GFP_ATOMIC); spin_unlock_irq(&pidmap_lock); idr_preload_end(); if (nr < 0) { retval = (nr == -ENOSPC) ? -EAGAIN : nr; goto out_free; } pid->numbers[i].nr = nr; pid->numbers[i].ns = tmp; tmp = tmp->parent; } if (unlikely(is_child_reaper(pid))) { if (pid_ns_prepare_proc(ns)) goto out_free; } get_pid_ns(ns); atomic_set(&pid->count, 1); for (type = 0; type < PIDTYPE_MAX; ++type) INIT_HLIST_HEAD(&pid->tasks[type]); upid = pid->numbers + ns->level; spin_lock_irq(&pidmap_lock); if (!(ns->pid_allocated & PIDNS_ADDING)) goto out_unlock; for ( ; upid >= pid->numbers; --upid) { /* Make the PID visible to find_pid_ns. */ idr_replace(&upid->ns->idr, pid, upid->nr); upid->ns->pid_allocated++; } spin_unlock_irq(&pidmap_lock); return pid;
alloc_pid()后,会设置当前fork的task_struct的pid成员,此pid成员就是当前fork出的线程的pid,这个pid数值即是上面alloc_pid()里分配的pid结构体里的numbers[0].nr,即系统全局的线程的pid,具有唯一性
/*
 * Return the pid number stored in numbers[0] of @pid,
 * or 0 when @pid is NULL.
 */
static inline pid_t pid_nr(struct pid *pid)
{
	pid_t nr = 0;

	/* A NULL pid keeps the default of 0. */
	if (pid)
		nr = pid->numbers[0].nr;
	return nr;
}
接下来则会将当前fork的task_struct和上面alloc的pid struct关联起来。如果当前fork的线程是进程的主线程(thread group leader),则会将主线程链接到上面alloc给它的struct pid的tasks[PIDTYPE_PID] & tasks[PIDTYPE_TGID] hash list上,以及将它链接到其父进程所链接到的tasks[PIDTYPE_PGID]和tasks[PIDTYPE_SID] hash list上;
如果不是主线程,则只会将此task_struct插入上面给它alloc的pid struct的tasks[PIDTYPE_PID] hash list。
copy_process() init_task_pid_links(p); if (likely(p->pid)) { ptrace_init_task(p, (clone_flags & CLONE_PTRACE) || trace); init_task_pid(p, PIDTYPE_PID, pid); if (thread_group_leader(p)) { init_task_pid(p, PIDTYPE_TGID, pid); init_task_pid(p, PIDTYPE_PGID, task_pgrp(current)); init_task_pid(p, PIDTYPE_SID, task_session(current)); if (is_child_reaper(pid)) { ns_of_pid(pid)->child_reaper = p; p->signal->flags |= SIGNAL_UNKILLABLE; } p->signal->shared_pending.signal = delayed.signal; p->signal->tty = tty_kref_get(current->signal->tty); /* * Inherit has_child_subreaper flag under the same * tasklist_lock with adding child to the process tree * for propagate_has_child_subreaper optimization. */ p->signal->has_child_subreaper = p->real_parent->signal->has_child_subreaper || p->real_parent->signal->is_child_subreaper; list_add_tail(&p->sibling, &p->real_parent->children); list_add_tail_rcu(&p->tasks, &init_task.tasks); attach_pid(p, PIDTYPE_TGID); attach_pid(p, PIDTYPE_PGID); attach_pid(p, PIDTYPE_SID); __this_cpu_inc(process_counts); } else { current->signal->nr_threads++; atomic_inc(&current->signal->live); atomic_inc(&current->signal->sigcnt); task_join_group_stop(p); list_add_tail_rcu(&p->thread_group, &p->group_leader->thread_group); list_add_tail_rcu(&p->thread_node, &p->signal->thread_head); } attach_pid(p, PIDTYPE_PID); nr_threads++; }
setpgid创建进程组或者迁移某个进程到另外一个进程组
1. setpgid创建新的进程组
此时setpgid(pid_t pid, pid_t pgid) pid参数和pgid参数要相等,并且此pid要是thread group leader,比如user space调用setpgid(getpid(), getpid())或者setpgid(0, 0)或者setpgid(getpid(), 0)。此后此进程将脱离其父进程所在的进程组,自己创建了一个独立的进程组。
2. setpgid()迁移一个进程到另外的进程组
此时pgid参数不能为0,setpgid()的pgid参数是另外一个进程组的组长进程的pid,同时要迁移的进程所在的进程组和要迁往的进程组要在同一个session里,此后要迁移的进程将迁入目标进程组,其task_struct将链接到目标进程组组长进程的pid struct的tasks[PIDTYPE_PGID] hash list
/*
 * setpgid system call: move the task identified by @pid into the
 * process group identified by @pgid.
 *
 * A @pid of 0 is replaced by the caller's thread group leader, and a
 * @pgid of 0 is replaced by @pid. Returns 0 on success or a negative
 * errno value: -EINVAL, -ESRCH, -EPERM, -EACCES, or whatever
 * security_task_setpgid() reports.
 */
SYSCALL_DEFINE2(setpgid, pid_t, pid, pid_t, pgid)
{
	struct task_struct *p;
	struct task_struct *group_leader = current->group_leader;
	struct pid *pgrp;
	int err;

	/* Resolve the "0 means default" forms of both arguments. */
	if (!pid)
		pid = task_pid_vnr(group_leader);
	if (!pgid)
		pgid = pid;
	if (pgid < 0)
		return -EINVAL;
	rcu_read_lock();

	/*
	 * From this point forward we keep holding onto the tasklist lock
	 * so that our parent does not change from under us. -DaveM
	 */
	write_lock_irq(&tasklist_lock);

	/* The target task must exist ... */
	err = -ESRCH;
	p = find_task_by_vpid(pid);
	if (!p)
		goto out;

	/* ... and must be a thread group leader. */
	err = -EINVAL;
	if (!thread_group_leader(p))
		goto out;

	if (same_thread_group(p->real_parent, group_leader)) {
		/*
		 * The target's real parent is in the caller's thread group:
		 * it must share the caller's session and still have
		 * PF_FORKNOEXEC set.
		 * NOTE(review): PF_FORKNOEXEC presumably means "forked but
		 * has not exec'd yet" -- confirm against its definition.
		 */
		err = -EPERM;
		if (task_session(p) != task_session(group_leader))
			goto out;
		err = -EACCES;
		if (!(p->flags & PF_FORKNOEXEC))
			goto out;
	} else {
		/* Otherwise the only acceptable target is the caller itself. */
		err = -ESRCH;
		if (p != group_leader)
			goto out;
	}

	/*
	 * Refuse when p->signal->leader is set.
	 * NOTE(review): presumably this marks a session leader -- confirm.
	 */
	err = -EPERM;
	if (p->signal->leader)
		goto out;

	/* Default: the target's own struct pid becomes the group (pgid == pid). */
	pgrp = task_pid(p);
	if (pgid != pid) {
		struct task_struct *g;

		/*
		 * Joining another group: some task must already be linked on
		 * that pid's PIDTYPE_PGID list and be in the caller's session.
		 * err is still -EPERM on this failure path.
		 */
		pgrp = find_vpid(pgid);
		g = pid_task(pgrp, PIDTYPE_PGID);
		if (!g || task_session(g) != task_session(group_leader))
			goto out;
	}

	err = security_task_setpgid(p, pgid);
	if (err)
		goto out;

	/* Re-link only when the process group actually changes. */
	if (task_pgrp(p) != pgrp)
		change_pid(p, PIDTYPE_PGID, pgrp);

	err = 0;
out:
	/* All paths lead to here, thus we are safe. -DaveM */
	write_unlock_irq(&tasklist_lock);
	rcu_read_unlock();
	return err;
}
进程的task_struct所插入的pid struct tasks[] hash list
1. 如果进程没有调用setpgid系统调用,并且其父进程也没有执行此系统调用,则其链接关系如下图,task_struct通过其pid_links[PIDTYPE_PID]/[PIDTYPE_TGID]插入它自己的struct pid的tasks[PIDTYPE_PID]/[PIDTYPE_TGID] hash list,其它pid_links[PIDTYPE_PGID]/[PIDTYPE_SID]应该是插入了init_struct_pid的tasks[PIDTYPE_PGID]/[PIDTYPE_SID] hash list:
2. 如果进程有执行setpgid创建了进程组,则pid_links[]的链接关系如下图。
Struct pid是某个进程fork时分配的,后面通过setpgid(0,0)创建一个进程组,首先将自己的task_struct通过pid_links[PIDTYPE_PGID]链接到自己pid struct的tasks[PIDTYPE_PGID] hash list上。后面此进程创建子进程时子进程也都会类似这样将其task_struct链入此pid struct的tasks[PIDTYPE_PGID] hash list上,这样同一个进程组中的所有进程将会被链接到组长进程的pid struct的tasks[PIDTYPE_PGID] hash list上:
(1)进程组struct pid tasks[] hash list链接关系
* 进程组中的成员进程是以进程的主线程的task_struct/struct pid来表示
从上述两图可以看出,对于主线程,线程自己的pid struct里的tasks[PIDTYPE_PID]/[PIDTYPE_TGID] hash list长度只有1,即只有一个list node,即为自己本身的task_struct.pid_links[PIDTYPE_PID]/[PIDTYPE_TGID]。
3. 非主线程的struct pid.tasks[] hash list链接关系
如果是非主线程,则只会用到一个hash list,即tasks[PIDTYPE_PID] hash list,并且此hash list也只有一个node,即此非主线程的task_struct.pid_links[PIDTYPE_PID],同时没有和所在进程内的其它线程以及其它进程有链接关系,所以非主线程的struct pid.tasks[]链接关系很简单
注意:
1. 不管是主线程还是非主线程,如果属于user space的,均会给它alloc一个struct pid;
2. 不管是主线程还是非主线程,因为struct pid.tasks[PIDTYPE_PID] hash list上只有一个node,所以find_task_by_vpid()在tasks[PIDTYPE_PID] hash list上取第一个node就得到了pid_t对应的task_struct
CONFIG_PID_NS开启条件下的多级pid_namespace
上述level 1是level 2的parent;level 0是level 1的parent.
一个level 2的线程fork时,会从level 2开始分配pid number,一直到level 0,所以这里它会分配3个pid number(level 2、level 1、level 0的pid namespace各一个),分别保存在同一个struct pid的numbers[2]、numbers[1]、numbers[0]里;struct pid本身只alloc一个。
level 0是全局的,在通过pid_nr()设置task_struct pid_t成员时,其就是取的level 0 pid_namespace的pid number。
常用pid struct相关API
-
static inline pid_t task_pid_vnr(struct task_struct *tsk):根据task_struct得到对应的pid
-
struct task_struct *find_task_by_vpid(pid_t vnr):根据pid num得到对应的task_struct