参考:
2. Linux下2号进程的kthreadd--Linux进程的管理与调度(七)
本文中代码内核版本:3.2.0
kthreadd:这种内核线程只有一个,它的作用是管理调度其它的内核线程。这个线程不能关闭。它在内核初始化的时候被创建,会循环运行一个叫做kthreadd的函数,该函数的作用是运行kthread_create_list全局链表中维护的kthread。其他任务或代码想创建内核线程时需要调用kthread_create(或kthread_create_on_node)创建一个kthread,该kthread会被加入到kthread_create_list链表中(增链表),同时kthread_create会wake up kthreadd_task(即kthreadd)。kthreadd在执行kthread时会调用老的接口——kernel_thread运行一个名叫“kthread”的内核线程去运行创建的kthread,被执行过的kthread会从kthread_create_list链表中删除(减链表),并且kthreadd会在链表为空时调用schedule()让出CPU。kthreadd创建的kthread线程启动后,会通过complete(&create->done)唤醒在kthread_create()中等待的调用者,kthread_create()随后继续执行并返回,之后再执行最初原任务或代码。
创建
在linux启动的C阶段start_kernel()的最后,rest_init()会开启两个进程:kernel_init和kthreadd,之后主线程变成idle线程(代码位于init/main.c)。
linux下的3个特殊的进程:idle进程(PID=0),init进程(PID=1)和kthreadd(PID=2)。
* idle进程由系统自动创建, 运行在内核态 PID=0
idle进程其pid=0,其前身是系统创建的第一个进程,也是唯一一个没有通过fork或者kernel_thread产生的进程。完成加载系统后,演变为进程调度、交换。
* init进程由idle通过kernel_thread创建,在内核空间完成初始化后, 加载init程序, 并最终在用户空间运行 PID=1 PPID=0
由0进程创建,完成系统的初始化. 是系统中所有其它用户进程的祖先进程 。
Linux中的所有进程都是由init进程创建并运行的。首先Linux内核启动,然后在用户空间中启动init进程,再启动其他系统进程。在系统启动完成后,init将变为守护进程监视系统其他进程。
* kthreadd进程由idle通过kernel_thread创建,并始终运行在内核空间, 负责所有内核线程的调度和管理 PID=2 PPID=0
它的任务就是管理和调度其他内核线程kernel_thread, 会循环执行一个kthreadd的函数,该函数的作用就是运行kthread_create_list全局链表中维护的kthread, 当我们调用kthread_create创建的内核线程会被加入到此链表中,因此所有的内核线程都是直接或者间接的以kthreadd为父进程。所有的内核线程的PPID都是2。
注:所有的内核线程在大部分时间里都处于阻塞状态(TASK_INTERRUPTIBLE)只有在系统满足进程需要的某种资源的情况下才会运行。
/*
 * We need to finalize in a non-__init function, or else race conditions
 * between the root thread and the init thread may cause start_kernel to
 * be reaped by free_initmem before the root thread has proceeded to
 * cpu_idle.
 *
 * gcc-3.4 accidentally inlines this function, so use noinline.
 */

/* Completed by rest_init() right after kthreadd_task has been assigned. */
static __initdata DECLARE_COMPLETION(kthreadd_done);

/*
 * rest_init - spawn the kernel_init and kthreadd kernel threads, record the
 * kthreadd task in kthreadd_task, then run schedule() once and finish by
 * calling cpu_idle() on the calling (boot) thread.
 */
static noinline void __init_refok rest_init(void)
{
	int pid;

	rcu_scheduler_starting();
	/*
	 * We need to spawn init first so that it obtains pid 1, however
	 * the init task will end up wanting to create kthreads, which, if
	 * we schedule it before we create kthreadd, will OOPS.
	 */
	kernel_thread(kernel_init, NULL, CLONE_FS | CLONE_SIGHAND);
	numa_default_policy();
	pid = kernel_thread(kthreadd, NULL, CLONE_FS | CLONE_FILES);

	/* Translate the returned pid into its task_struct under rcu_read_lock(). */
	rcu_read_lock();
	kthreadd_task = find_task_by_pid_ns(pid, &init_pid_ns);
	rcu_read_unlock();

	/*
	 * kthreadd_task is now set; signal kthreadd_done.
	 * NOTE(review): the waiter is not visible here -- presumably the init
	 * thread waits on this before it creates any kthreads; confirm.
	 */
	complete(&kthreadd_done);

	/*
	 * The boot idle thread must execute schedule()
	 * at least once to get things moving:
	 */
	init_idle_bootup_task(current);
	preempt_enable_no_resched();
	schedule();

	/* Call into cpu_idle with preempt disabled */
	preempt_disable();
	cpu_idle();
}
kthreadd任务
函数体定义在kernel/kthread.c中。
/* Spinlock taken around every add to / removal from kthread_create_list. */
static DEFINE_SPINLOCK(kthread_create_lock);
/*
 * Queue of pending struct kthread_create_info requests: filled by
 * kthread_create_on_node(), drained by kthreadd().
 */
static LIST_HEAD(kthread_create_list);
/*
 * Task that runs kthreadd(); assigned in rest_init() and woken by
 * kthread_create_on_node() after it queues a request.
 */
struct task_struct *kthreadd_task;
/*
 * One thread-creation request. kthread_create_on_node() fills it in on its
 * own stack, links it into kthread_create_list and waits on @done; kthreadd()
 * unlinks it and passes it to create_kthread().
 */
struct kthread_create_info {
	/* Information passed to kthread() from kthreadd. */
	int (*threadfn)(void *data);	/* function the new thread will run */
	void *data;			/* argument handed to threadfn */
	int node;			/* copied to current->pref_node_fork under CONFIG_NUMA */

	/* Result passed back to kthread_create() from kthreadd. */
	struct task_struct *result;	/* new task, or ERR_PTR(pid) on failure */
	struct completion done;		/* completed once result is valid */

	struct list_head list;		/* link in kthread_create_list */
};

/*
 * Per-thread bookkeeping that kthread() keeps in a local variable for the
 * lifetime of the thread.
 */
struct kthread {
	/*
	 * When non-zero, kthread() skips threadfn and exits with -EINTR.
	 * NOTE(review): presumably set by kthread_stop(), which is not shown
	 * here -- confirm.
	 */
	int should_stop;
	void *data;			/* copy of the threadfn argument */
	struct completion exited;	/* installed as current->vfork_done by kthread() */
};
/*
 * kthreadd - service loop that turns queued creation requests into threads.
 * @unused: not referenced.
 *
 * Sleeps while kthread_create_list is empty; otherwise unlinks each queued
 * struct kthread_create_info and passes it to create_kthread(). The loop has
 * no exit, so the trailing "return 0" is never reached.
 */
int kthreadd(void *unused)
{
	struct task_struct *tsk = current;

	/* Setup a clean context for our children to inherit. */
	set_task_comm(tsk, "kthreadd");
	ignore_signals(tsk);
	set_cpus_allowed_ptr(tsk, cpu_all_mask);
	set_mems_allowed(node_states[N_HIGH_MEMORY]);

	current->flags |= PF_NOFREEZE | PF_FREEZER_NOSIG;

	for (;;) {
		/*
		 * The task state is set before the list is tested.
		 * NOTE(review): presumably so that a wake_up_process() issued
		 * between the test and schedule() is not lost -- confirm.
		 */
		set_current_state(TASK_INTERRUPTIBLE);
		if (list_empty(&kthread_create_list))
			schedule();
		__set_current_state(TASK_RUNNING);

		spin_lock(&kthread_create_lock);
		while (!list_empty(&kthread_create_list)) {
			struct kthread_create_info *create;

			/* Take the oldest request (requests are added at the tail). */
			create = list_entry(kthread_create_list.next,
					    struct kthread_create_info, list);
			list_del_init(&create->list);

			/* The lock is released around create_kthread() and re-taken after. */
			spin_unlock(&kthread_create_lock);

			create_kthread(create);

			spin_lock(&kthread_create_lock);
		}
		spin_unlock(&kthread_create_lock);
	}

	return 0;
}
static void create_kthread(struct kthread_create_info *create) { int pid; #ifdef CONFIG_NUMA current->pref_node_fork = create->node; #endif /* We want our own signal handler (we take no signals by default). */ pid = kernel_thread(kthread, create, CLONE_FS | CLONE_FILES | SIGCHLD); if (pid < 0) { create->result = ERR_PTR(pid); complete(&create->done); } }
kthread任务
/*
 * kthread - entry point of every thread spawned by create_kthread().
 * @_create: the struct kthread_create_info describing this thread.
 *
 * Reports the new task back through @_create, sleeps until it is woken, then
 * runs threadfn(data) unless should_stop was set in the meantime. Never
 * returns: it always ends in do_exit().
 */
static int kthread(void *_create)
{
	/* Copy data: it's on kthread's stack */
	struct kthread_create_info *create = _create;
	int (*threadfn)(void *data) = create->threadfn;
	void *data = create->data;
	struct kthread self;
	int ret;

	self.should_stop = 0;
	self.data = data;
	init_completion(&self.exited);
	current->vfork_done = &self.exited;

	/* OK, tell user we're spawned, wait for stop or wakeup */
	__set_current_state(TASK_UNINTERRUPTIBLE);
	create->result = current;
	/*
	 * After this complete() the creator may return and its on-stack
	 * kthread_create_info goes away, so only the local copies
	 * (threadfn, data) are used from here on.
	 */
	complete(&create->done);
	schedule();

	/* -EINTR is the exit code when the thread is told to stop before running. */
	ret = -EINTR;
	if (!self.should_stop)
		ret = threadfn(data);

	/* we can't just return, we must preserve "self" on stack */
	do_exit(ret);
}
/** * kthread_create_on_node - create a kthread. * @threadfn: the function to run until signal_pending(current). * @data: data ptr for @threadfn. * @node: memory node number. * @namefmt: printf-style name for the thread. * * Description: This helper function creates and names a kernel * thread. The thread will be stopped: use wake_up_process() to start * it. See also kthread_run(). * * If thread is going to be bound on a particular cpu, give its node * in @node, to get NUMA affinity for kthread stack, or else give -1. * When woken, the thread will run @threadfn() with @data as its * argument. @threadfn() can either call do_exit() directly if it is a * standalone thread for which no one will call kthread_stop(), or * return when 'kthread_should_stop()' is true (which means * kthread_stop() has been called). The return value should be zero * or a negative error number; it will be passed to kthread_stop(). * * Returns a task_struct or ERR_PTR(-ENOMEM). */ struct task_struct *kthread_create_on_node(int (*threadfn)(void *data), void *data, int node, const char namefmt[], ...) { struct kthread_create_info create; create.threadfn = threadfn; create.data = data; create.node = node; init_completion(&create.done); spin_lock(&kthread_create_lock); list_add_tail(&create.list, &kthread_create_list); spin_unlock(&kthread_create_lock); wake_up_process(kthreadd_task); wait_for_completion(&create.done); if (!IS_ERR(create.result)) { static const struct sched_param param = { .sched_priority = 0 }; va_list args; va_start(args, namefmt); vsnprintf(create.result->comm, sizeof(create.result->comm), namefmt, args); va_end(args); /* * root may have changed our (kthreadd's) priority or CPU mask. * The kernel thread should not inherit these properties. */ sched_setscheduler_nocheck(create.result, SCHED_NORMAL, ¶m); set_cpus_allowed_ptr(create.result, cpu_all_mask); } return create.result; } EXPORT_SYMBOL(kthread_create_on_node);
kernel/kthread.c的头文件include/linux/kthread.h定义kthread_create():
/*
 * kthread_create - convenience wrapper around kthread_create_on_node() that
 * passes -1 as the node argument. All other arguments, including the
 * optional format arguments, are forwarded unchanged.
 *
 * The trailing backslash is required: without it the macro body is empty and
 * the next line is a stray expression.
 */
#define kthread_create(threadfn, data, namefmt, arg...) \
	kthread_create_on_node(threadfn, data, -1, namefmt, ##arg)