转自:https://www.cnblogs.com/arnoldlu/p/11160510.html
应用程序在运行过程中由于各种异常或者bug导致退出,在满足一定条件下产生一个core文件。
通常core文件包含了程序运行时内存、寄存器状态、堆栈指针、内存管理信息以及函数调用堆栈信息。
core就是将程序当前工作状态存储下来生成的一个文件,通过工具分析这个文件,可以定位到程序异常退出时对应的堆栈调用等信息,找出问题点并解决。
1. 配置coredump
如果需要使用coredump,需要通过ulimit进行设置。可以通过ulimit -c查看当前core文件的大小限制;如果为0,则表示coredump被关闭。
通过ulimit -c unlimited可以打开coredump。
coredump文件默认存储在进程的当前工作目录下,文件名为core。
可以通过/proc/sys/kernel/core_pattern进行设置。
%p 出Core进程的PID
%u 出Core进程的UID
%s 造成Core的signal号
%t 出Core的时间,从1970-01-01 00:00:00开始的秒数
%e 出Core进程对应的可执行文件名
例如,通过echo "core-%e-%p-%s-%t" > /proc/sys/kernel/core_pattern可以设置coredump文件的命名格式。
在每个进程下都有coredump_filter节点/proc/<pid>/coredump_filter。
通过配置coredump_filter,可以选择在coredump的时候将哪些内容dump到core文件中。
- (bit 0) anonymous private memory
- (bit 1) anonymous shared memory
- (bit 2) file-backed private memory
- (bit 3) file-backed shared memory
- (bit 4) ELF header pages in file-backed private memory areas (it is effective only if the bit 2 is cleared)
- (bit 5) hugetlb private memory
- (bit 6) hugetlb shared memory
- (bit 7) DAX private memory
- (bit 8) DAX shared memory
coredump_filter的默认值是0x33,也即发生coredump时会将所有anonymous内存、ELF头页面、hugetlb private memory内容保存。
coredump_filter可以被子进程继承,可以echo 0xXX > /proc/self/coredump_filter设置当前进程的coredump_filter。
static ssize_t proc_coredump_filter_write(struct file *file,
const char __user *buf,
size_t count,
loff_t *ppos)
{
...
ret = kstrtouint_from_user(buf, count, 0, &val);-------------------------将buf转换成val值。
if (ret < 0)
return ret;
...
for (i = 0, mask = 1; i < MMF_DUMP_FILTER_BITS; i++, mask <<= 1) {
if (val & mask)
set_bit(i + MMF_DUMP_FILTER_SHIFT, &mm->flags);------------------将coredump_filter的值映射到mm->flags上,后续coredump时使用。
else
clear_bit(i + MMF_DUMP_FILTER_SHIFT, &mm->flags);
}
...
}
其中MMF_DUMP_FILTER_SHIFT为2,所以flags和coredump_filter存在如下对应关系。
/*
 * Bit positions inside mm->flags that hold the coredump filter.
 * coredump_filter bit N is stored at mm->flags bit N + 2
 * (MMF_DUMP_FILTER_SHIFT is 2).
 */
#define MMF_DUMP_ANON_PRIVATE 2 /* coredump_filter bit 0: anonymous private memory */
#define MMF_DUMP_ANON_SHARED 3 /* coredump_filter bit 1: anonymous shared memory */
#define MMF_DUMP_MAPPED_PRIVATE 4 /* coredump_filter bit 2: file-backed private memory */
#define MMF_DUMP_MAPPED_SHARED 5 /* coredump_filter bit 3: file-backed shared memory */
#define MMF_DUMP_ELF_HEADERS 6 /* coredump_filter bit 4: ELF header pages */
#define MMF_DUMP_HUGETLB_PRIVATE 7 /* coredump_filter bit 5: hugetlb private memory */
#define MMF_DUMP_HUGETLB_SHARED 8 /* coredump_filter bit 6: hugetlb shared memory */
#define MMF_DUMP_DAX_PRIVATE 9 /* coredump_filter bit 7: DAX private memory */
#define MMF_DUMP_DAX_SHARED 10 /* coredump_filter bit 8: DAX shared memory */
2. coredump原理
在do_signal()中根据信号判断是否触发coredump,当然还跟coredump limit、mm->flags等等相关。
满足coredump条件后,由do_coredump()进行coredump文件生成,核心是由binfmt->core_dump()进行的。
2.1 触发coredump的条件?
在内核返回用户空间的时候,会调用do_signal()处理信号。
static void do_signal(struct pt_regs *regs, int syscall)
{
unsigned int retval = 0, continue_addr = 0, restart_addr = 0;
struct ksignal ksig;
...
if (get_signal(&ksig)) {
...
}
...
}
int get_signal(struct ksignal *ksig)
{
...
for (;;) {
struct k_sigaction *ka;
...
signr = dequeue_signal(current, &current->blocked, &ksig->info);
...
/* Trace actually delivered signals. */
trace_signal_deliver(signr, &ksig->info, ka);
...
if (sig_kernel_coredump(signr)) {
if (print_fatal_signals)------------------------------可以通过kernel.print-fatal-signals = 1进行设置,对应的节点是/proc/sys/kernel/print-fatal-signals。
print_fatal_signal(ksig->info.si_signo);----------打印当前信号及当前场景的栈信息。
proc_coredump_connector(current);
do_coredump(&ksig->info);
}
...
}
spin_unlock_irq(&sighand->siglock);
ksig->sig = signr;
return ksig->sig > 0;
}
#define sig_kernel_coredump(sig) siginmask(sig, SIG_KERNEL_COREDUMP_MASK)
/*
 * Set of signals tested by sig_kernel_coredump(): a signal in this mask
 * makes get_signal() call do_coredump().
 * Every line of the definition except the last must end with a backslash
 * so the preprocessor treats the whole expression as one macro body.
 */
#define SIG_KERNEL_COREDUMP_MASK (\
rt_sigmask(SIGQUIT) | rt_sigmask(SIGILL) | \
rt_sigmask(SIGTRAP) | rt_sigmask(SIGABRT) | \
rt_sigmask(SIGFPE) | rt_sigmask(SIGSEGV) | \
rt_sigmask(SIGBUS) | rt_sigmask(SIGSYS) | \
rt_sigmask(SIGXCPU) | rt_sigmask(SIGXFSZ) | \
SIGEMT_MASK )
在get_signal()中,判断信号是否会导致coredump。这些信号包括SIGQUIT、SIGILL、SIGTRAP、SIGABRT、SIGFPE、SIGSEGV、SIGBUS、SIGSYS、SIGXCPU、SIGXFSZ。
“终止w/core”表示在进程当前工作目录的core文件中复制了该进程的存储图像(该文件名为core,由此可以看出这种功能很久之前就是UNIX功能的一部分)。
/*
 * Build a PROC_EVENT_COREDUMP process event for @task and pass it to
 * send_msg().
 *
 * @task: the task whose pid and tgid are recorded in the event.
 *
 * Returns nothing; does no work when there are no proc-event listeners.
 */
void proc_coredump_connector(struct task_struct *task)
{
struct cn_msg *msg;
struct proc_event *ev;
/* On-stack storage for the message; declared with 8-byte alignment. */
__u8 buffer[CN_PROC_MSG_SIZE] __aligned(8);
/* Nobody is listening: skip building the event entirely. */
if (atomic_read(&proc_event_num_listeners) < 1)
return;
/* The event payload lives in the data area of the message. */
msg = buffer_to_cn_msg(buffer);
ev = (struct proc_event *)msg->data;
/* Zero the whole event_data area before filling in the coredump fields. */
memset(&ev->event_data, 0, sizeof(ev->event_data));
ev->timestamp_ns = ktime_get_ns();
ev->what = PROC_EVENT_COREDUMP;
ev->event_data.coredump.process_pid = task->pid;
ev->event_data.coredump.process_tgid = task->tgid;
/* Fill in the message header: id copied from cn_proc_event_id, length is one event. */
memcpy(&msg->id, &cn_proc_event_id, sizeof(msg->id));
msg->ack = 0; /* not used */
msg->len = sizeof(*ev);
msg->flags = 0; /* not used */
send_msg(msg);
}
2.2 coredump如何生成?
void do_coredump(const siginfo_t *siginfo)
{
struct core_state core_state;
struct core_name cn;
struct mm_struct *mm = current->mm;
struct linux_binfmt * binfmt;
const struct cred *old_cred;
struct cred *cred;
int retval = 0;
int ispipe;
struct files_struct *displaced;
/* require nonrelative corefile path and be extra careful */
bool need_suid_safe = false;
bool core_dumped = false;
static atomic_t core_dump_count = ATOMIC_INIT(0);
struct coredump_params cprm = {
.siginfo = siginfo,
.regs = signal_pt_regs(),
.limit = rlimit(RLIMIT_CORE),-----------------------------------获取系统对于coredump的限制。
/*
* We must use the same mm->flags while dumping core to avoid
* inconsistency of bit flags, since this flag is not protected
* by any locks.
*/
.mm_flags = mm->flags,
};
audit_core_dumps(siginfo->si_signo);
binfmt = mm->binfmt;------------------------------------------------获取当前进程所使用的程序加载器。
if (!binfmt || !binfmt->core_dump)
goto fail;
if (!__get_dumpable(cprm.mm_flags))---------------------------------从当前进程的mm->flags中取低两位判断是否可以coredump,SUID_DUMP_DISABLE(0)不可以,其他情况都可以。
goto fail;
cred = prepare_creds();
if (!cred)
goto fail;
/*
* We cannot trust fsuid as being the "true" uid of the process
* nor do we know its entire history. We only know it was tainted
* so we dump it as root in mode 2, and only into a controlled
* environment (pipe handler or fully qualified path).
*/
if (__get_dumpable(cprm.mm_flags) == SUID_DUMP_ROOT) {--------------区分SUID_DUMP_USER和SUID_DUMP_ROOT。
/* Setuid core dump mode */
cred->fsuid = GLOBAL_ROOT_UID; /* Dump root private */
need_suid_safe = true;
}
retval = coredump_wait(siginfo->si_signo, &core_state);
if (retval < 0)
goto fail_creds;
old_cred = override_creds(cred);
ispipe = format_corename(&cn, &cprm);-------------------------------根据core_pattern判断是否是ispipe,然后根据core_pattern的设置生成coredump文件名称。
if (ispipe) {-------------------------------------------------------通过管道处理coredump信息。
int dump_count;
char **helper_argv;
struct subprocess_info *sub_info;
if (ispipe < 0) {
printk(KERN_WARNING "format_corename failed\n");
printk(KERN_WARNING "Aborting core\n");
goto fail_unlock;
}
if (cprm.limit == 1) {
printk(KERN_WARNING
"Process %d(%s) has RLIMIT_CORE set to 1\n",
task_tgid_vnr(current), current->comm);
printk(KERN_WARNING "Aborting core\n");
goto fail_unlock;
}
cprm.limit = RLIM_INFINITY;
dump_count = atomic_inc_return(&core_dump_count);
if (core_pipe_limit && (core_pipe_limit < dump_count)) {
printk(KERN_WARNING "Pid %d(%s) over core_pipe_limit\n",
task_tgid_vnr(current), current->comm);
printk(KERN_WARNING "Skipping core dump\n");
goto fail_dropcount;
}
helper_argv = argv_split(GFP_KERNEL, cn.corename, NULL);----------将cn.corename参数进行拆分。
if (!helper_argv) {
printk(KERN_WARNING "%s failed to allocate memory\n",
__func__);
goto fail_dropcount;
}
retval = -ENOMEM;
sub_info = call_usermodehelper_setup(helper_argv[0],
helper_argv, NULL, GFP_KERNEL,
umh_pipe_setup, NULL, &cprm);---------------------通过usermodehelper调用core_pattern中指定的用户空间程序helper_argv[0]来处理coredump数据。
if (sub_info)
retval = call_usermodehelper_exec(sub_info,
UMH_WAIT_EXEC);-----------------------------UMH_WAIT_EXEC表示在内核exec用户空间程序之后就退出,此时用户空间程序就通过pipe等待接收数据。
argv_free(helper_argv);
if (retval) {
printk(KERN_INFO "Core dump to |%s pipe failed\n",
cn.corename);
goto close_fail;
}
} else {
struct inode *inode;
int open_flags = O_CREAT | O_RDWR | O_NOFOLLOW |
O_LARGEFILE | O_EXCL;
if (cprm.limit < binfmt->min_coredump)
goto fail_unlock;
if (need_suid_safe && cn.corename[0] != '/') {
printk(KERN_WARNING "Pid %d(%s) can only dump core "
"to fully qualified path!\n",
task_tgid_vnr(current), current->comm);
printk(KERN_WARNING "Skipping core dump\n");
goto fail_unlock;
}
if (!need_suid_safe) {
mm_segment_t old_fs;
old_fs = get_fs();
set_fs(KERNEL_DS);
/*
* If it doesn't exist, that's fine. If there's some
* other problem, we'll catch it at the filp_open().
*/
(void) sys_unlink((const char __user *)cn.corename);
set_fs(old_fs);
}
if (need_suid_safe) {---------------------------------------------创建coredump文件。
struct path root;
task_lock(&init_task);
get_fs_root(init_task.fs, &root);
task_unlock(&init_task);
cprm.file = file_open_root(root.dentry, root.mnt,
cn.corename, open_flags, 0600);
path_put(&root);
} else {
cprm.file = filp_open(cn.corename, open_flags, 0600);
}
if (IS_ERR(cprm.file))
goto fail_unlock;
inode = file_inode(cprm.file);
if (inode->i_nlink > 1)------------------------------------------coredump文件不能有多个硬链接。
goto close_fail;
if (d_unhashed(cprm.file->f_path.dentry))
goto close_fail;
if (!S_ISREG(inode->i_mode))--------------------------------------coredump文件必须为普通文件。