crash> bt
PID: 410629 TASK: ffff883fea379fa0 CPU: 10 COMMAND: "jupyter-lab"
#0 [ffff8823ca927828] machine_kexec at ffffffff8105c4cb
#1 [ffff8823ca927888] __crash_kexec at ffffffff81104a32
#2 [ffff8823ca927958] crash_kexec at ffffffff81104b20
#3 [ffff8823ca927970] oops_end at ffffffff816ad278
#4 [ffff8823ca927998] no_context at ffffffff8169d29a
#5 [ffff8823ca9279e8] __bad_area_nosemaphore at ffffffff8169d330
#6 [ffff8823ca927a30] bad_area_nosemaphore at ffffffff8169d49a
#7 [ffff8823ca927a40] __do_page_fault at ffffffff816b013e
#8 [ffff8823ca927aa0] do_page_fault at ffffffff816b02e5
#9 [ffff8823ca927ad0] page_fault at ffffffff816ac508
[exception RIP: ida_remove+23]
RIP: ffffffff81323c87 RSP: ffff8823ca927b80 RFLAGS: 00010246
RAX: ffff883fea379fa0 RBX: 0000000000000000 RCX: ffff8823ca927fd8
RDX: 8888888888888889 RSI: 0000000000000003 RDI: 0000000000000000
RBP: ffff8823ca927b90 R8: 000000000000000a R9: 0000000000000003
R10: 000000000001f9e2 R11: ffff8823ca9278ee R12: 0000000000000000
R13: 0000000000000003 R14: ffff880613365720 R15: 0000000000000003
ORIG_RAX: ffffffffffffffff CS: 0010 SS: 0018
#10 [ffff8823ca927b98] devpts_kill_index at ffffffff8128bc09
#11 [ffff8823ca927bb8] pty_unix98_shutdown at ffffffff813fcece
#12 [ffff8823ca927bd0] release_tty at ffffffff813f20f7
#13 [ffff8823ca927be8] tty_init_dev at ffffffff813f4de7
#14 [ffff8823ca927c18] ptmx_open at ffffffff813fd2a5
#15 [ffff8823ca927c58] chrdev_open at ffffffff81206062
#16 [ffff8823ca927ca0] do_dentry_open at ffffffff811fe667
#17 [ffff8823ca927ce8] vfs_open at ffffffff811fe83a
#18 [ffff8823ca927d10] do_last at ffffffff8120f88d
#19 [ffff8823ca927db0] path_openat at ffffffff81210c91
#20 [ffff8823ca927e48] do_filp_open at ffffffff81212f4b
#21 [ffff8823ca927f18] do_sys_open at ffffffff811ffc03
#22 [ffff8823ca927f70] sys_open at ffffffff811ffd1e
#23 [ffff8823ca927f80] system_call_fastpath at ffffffff816b4fc9
RIP: 00007fc77358405d RSP: 00007ffe40981398 RFLAGS: 00010206
RAX: 0000000000000002 RBX: ffffffff816b4fc9 RCX: 0000000000000000
根据代码行号,反汇编可以看到
void ida_remove(struct ida *ida, int id)
{
struct idr_layer *p = ida->idr.top;
int shift = (ida->idr.layers - 1) * IDR_BITS;---------出错
根据调用链:
void devpts_kill_index(struct pts_fs_info *fsi, int idx)
{
mutex_lock(&allocated_ptys_lock);
ida_remove(&fsi->allocated_ptys, idx);----------确定传入了空指针
pty_count--;
mutex_unlock(&allocated_ptys_lock);
}
表面上看,取地址不应该出现空指针,但由于gcc的优化,这个取地址动作其实是作为寄存器传值。
pty_unix98_shutdown-->devpts_kill_index 的调用链,确定:
static void pty_unix98_shutdown(struct tty_struct *tty)
{
struct pts_fs_info *fsi;
if (tty->driver->subtype == PTY_TYPE_MASTER)
fsi = tty->driver_data;
else
fsi = tty->link->driver_data;
devpts_kill_index(fsi, tty->index);-----可以确定fsi为空指针,并没有保护fsi
走查代码,确定 tty_init_dev 在申请内存异常之后,进入 release_tty 的流程,此时 tty->driver_data 和 tty->link→driver_data 并未赋值,此时在释放流程
流程中,并没有保护这个指针,所以出现crash。
这个问题在c1e33af1ed552258405f2e5a72509af039c0441c 进行了代码重构,但并没有修复。
在5353ed8deedee9e5acb9f896e9032158f5d998de 修复好,将 pty_unix98_remove 中的指针加了保护。
修改后的代码如下:
static void pty_unix98_remove(struct tty_driver *driver, struct tty_struct *tty)
{
struct pts_fs_info *fsi;
if (tty->driver->subtype == PTY_TYPE_MASTER)
fsi = tty->driver_data;
else
fsi = tty->link->driver_data;
if (fsi) {-----------保护动作
devpts_kill_index(fsi, tty->index);
devpts_release(fsi);
}
}