最近看了一个问题:消息队列可以创建,但是不能获取属性,也不能发消息,返回的错误为 EBADF(Bad file descriptor)。
经过打点,确认走入了这个流程:
SYSCALL_DEFINE3(mq_getsetattr, mqd_t, mqdes, const struct mq_attr __user *, u_mqstat, struct mq_attr __user *, u_omqstat) { int ret; struct mq_attr mqstat, omqstat; struct fd f; struct inode *inode; struct mqueue_inode_info *info; if (u_mqstat != NULL) { if (copy_from_user(&mqstat, u_mqstat, sizeof(struct mq_attr))) return -EFAULT; if (mqstat.mq_flags & (~O_NONBLOCK)) return -EINVAL; } f = fdget(mqdes); if (!f.file) { ret = -EBADF; goto out; } inode = file_inode(f.file); if (unlikely(f.file->f_op != &mqueue_file_operations)) {--------走的这个分支,导致错误 ret = -EBADF; goto out_fput; }
这个分支判断按道理也没什么问题,因为既然是消息队列,那么 file->f_op 就应该指向 mqueue_file_operations。而 file->f_op 是在 do_dentry_open 中从 inode->i_fop 取得的:
static int do_dentry_open(struct file *f, int (*open)(struct inode *, struct file *), const struct cred *cred) { 。。。。。 f->f_op = fops_get(inode->i_fop); }
根据 alloc_inode 的流程,inode 的 i_op 和 i_fop 会先被初始化为默认值:
static struct inode *alloc_inode(struct super_block *sb) { 。。。。 if (unlikely(inode_init_always(sb, inode))) { 。。。。 } int inode_init_always(struct super_block *sb, struct inode *inode) { ... inode->i_op = &empty_iops;----------初始化默认值 inode->i_fop = &no_open_fops;-------初始化的默认值 ... }
正常情况下,在 mq_open-->mqueue_create-->mqueue_get_inode 中会执行如下赋值:
inode->i_fop = &mqueue_file_operations;
这样在 do_dentry_open 的时候,执行 f->f_op = fops_get(inode->i_fop); 之后,f->f_op 就指向了 mqueue_file_operations,
也就是正常的file.f_op为:
struct file.f_op ffff883f53079500 f_op = 0xffffffff81690fa0 <mqueue_file_operations>
mqueue_file_operations = $1 = {
owner = 0x0,
llseek = 0xffffffff811e00d0 <default_llseek>,
read = 0xffffffff81278220 <mqueue_read_file>,
write = 0x0,
aio_read = 0x0,
aio_write = 0x0,
readdir = 0x0,
poll = 0xffffffff812781a0 <mqueue_poll_file>,
unlocked_ioctl = 0x0,
compat_ioctl = 0x0,
mmap = 0x0,
open = 0x0,
flush = 0xffffffff812790f0 <mqueue_flush_file>,
release = 0x0,
fsync = 0x0,
aio_fsync = 0x0,
fasync = 0x0,
lock = 0x0,
sendpage = 0x0,
get_unmapped_area = 0x0,
check_flags = 0x0,
flock = 0x0,
splice_write = 0x0,
splice_read = 0x0,
{
setlease = 0x0,
__UNIQUE_ID_rh_kabi_hide5 = {
setlease = 0x0
},
{<No data fields>}
},
fallocate = 0x0,
show_fdinfo = 0x0
}
而异常情况下的file.f_op为:
struct file.f_op 0xffff883e8d075b00 f_op = 0xffff883ba95251b8
struct file_operations {
owner = 0x0,
llseek = 0xffffffff811e00d0 <default_llseek>,
read = 0xffffffff81278220 <mqueue_read_file>,
write = 0xffffffffa04e97f0 <rfs_write>,--------------redirfs模块修改的
aio_read = 0x0,
aio_write = 0x0,
readdir = 0x0,
poll = 0xffffffff812781a0 <mqueue_poll_file>,
unlocked_ioctl = 0x0,
compat_ioctl = 0x0,
mmap = 0xffffffffa04e9380 <rfs_mmap>,---------------redirfs模块修改的
open = 0xffffffffa04ea3a0 <rfs_open>,---------------redirfs模块修改的
flush = 0xffffffff812790f0 <mqueue_flush_file>,
release = 0xffffffffa04e9d50 <rfs_release>,---------redirfs模块修改的
fsync = 0x0,
aio_fsync = 0x0,
fasync = 0x0,
lock = 0x0,
sendpage = 0x0,
get_unmapped_area = 0x0,
check_flags = 0x0,
flock = 0x0,
splice_write = 0x0,
splice_read = 0x0,
{
setlease = 0x0,
__UNIQUE_ID_rh_kabi_hide5 = {
setlease = 0x0
},
{<No data fields>}
},
fallocate = 0x0,
show_fdinfo = 0x0
}
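而修改这些函数指针的模块是 redirfs,它是趋势科技的一个内核模块,看起来是用来给他们的 filter 服务的。从上面的输出可以看到,异常情况下 f_op 指向的地址(0xffff883ba95251b8)已经不是 mqueue_file_operations 本身,看起来是一份被改写了 write/mmap/open/release 的 file_operations 拷贝。普通的文件打开时,只是检查 file.f_op 不为 NULL,所以不受影响;但是 msgqueue 则多了一项检查,要求 f_op 必须等于 &mqueue_file_operations,指针比较不相等,导致检查不通过,返回 EBADF。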
lsmod |grep -i redirfs redirfs 79430 1 gsch lsmod |grep -i gsch gsch 93171 8 redirfs 79430 1 gsch locate redirfs /opt/ds_agent/3.10.0-229.11.1.el7.x86_64/redirfs.ko
虽然趋势可以使用最暴力的直接修改 sys_call_table 的方式来规避这个问题:
0xffffffffa0514cf0 <gsch_write_hook_fn> 0xffffffffa0516a80 <gsch_open_hook_fn> 0xffffffffa05158a0 <gsch_close_hook_fn> 0xffffffffa0514dd0 <gsch_pwrite64_hook_fn> 0xffffffffa0514ec0 <gsch_writev_hook_fn> 0xffffffffa0515a80 <gsch_dup2_hook_fn> 0xffffffffa05149a0 <gsch_exit_hook_fn> 0xffffffffa0514fa0 <gsch_unlink_hook_fn> 0xffffffffa0514940 <gsch_getpgid_hook_fn> 0xffffffffa0514a20 <gsch_exit_group_hook_fn>
但是这样明显侵入性太强了,毕竟这年头通过修改 sys_call_table 来完成功能的内核模块太多了,多个模块同时这样做的话,容易出现与加载顺序相关的 bug 之类的问题。
那假设是我们使用 redirfs 来完成 filter 这种辅助功能,应该怎么修改呢?把判断去掉么?可是判断 mqueue_file_operations 这个指针的地方还蛮多的,
几乎所有 mq 开头的系统调用都需要修改,似乎也没有好的办法。回到问题的原点,趋势为什么不把 msgqueue 这种文件类型过滤掉呢?