Linux pipe 源代码分析
管道pipe作为Unix中历史最悠久的IPC机制,存在各个版本号的Unix中,主要用于父子进程之间的通信(使用fork,从而子进程会获得父进程的打开文件表)。pipe()系统调用底层的实现就相当于一个特殊的文件系统,每次调用的时候创建一个inode关联着两个file。一个用于读,一个用于写。从而实现数据的单向流动。
用户层API:
#include <unistd.h> int pipe(int pipefd[2]); #define _GNU_SOURCE /* See feature_test_macros(7) */ #include <unistd.h> int pipe2(int pipefd[2], int flags);
内核源代码路径例如以下:
// sys_pipe(.......) SYSCALL_DEFINE1(pipe, int __user *, fildes) { return sys_pipe2(fildes, 0); } SYSCALL_DEFINE2(pipe2, int __user *, fildes, int, flags) { struct file *files[2]; int fd[2]; int error; // 核心是do_pipe error = __do_pipe_flags(fd, files, flags); if (!error) { // 一切准备就绪后 把刚才和管道关联的2个fd复制到用户空间 if (unlikely(copy_to_user(fildes, fd, sizeof(fd)))) { fput(files[0]); fput(files[1]); put_unused_fd(fd[0]); put_unused_fd(fd[1]); error = -EFAULT; } else { // 把fd和file的映射关系更新到该进程的文件描写叙述表中fdtable fd_install(fd[0], files[0]); fd_install(fd[1], files[1]); } } return error; } static int __do_pipe_flags(int *fd, struct file **files, int flags) { int error; int fdw, fdr; if (flags & ~(O_CLOEXEC | O_NONBLOCK | O_DIRECT)) return -EINVAL; // 为该管道创建俩struct file error = create_pipe_files(files, flags); if (error) return error; // 获得两个能用的文件描写叙述符 error = get_unused_fd_flags(flags); if (error < 0) goto err_read_pipe; fdr = error; error = get_unused_fd_flags(flags); if (error < 0) goto err_fdr; fdw = error; audit_fd_pair(fdr, fdw); fd[0] = fdr; fd[1] = fdw; return 0; err_fdr: put_unused_fd(fdr); err_read_pipe: fput(files[0]); fput(files[1]); return error; } /* * 为管道创建两个file实例 */ int create_pipe_files(struct file **res, int flags) { int err; // 为pipe创建一个inode并做一定的初始化 struct inode *inode = get_pipe_inode(); struct file *f; struct path path; static struct qstr name = { .name = "" }; // quick string ?? if (!inode) return -ENFILE; err = -ENOMEM; // 分配一个directory entry path.dentry = d_alloc_pseudo(pipe_mnt->mnt_sb, &name); if (!path.dentry) goto err_inode; path.mnt = mntget(pipe_mnt); // 引用计数加1 d_instantiate(path.dentry, inode); err = -ENFILE; f = alloc_file(&path, FMODE_WRITE, &pipefifo_fops); if (IS_ERR(f)) goto err_dentry; f->f_flags = O_WRONLY | (flags & (O_NONBLOCK | O_DIRECT)); f->private_data = inode->i_pipe; // 所以你会明确 fd[0]是读 fd[1]是写 res[0] = alloc_file(&path, FMODE_READ, &pipefifo_fops); if (IS_ERR(res[0])) goto err_file; path_get(&path); res[0]->private_data = inode->i_pipe; res[0]->f_flags = O_RDONLY | (flags & O_NONBLOCK); res[1] = f; return 0; err_file: put_filp(f); err_dentry: free_pipe_info(inode->i_pipe); path_put(&path); return err; err_inode: free_pipe_info(inode->i_pipe); iput(inode); return err; } static struct inode * get_pipe_inode(void) { struct inode *inode = new_inode_pseudo(pipe_mnt->mnt_sb); struct pipe_inode_info *pipe; if (!inode) goto fail_inode; // 分配一个inode号 inode->i_ino = get_next_ino(); // 分配一个pipe的内核级对象 pipe = alloc_pipe_info(); if (!pipe) goto fail_iput; inode->i_pipe = pipe; pipe->files = 2; pipe->readers = pipe->writers = 1; inode->i_fop = &pipefifo_fops; /* * Mark the inode dirty from the very beginning, * that way it will never be moved to the dirty * list because "mark_inode_dirty()" will think * that it already _is_ on the dirty list. */ inode->i_state = I_DIRTY; inode->i_mode = S_IFIFO | S_IRUSR | S_IWUSR; inode->i_uid = current_fsuid(); inode->i_gid = current_fsgid(); inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; return inode; fail_iput: iput(inode); fail_inode: return NULL; } // 针对pipe的文件操作实例 const struct file_operations pipefifo_fops = { .open = fifo_open, .llseek = no_llseek, .read = new_sync_read, .read_iter = pipe_read, .write = new_sync_write, .write_iter = pipe_write, .poll = pipe_poll, .unlocked_ioctl = pipe_ioctl, .release = pipe_release, .fasync = pipe_fasync, };
总体的逻辑图能够这样:
TODO:详细读写的实现细节new_sync_read/write()有待分析。
參考:
(1)Linux kernel 3.18 source code
(2)Linux man page
(3)Linux内核源代码情景分析