zoukankan      html  css  js  c++  java
  • 用crash来分析一下proc的文件访问

    一般来说,用户通过传入文件名调用open系统调用来获取fd,然后read的时候,再通过这个fd来查找对应的file*

    /*
     * open system call entry point.
     *
     * ORs O_LARGEFILE into flags when force_o_largefile() is true, then
     * passes filename/flags/mode to do_sys_open() with AT_FDCWD and returns
     * whatever do_sys_open() returned.
     */
    SYSCALL_DEFINE3(open, const char __user *, filename, int, flags, int, mode)
    {
        long ret;
    
        if (force_o_largefile())
            flags |= O_LARGEFILE;
    
        ret = do_sys_open(AT_FDCWD, filename, flags, mode);
        /* avoid REGPARM breakage on x86: */
        asmlinkage_protect(3, ret, filename, flags, mode);
        return ret;
    }
    
    /*
     * read system call entry point.
     *
     * Resolves fd to a struct file with fget_light() and forwards the request
     * to vfs_read(). Returns the vfs_read() result, or -EBADF when
     * fget_light() yields no file.
     */
    SYSCALL_DEFINE3(read, unsigned int, fd, char __user *, buf, size_t, count)
    {
        struct file *file;
        ssize_t ret = -EBADF;	/* returned unchanged if the lookup below fails */
        int fput_needed;
    
        /* fd -> struct file *; this is the pointer the article tracks in crash */
        file = fget_light(fd, &fput_needed);
        if (file) {
            /* vfs_read() works on a local copy of the position ... */
            loff_t pos = file_pos_read(file);
            ret = vfs_read(file, buf, count, &pos);
            /* ... which is handed back to the file afterwards */
            file_pos_write(file, pos);
            /* paired with fget_light(); fput_needed was filled in by it */
            fput_light(file, fput_needed);
        }
    
        return ret;
    }

    所以file*是fget_light的返回值,下面通过一个crash来分析对应的file指针

    file指针是fget_light的返回值,默认存放在rax中;调用完之后,rax被赋值给了rbx,且rbx在调用vfs_read之前未被更改,所以rbx里面存放的就是file指针。进入vfs_read之后,rbx被压栈保存在rbp-0x18的位置
    
    crash> dis -l vfs_read
    /usr/src/debug/kernel-2.6.32-220.el6/linux-2.6.32-220.el6.x86_64/fs/read_write.c: 278
    0xffffffff8117e9c0 <vfs_read>:  push   %rbp
    0xffffffff8117e9c1 <vfs_read+1>:        mov    %rsp,%rbp
    0xffffffff8117e9c4 <vfs_read+4>:        sub    $0x30,%rsp
    0xffffffff8117e9c8 <vfs_read+8>:        mov    %rbx,-0x18(%rbp)
    crash> px 0xffff8817fc9e1f28-0x18
    $2 = 0xffff8817fc9e1f10
    crash>
    crash> struct file ffff880c9ddbba80
    struct file {
      f_u = {
        fu_list = {
          next = 0xffff880c860d8500,
          prev = 0xffff880c1188cce8
        },
        fu_rcuhead = {
          next = 0xffff880c860d8500,
          func = 0xffff880c1188cce8
        }
      },
      f_path = {
        mnt = 0xffff88180ec5eec0,
        dentry = 0xffff8818014b1540
      },
      f_op = 0xffffffff8161f980 <proc_sops+128>,---------------------这个f_op是我们要继续分析的:
      f_lock = {
        raw_lock = {
          slock = 0
        }
      },
      f_count = {
        counter = 1
      },
      f_flags = 32768,
      f_mode = 13,
      f_pos = 20480,
      f_owner = {
        lock = {
          raw_lock = {
            lock = 16777216
          }
        },
        pid = 0x0,
        pid_type = PIDTYPE_PID,
        uid = 0,
        euid = 0,
        signum = 0
      },
      f_cred = 0xffff880c0ecd8b00,
      f_ra = {
        start = 0,
        size = 0,
        async_size = 0,
        ra_pages = 32,
        mmap_miss = 0,
        prev_pos = -1
      },
      f_version = 0,
      f_security = 0x0,
      private_data = 0xffff8817f3eee740,
      f_ep_links = {
        next = 0xffff880c9ddbbb28,
        prev = 0xffff880c9ddbbb28
      },
      f_mapping = 0xffff88100a811918
    }

    我们继续取对应的f_op成员分析,不同的文件系统有不同的实现函数,proc文件系统对应的file_operations如下:

    crash> struct file_operations 0xffffffff8161f980
    struct file_operations {
      owner = 0x0,
      llseek = 0xffffffff811e3600 <proc_reg_llseek>,
      read = 0xffffffff811e3540 <proc_reg_read>,-----------------这个read要继续跟进
      write = 0xffffffff811e3480 <proc_reg_write>,
      aio_read = 0x0,
      aio_write = 0x0,
      readdir = 0x0,
      poll = 0xffffffff811e33d0 <proc_reg_poll>,
      ioctl = 0x0,
      unlocked_ioctl = 0xffffffff811e36b0 <proc_reg_unlocked_ioctl>,
      compat_ioctl = 0x0,
      mmap = 0xffffffff811e3320 <proc_reg_mmap>,
      open = 0xffffffff811e3a50 <proc_reg_open>,
      flush = 0x0,
      release = 0xffffffff811e31f0 <proc_reg_release>,
      fsync = 0x0,
      aio_fsync = 0x0,
      fasync = 0x0,
      lock = 0x0,
      sendpage = 0x0,
      get_unmapped_area = 0x0,
      check_flags = 0x0,
      flock = 0x0,
      splice_write = 0x0,
      splice_read = 0x0,
      setlease = 0x0
    }
    ssize_t vfs_read(struct file *file, char __user *buf, size_t count, loff_t *pos)
    {
        ssize_t ret;
    
        if (!(file->f_mode & FMODE_READ))
            return -EBADF;
        if (!file->f_op || (!file->f_op->read && !file->f_op->aio_read))
            return -EINVAL;
        if (unlikely(!access_ok(VERIFY_WRITE, buf, count)))
            return -EFAULT;
    
        ret = rw_verify_area(READ, file, pos, count);
        if (ret >= 0) {
            count = ret;
            if (file->f_op->read)
                ret = file->f_op->read(file, buf, count, pos);-------proc文件系统,这个为proc_reg_read
            else
    static ssize_t proc_reg_read(struct file *file, char __user *buf, size_t count, loff_t *ppos)
    {
        struct proc_dir_entry *pde = PDE(file->f_path.dentry->d_inode);-----------根据inode来获取pde

    一个proc文件系统的entry结构(proc_dir_entry)是通过PDE这个内联函数来获取的。

    PDE的实现如下:

    /*
     * Map an inode to the struct proc_inode that contains it: container_of()
     * treats @inode as the vfs_inode member of a struct proc_inode and
     * returns a pointer to that enclosing structure.
     */
    static inline struct proc_inode *PROC_I(const struct inode *inode)
    {
        return container_of(inode, struct proc_inode, vfs_inode);
    }
    
    /*
     * Return the proc_dir_entry pointer (the pde member) stored in the
     * proc_inode that PROC_I() derives from @inode.
     */
    static inline struct proc_dir_entry *PDE(const struct inode *inode)
    {
        return PROC_I(inode)->pde;
    }

    所以vfs层的inode结构其实就是嵌入到了proc_inode中;

    crash> struct -xo proc_inode
    struct proc_inode {
        [0x0] struct pid *pid;
        [0x8] int fd;
       [0x10] union proc_op op;
       [0x18] struct proc_dir_entry *pde;
       [0x20] struct ctl_table_header *sysctl;
       [0x28] struct ctl_table *sysctl_entry;
       [0x30] struct inode vfs_inode;-------------------vfs层的inode
    }

    通过vfs_inode获取到proc_inode之后,就会获取proc_dir_entry

    crash> struct -xo proc_dir_entry
    struct proc_dir_entry {
       [0x0] unsigned int low_ino;
       [0x4] unsigned short namelen;
       [0x8] const char *name;
      [0x10] mode_t mode;
      [0x18] nlink_t nlink;
      [0x20] uid_t uid;
      [0x24] gid_t gid;
      [0x28] loff_t size;
      [0x30] const struct inode_operations *proc_iops;
      [0x38] const struct file_operations *proc_fops;
      [0x40] struct proc_dir_entry *next;
      [0x48] struct proc_dir_entry *parent;
      [0x50] struct proc_dir_entry *subdir;
      [0x58] void *data;
      [0x60] read_proc_t *read_proc;
      [0x68] write_proc_t *write_proc;
      [0x70] atomic_t count;
      [0x74] int pde_users;
      [0x78] spinlock_t pde_unload_lock;
      [0x80] struct completion *pde_unload_completion;
      [0x88] struct list_head pde_openers;
    }
    SIZE: 0x98

    所以我们只要找出inode,用inode的地址减去vfs_inode在proc_inode中的偏移0x30得到proc_inode,就可以找出pde。

    crash> struct file.f_path ffff880c9ddbba80
      f_path = {
        mnt = 0xffff88180ec5eec0,
        dentry = 0xffff8818014b1540
      }

    crash> struct file.f_path.dentry ffff880c9ddbba80
      f_path.dentry = 0xffff8818014b1540
    crash> struct dentry.d_inode 0xffff8818014b1540
      d_inode = 0xffff88100a8117f8

    crash> px 0xffff88100a8117f8-0x30
    $11 = 0xffff88100a8117c8
    crash> struct proc_inode.pde 0xffff88100a8117c8
      pde = 0xffff880c0ef13b00
    crash> struct proc_dir_entry.proc_fops 0xffff880c0ef13b00
      proc_fops = 0xffffffff8161cdc0
    crash> struct file_operations 0xffffffff8161cdc0
    struct file_operations {
      owner = 0x0,
      llseek = 0xffffffff811a0490 <seq_lseek>,
      read = 0xffffffff811a0950 <seq_read>,
      write = 0xffffffff811687c0 <slabinfo_write>,
      aio_read = 0x0,
      aio_write = 0x0,
      readdir = 0x0,
      poll = 0x0,
      ioctl = 0x0,
      unlocked_ioctl = 0x0,
      compat_ioctl = 0x0,
      mmap = 0x0,
      open = 0xffffffff81165640 <slabinfo_open>,----------------最终调用的open
      flush = 0x0,
      release = 0xffffffff8119fde0 <seq_release>,
      fsync = 0x0,
      aio_fsync = 0x0,
      fasync = 0x0,
      lock = 0x0,
      sendpage = 0x0,
      get_unmapped_area = 0x0,
      check_flags = 0x0,
      flock = 0x0,
      splice_write = 0x0,
      splice_read = 0x0,
      setlease = 0x0
    }
    /*
     * read handler that forwards to the read method found in
     * pde->proc_fops for the proc entry behind @file.
     *
     * Returns -EIO when pde->proc_fops is NULL or when it has no read method;
     * otherwise returns the result of that read method.
     */
    static ssize_t proc_reg_read(struct file *file, char __user *buf, size_t count, loff_t *ppos)
    {
        /* inode -> proc_inode -> proc_dir_entry */
        struct proc_dir_entry *pde = PDE(file->f_path.dentry->d_inode);
        ssize_t rv = -EIO;
        ssize_t (*read)(struct file *, char __user *, size_t, loff_t *);
    
        /*
         * The proc_fops check, the pde_users increment and the fetch of the
         * read pointer all happen under pde_unload_lock.
         */
        spin_lock(&pde->pde_unload_lock);
        if (!pde->proc_fops) {
            spin_unlock(&pde->pde_unload_lock);
            return rv;
        }
        pde->pde_users++;
        read = pde->proc_fops->read;--------------------调用的read是seq_read
        spin_unlock(&pde->pde_unload_lock);
    
        /* the entry's own read method is called with the lock released */
        if (read)
            rv = read(file, buf, count, ppos);
    
        /* presumably balances the pde_users++ above; helper body not shown here */
        pde_users_dec(pde);
        return rv;
    }

    所以本案例中,proc文件系统最终调用的read是seq_read,open是slabinfo_open。当然,具体调用哪个函数跟案例相关:proc层的封装到proc_reg_read为止,函数proc_reg_read是vfs read进入proc的入口。看linux源码的时候,不能只见树木,不见森林,要有清晰的层次概念。后面的实现与具体的proc文件相关,不同的层次完成不同的功能是linux的设计哲学。seq_read是对seq类文件的封装,实现在seq_file.c中,主要逻辑如下:

    ssize_t seq_read(struct file *file, char __user *buf, size_t size, loff_t *ppos)
    {
        struct seq_file *m = (struct seq_file *)file->private_data;
    。。。。。
        p = m->op->start(m, &pos);
        while (1) {
            err = PTR_ERR(p);
            if (!p || IS_ERR(p))
                break;
            err = m->op->show(m, p);
            if (err < 0)
                break;
            if (unlikely(err))
                m->count = 0;
            if (unlikely(!m->count)) {
                p = m->op->next(m, p, &pos);
                m->index = pos;
                continue;
            }
            if (m->count < m->size)
                goto Fill;
            m->op->stop(m, p);
            kfree(m->buf);
            m->buf = kmalloc(m->size <<= 1, GFP_KERNEL);
            if (!m->buf)
                goto Enomem;
            m->count = 0;
            m->version = 0;
            pos = m->index;
            p = m->op->start(m, &pos);
        }
        m->op->stop(m, p);
        m->count = 0;
        goto Done;
    Fill:
    ssize_t seq_read(struct file *file, char __user *buf, size_t size, loff_t *ppos)
    {
        struct seq_file *m = (struct seq_file *)file->private_data;
    。。。。。
        p = m->op->start(m, &pos);------------标准的start
        while (1) {
            err = PTR_ERR(p);
            if (!p || IS_ERR(p))
                break;
            err = m->op->show(m, p);
            if (err < 0)
                break;
            if (unlikely(err))
                m->count = 0;
            if (unlikely(!m->count)) {
                p = m->op->next(m, p, &pos);----next动作
                m->index = pos;
                continue;
            }
            if (m->count < m->size)
                goto Fill;
            m->op->stop(m, p);----stop动作
            kfree(m->buf);
            m->buf = kmalloc(m->size <<= 1, GFP_KERNEL);
            if (!m->buf)
                goto Enomem;
            m->count = 0;
            m->version = 0;
            pos = m->index;
            p = m->op->start(m, &pos);-----循环start
        }
        m->op->stop(m, p);
        m->count = 0;
        goto Done;
    Fill:
    crash> struct file.private_data ffff880c9ddbba80
      private_data = 0xffff8817f3eee740
    crash> struct seq_file 0xffff8817f3eee740
    struct seq_file {
      buf = 0xffff8817f4448000 "size-65536(DMA)        0      0  65536    1   16 : tunables    8    4    0 : slabdata      0      0      0
    size-65536            19     19  65536    1   16 : tunables    8    4    0 : slabdata     19     19      0
    size-32768(DMA)        0      0  32768    "...,
      size = 4096,
      from = 963,
      count = 0,
      index = 189,
      read_pos = 20480,
      version = 0,
      lock = {
        count = {
          counter = 0
        },
        wait_lock = {
          raw_lock = {
            slock = 0
          }
        },
        wait_list = {
          next = 0xffff8817f3eee780,
          prev = 0xffff8817f3eee780
        },
        owner = 0xffff8817fc9e0000
      },
      op = 0xffffffff8161cea0 <proc_slabinfo_operations+192>,
      private = 0x0
    }
    crash> struct seq_file
    struct seq_file {
        char *buf;
        size_t size;
        size_t from;
        size_t count;
        loff_t index;
        loff_t read_pos;
        u64 version;
        struct mutex lock;
        const struct seq_operations *op;
        void *private;
    }
    SIZE: 104
    crash> struct seq_operations 0xffffffff8161cea0
    struct seq_operations {
      start = 0xffffffff81165680 <s_start>,
      stop = 0xffffffff811652f0 <s_stop>,
      next = 0xffffffff81165660 <s_next>,
      show = 0xffffffff81166420 <s_show>
    }

    有seq_file,一般就会有对应的seq_operations,上面列出的就是本例seq_operations中各个函数的情况。由于后面已经不是proc文件系统层的范畴,本文结束。

    水平有限,如果有错误,请帮忙提醒我。如果您觉得本文对您有帮助,可以点击下面的 推荐 支持一下我。版权所有,需要转发请带上本文源地址,博客一直在更新,欢迎 关注 。
  • 相关阅读:
    mysql 5.6
    mysql5.7 二进制包安装
    centos 6 编译安装httpd-2.4
    mysql 5.5源码包安装
    BZOJ4945 & 洛谷3825 & UOJ317:[NOI2017]游戏——题解
    BZOJ4943 & 洛谷3823 & UOJ315:[NOI2017]蚯蚓排队——题解
    BZOJ3435 & 洛谷3920 & UOJ55:[WC2014]紫荆花之恋
    BZOJ5343 & 洛谷4602 & LOJ2555:[CTSC2018]混合果汁——题解
    真·APIO2018滚粗记
    BZOJ4518:[SDOI2016]征途——题解
  • 原文地址:https://www.cnblogs.com/10087622blog/p/9516104.html
Copyright © 2011-2022 走看看