zoukankan      html  css  js  c++  java
  • PROC文件系统

    1. seq_file

    参考: http://blog.chinaunix.net/uid-26084833-id-1754437.html

    seq_file的结构体定义:

       1: struct seq_file {
       2:     char *buf;
       3:     size_t size;
       4:     size_t from;
       5:     size_t count;
       6:     loff_t index;
       7:     loff_t read_pos;
       8:     u64 version;
       9:     struct mutex lock;
      10:     const struct seq_operations *op;
      11:     void *private;
      12: };

    seq_operations的定义:

       1: struct seq_operations {
       2:     void * (*start) (struct seq_file *m, loff_t *pos);
       3:     void (*stop) (struct seq_file *m, void *v);
       4:     void * (*next) (struct seq_file *m, void *v, loff_t *pos);
       5:     int (*show) (struct seq_file *m, void *v);
       6: };

    start函数

    用于指定seq_file文件的读开始位置,返回实际读开始位置,如果指定的位置超过文件末尾,应当返回NULL,start函数可以有一个特殊的返回SEQ_START_TOKEN,它用于让show函数输出文件头,但这只能在pos为0时使用;

    next函数

    用于把seq_file 文件的当前读位置移动到下一个读位置,返回实际的下一个读位置,如果已经到达文件末尾,返回NULL;

    stop函数

    用于在读完seq_file文件后调用,它类似于文件操作close,用于做一些必要的清理,如释放内存等;

    show函数

    用于格式化输出,如果成功返回0,否则返回出错码。

    我们查看一下用来打印/proc/mounts信息对应的seq_file操作函数:

       1: const struct seq_operations mounts_op = {
       2:     .start    = m_start,
       3:     .next    = m_next,
       4:     .stop    = m_stop,
       5:     .show    = show_vfsmnt
       6: };

    依次来看各个函数的实现:

       1: static void *m_start(struct seq_file *m, loff_t *pos)
       2: {
       3:     struct proc_mounts *p = m->private;
       4:  
       5:     down_read(&namespace_sem);
       6:     return seq_list_start(&p->ns->list, *pos);
       7: }
       8:  

    down_read(&namespace_sem);

    namespace_sem是一个读写信号量(rw_semaphore),down_read以读者身份获取它:多个读者可以同时持有,但与写者互斥,这样在读取namespace相关的信息期间,这些数据不会被修改。

       1: static struct list_head *mount_hashtable __read_mostly;
       2: static struct kmem_cache *mnt_cache __read_mostly;
       3: static struct rw_semaphore namespace_sem;

    从m_start/m_stop的用法来看,namespace_sem保护的是对namespace挂载链表(p->ns->list)的并发读写。

    struct proc_mounts *p = m->private;

    这里可以知道,给/proc/mounts使用的seq_file的成员private用来保存proc_mounts结构体指针。

       1: struct proc_mounts {
       2:     struct seq_file m; /* must be the first element */
       3:     struct mnt_namespace *ns;
       4:     struct path root;
       5:     int event;
       6: };

    顾名思义,proc_mounts保存的是我们想要的/proc/mounts信息的数据结构。

    那么,proc_mounts结构体中的数据是从哪里得到的呢?

       1: static int mounts_open_common(struct inode *inode, struct file *file,
       2:                   const struct seq_operations *op)
       3: {
       4:     struct task_struct *task = get_proc_task(inode);
       5:     struct nsproxy *nsp;
       6:     struct mnt_namespace *ns = NULL;
       7:     struct path root;
       8:     struct proc_mounts *p;
       9:     int ret = -EINVAL;
      10:  
      11:     if (task) {
      12:         rcu_read_lock();
      13:         nsp = task_nsproxy(task);
      14:         if (nsp) {
      15:             ns = nsp->mnt_ns;
      16:             if (ns)
      17:                 get_mnt_ns(ns);
      18:         }
      19:         rcu_read_unlock();
      20:         if (ns && get_task_root(task, &root) == 0)
      21:             ret = 0;
      22:         put_task_struct(task);
      23:     }
      24:  
      25:     if (!ns)
      26:         goto err;
      27:     if (ret)
      28:         goto err_put_ns;
      29:  
      30:     ret = -ENOMEM;
      31:     p = kmalloc(sizeof(struct proc_mounts), GFP_KERNEL);
      32:     if (!p)
      33:         goto err_put_path;
      34:  
      35:     file->private_data = &p->m;
      36:     ret = seq_open(file, op);
      37:     if (ret)
      38:         goto err_free;
      39:  
      40:     p->m.private = p;
      41:     p->ns = ns;
      42:     p->root = root;
      43:     p->event = ns->event;
      44:  
      45:     return 0;
      46:  
      47:  err_free:
      48:     kfree(p);
      49:  err_put_path:
      50:     path_put(&root);
      51:  err_put_ns:
      52:     put_mnt_ns(ns);
      53:  err:
      54:     return ret;
      55: }

    首先看到这段代码

        p = kmalloc(sizeof(struct proc_mounts), GFP_KERNEL);
        if (!p)
            goto err_put_path;

        file->private_data = &p->m;
        ret = seq_open(file, op);
        if (ret)
            goto err_free;

        p->m.private = p;
        p->ns = ns;
        p->root = root;
        p->event = ns->event;

    可以确定,proc_mounts结构体是在这里创建并且初始化的。其中最重要的数据ns是怎么来的呢?

        if (task) {
            rcu_read_lock();
            nsp = task_nsproxy(task);
            if (nsp) {
                ns = nsp->mnt_ns;
                if (ns)
                    get_mnt_ns(ns);
            }
            rcu_read_unlock();
            if (ns && get_task_root(task, &root) == 0)
                ret = 0;
            put_task_struct(task);
        }

       1: /*
       2:  * A structure to contain pointers to all per-process
       3:  * namespaces - fs (mount), uts, network, sysvipc, etc.
       4:  *
       5:  * 'count' is the number of tasks holding a reference.
       6:  * The count for each namespace, then, will be the number
       7:  * of nsproxies pointing to it, not the number of tasks.
       8:  *
       9:  * The nsproxy is shared by tasks which share all namespaces.
      10:  * As soon as a single namespace is cloned or unshared, the
      11:  * nsproxy is copied.
      12:  */
      13: struct nsproxy {
      14:     atomic_t count;
      15:     struct uts_namespace *uts_ns;
      16:     struct ipc_namespace *ipc_ns;
      17:     struct mnt_namespace *mnt_ns;
      18:     struct pid_namespace *pid_ns;
      19:     struct net          *net_ns;
      20: };
    这段代码先通过get_proc_task(inode)找到该proc inode所对应的任务(不一定是当前任务),再通过该任务的nsproxy结构体得到mnt_ns数据。

    那么是哪里调到了mounts_open_common函数呢?

       1: static int mounts_open(struct inode *inode, struct file *file)
       2: {
       3:     return mounts_open_common(inode, file, &mounts_op);
       4: }
       5:  
       6: static const struct file_operations proc_mounts_operations = {
       7:     .open        = mounts_open,
       8:     .read        = seq_read,
       9:     .llseek        = seq_lseek,
      10:     .release    = mounts_release,
      11:     .poll        = mounts_poll,
      12: };

    我们又在fs/proc/base.c中有了大发现:

       1: static const struct pid_entry tgid_base_stuff[] = {
       2:     DIR("task",       S_IRUGO|S_IXUGO, proc_task_inode_operations, proc_task_operations),
       3:     DIR("fd",         S_IRUSR|S_IXUSR, proc_fd_inode_operations, proc_fd_operations),
       4:     DIR("fdinfo",     S_IRUSR|S_IXUSR, proc_fdinfo_inode_operations, proc_fdinfo_operations),
       5:     DIR("ns",      S_IRUSR|S_IXUGO, proc_ns_dir_inode_operations, proc_ns_dir_operations),
       6: #ifdef CONFIG_NET
       7:     DIR("net",        S_IRUGO|S_IXUGO, proc_net_inode_operations, proc_net_operations),
       8: #endif
       9:     REG("environ",    S_IRUSR, proc_environ_operations),
      10:     INF("auxv",       S_IRUSR, proc_pid_auxv),
      11:     ONE("status",     S_IRUGO, proc_pid_status),
      12:     ONE("personality", S_IRUGO, proc_pid_personality),
      13:     INF("limits",      S_IRUGO, proc_pid_limits),
      14: #ifdef CONFIG_SCHED_DEBUG
      15:     REG("sched",      S_IRUGO|S_IWUSR, proc_pid_sched_operations),
      16: #endif
      17: #ifdef CONFIG_SCHED_AUTOGROUP
      18:     REG("autogroup",  S_IRUGO|S_IWUSR, proc_pid_sched_autogroup_operations),
      19: #endif
      20:     REG("comm",      S_IRUGO|S_IWUSR, proc_pid_set_comm_operations),
      21: #ifdef CONFIG_HAVE_ARCH_TRACEHOOK
      22:     INF("syscall",    S_IRUGO, proc_pid_syscall),
      23: #endif
      24:     INF("cmdline",    S_IRUGO, proc_pid_cmdline),
      25:     ONE("stat",       S_IRUGO, proc_tgid_stat),
      26:     ONE("statm",      S_IRUGO, proc_pid_statm),
      27:     REG("maps",       S_IRUGO, proc_maps_operations),
      28: #ifdef CONFIG_NUMA
      29:     REG("numa_maps",  S_IRUGO, proc_numa_maps_operations),
      30: #endif
      31:     REG("mem",        S_IRUSR|S_IWUSR, proc_mem_operations),
      32:     LNK("cwd",        proc_cwd_link),
      33:     LNK("root",       proc_root_link),
      34:     LNK("exe",        proc_exe_link),
      35:     REG("mounts",     S_IRUGO, proc_mounts_operations),
      36:     REG("mountinfo",  S_IRUGO, proc_mountinfo_operations),
      37:     REG("mountstats", S_IRUSR, proc_mountstats_operations),
      38: #ifdef CONFIG_PROC_PAGE_MONITOR
      39:     REG("clear_refs", S_IWUSR, proc_clear_refs_operations),
      40:     REG("smaps",      S_IRUGO, proc_smaps_operations),
      41:     REG("pagemap",    S_IRUGO, proc_pagemap_operations),
      42: #endif
      43: #ifdef CONFIG_SECURITY
      44:     DIR("attr",       S_IRUGO|S_IXUGO, proc_attr_dir_inode_operations, proc_attr_dir_operations),
      45: #endif
      46: #ifdef CONFIG_KALLSYMS
      47:     INF("wchan",      S_IRUGO, proc_pid_wchan),
      48: #endif
      49: #ifdef CONFIG_STACKTRACE
      50:     ONE("stack",      S_IRUGO, proc_pid_stack),
      51: #endif
      52: #ifdef CONFIG_SCHEDSTATS
      53:     INF("schedstat",  S_IRUGO, proc_pid_schedstat),
      54: #endif
      55: #ifdef CONFIG_LATENCYTOP
      56:     REG("latency",  S_IRUGO, proc_lstats_operations),
      57: #endif
      58: #ifdef CONFIG_PROC_PID_CPUSET
      59:     REG("cpuset",     S_IRUGO, proc_cpuset_operations),
      60: #endif
      61: #ifdef CONFIG_CGROUPS
      62:     REG("cgroup",  S_IRUGO, proc_cgroup_operations),
      63: #endif
      64:     INF("oom_score",  S_IRUGO, proc_oom_score),
      65:     REG("oom_adj",    S_IRUGO|S_IWUSR, proc_oom_adjust_operations),
      66:     REG("oom_score_adj", S_IRUGO|S_IWUSR, proc_oom_score_adj_operations),
      67: #ifdef CONFIG_AUDITSYSCALL
      68:     REG("loginuid",   S_IWUSR|S_IRUGO, proc_loginuid_operations),
      69:     REG("sessionid",  S_IRUGO, proc_sessionid_operations),
      70: #endif
      71: #ifdef CONFIG_FAULT_INJECTION
      72:     REG("make-it-fail", S_IRUGO|S_IWUSR, proc_fault_inject_operations),
      73: #endif
      74: #ifdef CONFIG_ELF_CORE
      75:     REG("coredump_filter", S_IRUGO|S_IWUSR, proc_coredump_filter_operations),
      76: #endif
      77: #ifdef CONFIG_TASK_IO_ACCOUNTING
      78:     INF("io",    S_IRUSR, proc_tgid_io_accounting),
      79: #endif
      80: #ifdef CONFIG_HARDWALL
      81:     INF("hardwall",   S_IRUGO, proc_pid_hardwall),
      82: #endif
      83: };

    这里定义着在每个/proc/[pid]下面的所有目录项

    那么/proc/mounts呢,我们查看一下/proc/mounts的信息:

       1: #ls -l /proc
       2: ......
       3: lrwxrwxrwx  1 root       root               11 2014-01-26 22:11 mounts -> self/mounts
       4: ......
       5: lrwxrwxrwx  1 root       root               64 2014-01-23 01:22 self -> 10590
       6: ......

    因此,一切都明了了,/proc/mounts其实是到当前任务的/proc/self/mounts的软链接。

    proc_mounts的数据源头,以及生成数据的调用层次问题已经找到了答案,接下来再回过头来看看seq_file。

    return seq_list_start(&p->ns->list, *pos);

       1: struct list_head *seq_list_start(struct list_head *head, loff_t pos)
       2: {
       3:     struct list_head *lh;
       4:  
       5:     list_for_each(lh, head)
       6:         if (pos-- == 0)
       7:             return lh;
       8:  
       9:     return NULL;
      10: }
      11: EXPORT_SYMBOL(seq_list_start);

    其实很简单,就是返回双链表head中第pos项(从0开始计数)的指针。如果pos超出了head双链表中的项目数目,就返回NULL。

    可见,这是为了读取seq_file中的内容做准备。

    对于m_next和m_stop的逻辑也很简单,不再详述。

       1: static void *m_next(struct seq_file *m, void *v, loff_t *pos)
       2: {
       3:     struct proc_mounts *p = m->private;
       4:  
       5:     return seq_list_next(v, &p->ns->list, pos);
       6: }
       7:  
       8: static void m_stop(struct seq_file *m, void *v)
       9: {
      10:     up_read(&namespace_sem);
      11: }
       1: struct list_head *seq_list_next(void *v, struct list_head *head, loff_t *ppos)
       2: {
       3:     struct list_head *lh;
       4:  
       5:     lh = ((struct list_head *)v)->next;
       6:     ++*ppos;
       7:     return lh == head ? NULL : lh;
       8: }
       9: EXPORT_SYMBOL(seq_list_next);

    总结一下,就是m_start/m_next向外界暴露proc_mounts->ns->list的位置指针,允许外界对其内容进行读取。

    m_stop用来在一轮读取结束后做清理工作,这里是通过up_read释放m_start中获取的namespace_sem读锁。

    显示函数

       1: static int show_vfsmnt(struct seq_file *m, void *v)
       2: {
       3:     struct vfsmount *mnt = list_entry(v, struct vfsmount, mnt_list);
       4:     int err = 0;
       5:     struct path mnt_path = { .dentry = mnt->mnt_root, .mnt = mnt };
       6:  
       7:     if (mnt->mnt_sb->s_op->show_devname) {
       8:         err = mnt->mnt_sb->s_op->show_devname(m, mnt);
       9:         if (err)
      10:             goto out;
      11:     } else {
      12:         mangle(m, mnt->mnt_devname ? mnt->mnt_devname : "none");
      13:     }
      14:     seq_putc(m, ' ');
      15:     seq_path(m, &mnt_path, " \t\n\\");
      16:     seq_putc(m, ' ');
      17:     show_type(m, mnt->mnt_sb);
      18:     seq_puts(m, __mnt_is_readonly(mnt) ? " ro" : " rw");
      19:     err = show_sb_opts(m, mnt->mnt_sb);
      20:     if (err)
      21:         goto out;
      22:     show_mnt_opts(m, mnt);
      23:     if (mnt->mnt_sb->s_op->show_options)
      24:         err = mnt->mnt_sb->s_op->show_options(m, mnt);
      25:     seq_puts(m, " 0 0\n");
      26: out:
      27:     return err;
      28: }

    从show函数来看,v指向的是某个vfsmount结构体的mnt_list成员,先用list_entry由它得到所在的vfsmount结构体,再将该vfsmount的信息以一定的格式写到seq_file的buffer里面去。

    结合m_start中的seq_list_start(&p->ns->list, *pos)可知,v实际上保存的是

    p->ns->list链表中的某个节点(其中p = m->private),而不是ns->root

    接下来我们看一下,这些简单的功能(m_start/m_next/m_stop/show_vfsmnt)是怎样发挥作用的:

       1: ssize_t seq_read(struct file *file, char __user *buf, size_t size, loff_t *ppos)
       2: {
       3:     struct seq_file *m = file->private_data;
       4:     size_t copied = 0;
       5:     loff_t pos;
       6:     size_t n;
       7:     void *p;
       8:     int err = 0;
       9:  
      10:     mutex_lock(&m->lock);
      11:  
      12:     /* Don't assume *ppos is where we left it */
      13:     if (unlikely(*ppos != m->read_pos)) {
      14:         m->read_pos = *ppos;
      15:         while ((err = traverse(m, *ppos)) == -EAGAIN)
      16:             ;
      17:         if (err) {
      18:             /* With prejudice... */
      19:             m->read_pos = 0;
      20:             m->version = 0;
      21:             m->index = 0;
      22:             m->count = 0;
      23:             goto Done;
      24:         }
      25:     }
      26:  
      27:     /*
      28:      * seq_file->op->..m_start/m_stop/m_next may do special actions
      29:      * or optimisations based on the file->f_version, so we want to
      30:      * pass the file->f_version to those methods.
      31:      *
      32:      * seq_file->version is just copy of f_version, and seq_file
      33:      * methods can treat it simply as file version.
      34:      * It is copied in first and copied out after all operations.
      35:      * It is convenient to have it as  part of structure to avoid the
      36:      * need of passing another argument to all the seq_file methods.
      37:      */
      38:     m->version = file->f_version;
      39:     /* grab buffer if we didn't have one */
      40:     if (!m->buf) {
      41:         m->buf = kmalloc(m->size = PAGE_SIZE, GFP_KERNEL);
      42:         if (!m->buf)
      43:             goto Enomem;
      44:     }
      45:     /* if not empty - flush it first */
      46:     if (m->count) {
      47:         n = min(m->count, size);
      48:         err = copy_to_user(buf, m->buf + m->from, n);
      49:         if (err)
      50:             goto Efault;
      51:         m->count -= n;
      52:         m->from += n;
      53:         size -= n;
      54:         buf += n;
      55:         copied += n;
      56:         if (!m->count)
      57:             m->index++;
      58:         if (!size)
      59:             goto Done;
      60:     }
      61:     /* we need at least one record in buffer */
      62:     pos = m->index;
      63:     p = m->op->start(m, &pos);
      64:     while (1) {
      65:         err = PTR_ERR(p);
      66:         if (!p || IS_ERR(p))
      67:             break;
      68:         err = m->op->show(m, p);
      69:         if (err < 0)
      70:             break;
      71:         if (unlikely(err))
      72:             m->count = 0;
      73:         if (unlikely(!m->count)) {
      74:             p = m->op->next(m, p, &pos);
      75:             m->index = pos;
      76:             continue;
      77:         }
      78:         if (m->count < m->size)
      79:             goto Fill;
      80:         m->op->stop(m, p);
      81:         kfree(m->buf);
      82:         m->buf = kmalloc(m->size <<= 1, GFP_KERNEL);
      83:         if (!m->buf)
      84:             goto Enomem;
      85:         m->count = 0;
      86:         m->version = 0;
      87:         pos = m->index;
      88:         p = m->op->start(m, &pos);
      89:     }
      90:     m->op->stop(m, p);
      91:     m->count = 0;
      92:     goto Done;
      93: Fill:
      94:     /* they want more? let's try to get some more */
      95:     while (m->count < size) {
      96:         size_t offs = m->count;
      97:         loff_t next = pos;
      98:         p = m->op->next(m, p, &next);
      99:         if (!p || IS_ERR(p)) {
     100:             err = PTR_ERR(p);
     101:             break;
     102:         }
     103:         err = m->op->show(m, p);
     104:         if (m->count == m->size || err) {
     105:             m->count = offs;
     106:             if (likely(err <= 0))
     107:                 break;
     108:         }
     109:         pos = next;
     110:     }
     111:     m->op->stop(m, p);
     112:     n = min(m->count, size);
     113:     err = copy_to_user(buf, m->buf, n);
     114:     if (err)
     115:         goto Efault;
     116:     copied += n;
     117:     m->count -= n;
     118:     if (m->count)
     119:         m->from = n;
     120:     else
     121:         pos++;
     122:     m->index = pos;
     123: Done:
     124:     if (!copied)
     125:         copied = err;
     126:     else {
     127:         *ppos += copied;
     128:         m->read_pos += copied;
     129:     }
     130:     file->f_version = m->version;
     131:     mutex_unlock(&m->lock);
     132:     return copied;
     133: Enomem:
     134:     err = -ENOMEM;
     135:     goto Done;
     136: Efault:
     137:     err = -EFAULT;
     138:     goto Done;
     139: }

    seq_read,显然是用来读取文件内容的,但是其接口并不是seq_file,而是file,这就表明这个接口是把seq_file的实现细节隐藏在了该函数的内部,而对于外面来说,可以通过常用的struct file接口来调用该函数。

    因此该函数起到了Adapter的作用。

    下面这段是核心代码

        pos = m->index;
        p = m->op->start(m, &pos);
        while (1) {
            err = PTR_ERR(p);
            if (!p || IS_ERR(p))
                break;
            err = m->op->show(m, p);
            if (err < 0)
                break;
            if (unlikely(err))
                m->count = 0;
            if (unlikely(!m->count)) {
                p = m->op->next(m, p, &pos);
                m->index = pos;
                continue;
            }
            if (m->count < m->size)
                goto Fill;
            m->op->stop(m, p);
            kfree(m->buf);
            m->buf = kmalloc(m->size <<= 1, GFP_KERNEL);
            if (!m->buf)
                goto Enomem;
            m->count = 0;
            m->version = 0;
            pos = m->index;
            p = m->op->start(m, &pos);
        }
        m->op->stop(m, p);
        m->count = 0;
        goto Done;

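    控制逻辑由show的返回值err和m->count共同决定:

    err < 0, 代表show出错,跳出循环;如果此前没有拷贝任何数据,该错误码会作为seq_read的返回值;

    err > 0, 代表跳过当前记录,此时m->count被清零(丢弃本次输出),然后调用m_next读下一条(show成功但没有输出任何内容,即m->count为0时,也走这条路径);

    err = 0, 代表show成功:如果m->count < m->size,说明这条记录完整地放进了buffer,跳转到Fill继续填充后续记录;否则说明buffer不够大,于是调用m_stop,释放旧buffer,将buffer大小调整为原来的2倍,再从m->index处重新m_start,尝试重新读这条记录。

    其中,m->count代表m->buf中已有的有效字节数目,m->size代表m->buf的总大小。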

  • 相关阅读:
    003 01 Android 零基础入门 01 Java基础语法 01 Java初识 03 Java程序的执行流程
    002 01 Android 零基础入门 01 Java基础语法 01 Java初识 02 Java简介
    001 01 Android 零基础入门 01 Java基础语法 01 Java初识 01 导学
    001 Android Studio 首次编译执行项目过程中遇到的几个常见问题
    Dora.Interception,为.NET Core度身打造的AOP框架 [2]:以约定的方式定义拦截器
    Dora.Interception,为.NET Core度身打造的AOP框架 [1]:更加简练的编程体验
    监视EntityFramework中的sql流转你需要知道的三种方式Log,SqlServerProfile, EFProfile
    轻量级ORM框架——第二篇:Dapper中的一些复杂操作和inner join应该注意的坑
    轻量级ORM框架——第一篇:Dapper快速学习
    CF888G Xor-MST(异或生成树模板)
  • 原文地址:https://www.cnblogs.com/long123king/p/3534989.html
Copyright © 2011-2022 走看看