zoukankan      html  css  js  c++  java
  • inode缓存与dentry缓存

    1. inode缓存

       1: struct inode {
       2:     /* RCU path lookup touches following: */
       3:     umode_t            i_mode;
       4:     uid_t            i_uid;
       5:     gid_t            i_gid;
       6:     const struct inode_operations    *i_op;
       7:     struct super_block    *i_sb;
       8:  
       9:     spinlock_t        i_lock;    /* i_blocks, i_bytes, maybe i_size */
      10:     unsigned int        i_flags;
      11:     unsigned long        i_state;
      12: #ifdef CONFIG_SECURITY
      13:     void            *i_security;
      14: #endif
      15:     struct mutex        i_mutex;
      16:  
      17:  
      18:     unsigned long        dirtied_when;    /* jiffies of first dirtying */
      19:  
      20:     struct hlist_node    i_hash;
      21:     struct list_head    i_wb_list;    /* backing dev IO list */
      22:     struct list_head    i_lru;        /* inode LRU list */
      23:     struct list_head    i_sb_list;
      24:     union {
      25:         struct list_head    i_dentry;
      26:         struct rcu_head        i_rcu;
      27:     };
      28:     unsigned long        i_ino;
      29:     atomic_t        i_count;
      30:     unsigned int        i_nlink;
      31:     dev_t            i_rdev;
      32:     unsigned int        i_blkbits;
      33:     u64            i_version;
      34:     loff_t            i_size;
      35: #ifdef __NEED_I_SIZE_ORDERED
      36:     seqcount_t        i_size_seqcount;
      37: #endif
      38:     struct timespec        i_atime;
      39:     struct timespec        i_mtime;
      40:     struct timespec        i_ctime;
      41:     blkcnt_t        i_blocks;
      42:     unsigned short          i_bytes;
      43:     struct rw_semaphore    i_alloc_sem;
      44:     const struct file_operations    *i_fop;    /* former ->i_op->default_file_ops */
      45:     struct file_lock    *i_flock;
      46:     struct address_space    *i_mapping;
      47:     struct address_space    i_data;
      48: #ifdef CONFIG_QUOTA
      49:     struct dquot        *i_dquot[MAXQUOTAS];
      50: #endif
      51:     struct list_head    i_devices;
      52:     union {
      53:         struct pipe_inode_info    *i_pipe;
      54:         struct block_device    *i_bdev;
      55:         struct cdev        *i_cdev;
      56:     };
      57:  
      58:     __u32            i_generation;
      59:  
      60: #ifdef CONFIG_FSNOTIFY
      61:     __u32            i_fsnotify_mask; /* all events this inode cares about */
      62:     struct hlist_head    i_fsnotify_marks;
      63: #endif
      64:  
      65: #ifdef CONFIG_IMA
      66:     atomic_t        i_readcount; /* struct files open RO */
      67: #endif
      68:     atomic_t        i_writecount;
      69: #ifdef CONFIG_FS_POSIX_ACL
      70:     struct posix_acl    *i_acl;
      71:     struct posix_acl    *i_default_acl;
      72: #endif
      73:     void            *i_private; /* fs or device private pointer */
      74: };

    inode可能处于三种状态:

    1)unused,里面没有保存有效的内容,可以被复用为新的用途;

    2)in use,正在被使用,其成员i_count以及i_nlink一定大于0,此时inode与文件系统或者说设备上的文件相关联,但是自从上次与设备同步后,内容没有发生改变,即不是dirty的;

    3)dirty,inode里面的内容已经与文件系统中的文件内容不一致了,即脏了,需要进行文件同步操作。

    前两种状态的inode都各自位于一个全局的链表中,而第三种的inode位于super_block结构体中的一个链表中。

    先看inode结构体中的一个成员:

    struct list_head    i_lru;        /* inode LRU list */

    对应着一个全局的链表:

    static LIST_HEAD(inode_lru);
    static DEFINE_SPINLOCK(inode_lru_lock);

       1: /*
       2:  * Called when we're dropping the last reference
       3:  * to an inode.
       4:  *
       5:  * Call the FS "drop_inode()" function, defaulting to
       6:  * the legacy UNIX filesystem behaviour.  If it tells
       7:  * us to evict inode, do so.  Otherwise, retain inode
       8:  * in cache if fs is alive, sync and evict if fs is
       9:  * shutting down.
      10:  */
      11: static void iput_final(struct inode *inode)
      12: {
      13:     struct super_block *sb = inode->i_sb;
      14:     const struct super_operations *op = inode->i_sb->s_op;
      15:     int drop;
      16:  
      17:     WARN_ON(inode->i_state & I_NEW);
      18:  
      19:     if (op && op->drop_inode)
      20:         drop = op->drop_inode(inode);
      21:     else
      22:         drop = generic_drop_inode(inode);
      23:  
      24:     if (!drop && (sb->s_flags & MS_ACTIVE)) {
      25:         inode->i_state |= I_REFERENCED;
      26:         if (!(inode->i_state & (I_DIRTY|I_SYNC)))
      27:             inode_lru_list_add(inode);
      28:         spin_unlock(&inode->i_lock);
      29:         return;
      30:     }
      31:  
      32:     if (!drop) {
      33:         inode->i_state |= I_WILL_FREE;
      34:         spin_unlock(&inode->i_lock);
      35:         write_inode_now(inode, 1);
      36:         spin_lock(&inode->i_lock);
      37:         WARN_ON(inode->i_state & I_NEW);
      38:         inode->i_state &= ~I_WILL_FREE;
      39:     }
      40:  
      41:     inode->i_state |= I_FREEING;
      42:     inode_lru_list_del(inode);
      43:     spin_unlock(&inode->i_lock);
      44:  
      45:     evict(inode);
      46: }

    函数iput_final是在当inode没有被任何地方引用后,即变成了unused状态后,回收inode的机制。

    if (op && op->drop_inode)
            drop = op->drop_inode(inode);
        else
            drop = generic_drop_inode(inode);

    drop非0时,表示i_nlink为0,或者inode已经不在inode_hashtable的哈希链表中(inode_unhashed),即这个inode可以被释放掉;drop为0时,表示inode仍有链接并且仍在哈希表中,应当尽量保留在缓存里。

       1: /*
       2:  * Normal UNIX filesystem behaviour: delete the
       3:  * inode when the usage count drops to zero, and
       4:  * i_nlink is zero.
       5:  */
       6: int generic_drop_inode(struct inode *inode)
       7: {
       8:     return !inode->i_nlink || inode_unhashed(inode);
       9: }
      10: EXPORT_SYMBOL_GPL(generic_drop_inode);

    if (!drop && (sb->s_flags & MS_ACTIVE)) {
            inode->i_state |= I_REFERENCED;
            if (!(inode->i_state & (I_DIRTY|I_SYNC)))
               inode_lru_list_add(inode);
            spin_unlock(&inode->i_lock);
            return;
        }

    如果drop为0,并且superblock仍处于活动状态(s_flags中设置了MS_ACTIVE),就先给inode打上I_REFERENCED标记;若inode既不是脏的、也不处于同步过程中(未设置I_DIRTY或I_SYNC),再调用inode_lru_list_add将inode添加到unused列表中,即将inode缓存起来。

    否则,若drop为0(此时superblock已不再活动),就先调用write_inode_now写回到磁盘上;随后无论drop是否为0,都设置I_FREEING标志,调用inode_lru_list_del将已经缓存下来的inode从LRU链表中摘除,最后调用evict函数将inode彻底删除。

       1: static void inode_lru_list_add(struct inode *inode)
       2: {
       3:     spin_lock(&inode_lru_lock);
       4:     if (list_empty(&inode->i_lru)) {
       5:         list_add(&inode->i_lru, &inode_lru);
       6:         inodes_stat.nr_unused++;
       7:     }
       8:     spin_unlock(&inode_lru_lock);
       9: }

    因此inode_lru就是全局的unused inode列表,通过“Least Recently Used”的顺序保存。

    此外,操作inode_lru的函数还有prune_icache

       1: /*
       2:  * Scan `goal' inodes on the unused list for freeable ones. They are moved to a
       3:  * temporary list and then are freed outside inode_lru_lock by dispose_list().
       4:  *
       5:  * Any inodes which are pinned purely because of attached pagecache have their
       6:  * pagecache removed.  If the inode has metadata buffers attached to
       7:  * mapping->private_list then try to remove them.
       8:  *
       9:  * If the inode has the I_REFERENCED flag set, then it means that it has been
      10:  * used recently - the flag is set in iput_final(). When we encounter such an
      11:  * inode, clear the flag and move it to the back of the LRU so it gets another
      12:  * pass through the LRU before it gets reclaimed. This is necessary because of
      13:  * the fact we are doing lazy LRU updates to minimise lock contention so the
      14:  * LRU does not have strict ordering. Hence we don't want to reclaim inodes
      15:  * with this flag set because they are the inodes that are out of order.
      16:  */
      17: static void prune_icache(int nr_to_scan)
      18: {
      19:     LIST_HEAD(freeable);
      20:     int nr_scanned;
      21:     unsigned long reap = 0;
      22:  
      23:     down_read(&iprune_sem);
      24:     spin_lock(&inode_lru_lock);
      25:     for (nr_scanned = 0; nr_scanned < nr_to_scan; nr_scanned++) {
      26:         struct inode *inode;
      27:  
      28:         if (list_empty(&inode_lru))
      29:             break;
      30:  
      31:         inode = list_entry(inode_lru.prev, struct inode, i_lru);
      32:  
      33:         /*
      34:          * we are inverting the inode_lru_lock/inode->i_lock here,
      35:          * so use a trylock. If we fail to get the lock, just move the
      36:          * inode to the back of the list so we don't spin on it.
      37:          */
      38:         if (!spin_trylock(&inode->i_lock)) {
      39:             list_move(&inode->i_lru, &inode_lru);
      40:             continue;
      41:         }
      42:  
      43:         /*
      44:          * Referenced or dirty inodes are still in use. Give them
      45:          * another pass through the LRU as we canot reclaim them now.
      46:          */
      47:         if (atomic_read(&inode->i_count) ||
      48:             (inode->i_state & ~I_REFERENCED)) {
      49:             list_del_init(&inode->i_lru);
      50:             spin_unlock(&inode->i_lock);
      51:             inodes_stat.nr_unused--;
      52:             continue;
      53:         }
      54:  
      55:         /* recently referenced inodes get one more pass */
      56:         if (inode->i_state & I_REFERENCED) {
      57:             inode->i_state &= ~I_REFERENCED;
      58:             list_move(&inode->i_lru, &inode_lru);
      59:             spin_unlock(&inode->i_lock);
      60:             continue;
      61:         }
      62:         if (inode_has_buffers(inode) || inode->i_data.nrpages) {
      63:             __iget(inode);
      64:             spin_unlock(&inode->i_lock);
      65:             spin_unlock(&inode_lru_lock);
      66:             if (remove_inode_buffers(inode))
      67:                 reap += invalidate_mapping_pages(&inode->i_data,
      68:                                 0, -1);
      69:             iput(inode);
      70:             spin_lock(&inode_lru_lock);
      71:  
      72:             if (inode != list_entry(inode_lru.next,
      73:                         struct inode, i_lru))
      74:                 continue;    /* wrong inode or list_empty */
      75:             /* avoid lock inversions with trylock */
      76:             if (!spin_trylock(&inode->i_lock))
      77:                 continue;
      78:             if (!can_unuse(inode)) {
      79:                 spin_unlock(&inode->i_lock);
      80:                 continue;
      81:             }
      82:         }
      83:         WARN_ON(inode->i_state & I_NEW);
      84:         inode->i_state |= I_FREEING;
      85:         spin_unlock(&inode->i_lock);
      86:  
      87:         list_move(&inode->i_lru, &freeable);
      88:         inodes_stat.nr_unused--;
      89:     }
      90:     if (current_is_kswapd())
      91:         __count_vm_events(KSWAPD_INODESTEAL, reap);
      92:     else
      93:         __count_vm_events(PGINODESTEAL, reap);
      94:     spin_unlock(&inode_lru_lock);
      95:  
      96:     dispose_list(&freeable);
      97:     up_read(&iprune_sem);
      98: }

    该函数的作用是在内存压力较大时,通过缩减缓存的inode列表inode_lru以释放出更多的内存。

    该函数每次从inode_lru的尾部(inode_lru.prev,即最久未被使用的一端)取出一个inode,做一些简单检查,如果inode还有一些原因需要继续存在于缓存中,就用list_move将该inode移回链表的头部,让它在LRU中再多停留一轮,然后检查下一个inode。

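    使得inode不能被立即回收的原因包括:无法获取到保护inode的锁i_lock,或者inode刚刚被引用过(设置了I_REFERENCED),这两种情况下inode会被移回链表头部;而如果inode的使用计数非0,或者i_state中除I_REFERENCED之外还有其他标志(例如inode是脏的),它会被list_del_init直接从LRU链表中摘除,而不是被回收。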

    还有一个比较实用的问题,我们看到在调用iput_final时,检查如果i_nlink不为0,并且inode仍然在哈希表中(即drop为0),就将其放到缓存inode_lru中,但是在prune_icache时,会检查i_count引用计数是否为0。

    这也就是说,如果一个inode对应的磁盘文件已经被删除了(i_nlink为0),但是还有进程持有对它的引用(i_count不为0)的话,那么它不会被直接删除:iput_final只在最后一个引用被释放时才会被调用,在此之前inode会一直保留在内存中,也就是说对其操作的进程仍然可以对已经缓存下来的数据页面page进行操作。

    直到没有进程再对其进行操作了,才有可能被清除出缓存。

    inode中有两个链表节点成员,分别是i_sb_list和i_wb_list,其中i_sb_list用于将inode链入super_block->s_inodes列表,而i_wb_list用于将inode链入后备设备(backing device)的回写IO链表。

    2. dentry缓存

    dentry缓存的目的,为了减少对慢速磁盘的访问,每当VFS文件系统对底层的数据进行访问时,都会将访问的结果缓存下来,保存成一个dentry对象。

    而且dentry对象的组织与管理,是和inode缓存极其相似的,也有一个hash表,和一个lru队列。

    而且当内存压力较大时,也会调用prune_dcache来企图释放lru中优先级较低的dentry项目。

    区别在于,inode是不需要维护目录的关系的,但是dentry需要,因此dentry的组织比inode要复杂。

       1: static struct hlist_bl_head *dentry_hashtable __read_mostly;
       2:  

    在super_block中

       1: /* s_dentry_lru, s_nr_dentry_unused protected by dcache.c lru locks */
       2:     struct list_head    s_dentry_lru;    /* unused dentry lru */
       3:  

    因此,dentry的hash表(dentry_hashtable)是全局的,而缓存未使用dentry的LRU链表(s_dentry_lru)则位于各个super_block数据结构中。

       1: /*
       2:  * dentry_lru_(add|del|move_tail) must be called with d_lock held.
       3:  */
       4: static void dentry_lru_add(struct dentry *dentry)
       5: {
       6:     if (list_empty(&dentry->d_lru)) {
       7:         spin_lock(&dcache_lru_lock);
       8:         list_add(&dentry->d_lru, &dentry->d_sb->s_dentry_lru);
       9:         dentry->d_sb->s_nr_dentry_unused++;
      10:         dentry_stat.nr_unused++;
      11:         spin_unlock(&dcache_lru_lock);
      12:     }
      13: }

    dentry_lru_add函数用于向dentry缓存中添加一个被释放的dentry,它被函数dput调用。

       1: /* 
       2:  * This is dput
       3:  *
       4:  * This is complicated by the fact that we do not want to put
       5:  * dentries that are no longer on any hash chain on the unused
       6:  * list: we'd much rather just get rid of them immediately.
       7:  *
       8:  * However, that implies that we have to traverse the dentry
       9:  * tree upwards to the parents which might _also_ now be
      10:  * scheduled for deletion (it may have been only waiting for
      11:  * its last child to go away).
      12:  *
      13:  * This tail recursion is done by hand as we don't want to depend
      14:  * on the compiler to always get this right (gcc generally doesn't).
      15:  * Real recursion would eat up our stack space.
      16:  */
      17:  
      18: /*
      19:  * dput - release a dentry
      20:  * @dentry: dentry to release 
      21:  *
      22:  * Release a dentry. This will drop the usage count and if appropriate
      23:  * call the dentry unlink method as well as removing it from the queues and
      24:  * releasing its resources. If the parent dentries were scheduled for release
      25:  * they too may now get deleted.
      26:  */
      27: void dput(struct dentry *dentry)
      28: {
      29:     if (!dentry)
      30:         return;
      31:  
      32: repeat:
      33:     if (dentry->d_count == 1)
      34:         might_sleep();
      35:     spin_lock(&dentry->d_lock);
      36:     BUG_ON(!dentry->d_count);
      37:     if (dentry->d_count > 1) {
      38:         dentry->d_count--;
      39:         spin_unlock(&dentry->d_lock);
      40:         return;
      41:     }
      42:  
      43:     if (dentry->d_flags & DCACHE_OP_DELETE) {
      44:         if (dentry->d_op->d_delete(dentry))
      45:             goto kill_it;
      46:     }
      47:  
      48:     /* Unreachable? Get rid of it */
      49:      if (d_unhashed(dentry))
      50:         goto kill_it;
      51:  
      52:     /* Otherwise leave it cached and ensure it's on the LRU */
      53:     dentry->d_flags |= DCACHE_REFERENCED;
      54:     dentry_lru_add(dentry);
      55:  
      56:     dentry->d_count--;
      57:     spin_unlock(&dentry->d_lock);
      58:     return;
      59:  
      60: kill_it:
      61:     dentry = dentry_kill(dentry, 1);
      62:     if (dentry)
      63:         goto repeat;
      64: }
      65: EXPORT_SYMBOL(dput);

    所有的dentry实例会形成一个网络,用于反映文件系统的结构。

    d_subdirs成员,里面保存着所有的子目录以及该目录下的文件组成的列表。

    d_child成员,是该dentry链接到其父目录的dentry节点的锚点。

    这两个成员,是构成文件系统的层次结构的基本设施。

    if (dentry->d_count == 1)
            might_sleep();

    参考:http://yuxu9710108.blog.163.com/blog/static/23751534201011715413404/

    用于调试时,提示atomic context的可能睡眠情况。

    分析dput函数的逻辑:

    如果dentry的引用计数大于1,那么代表还有其他的地方在使用这个dentry,因此只减少引用计数,直接返回;

    如果dentry->d_flags里面设置了DCACHE_OP_DELETE标志,那么调用d_op->d_delete函数指针,由文件系统判断该dentry是否应当被丢弃;只有当它返回非0时,才调用dentry_kill来处理;

    【d_op->d_delete与dentry_kill在功能上有什么不同?】

    如果在全局的hash表中也已经找不到该dentry了(d_unhashed),那么直接调用dentry_kill来处理;

    如果dentry的引用计数为1,而且也不属于上面两种需要调用dentry_kill的情况,那么就将其缓存在super_block的LRU队列中。

    我们看一种可能的d_delete的实现

       1: /*
       2:  * This is called from dput() when d_count is going to 0.
       3:  */
       4: static int nfs_dentry_delete(const struct dentry *dentry)
       5: {
       6:     dfprintk(VFS, "NFS: dentry_delete(%s/%s, %x)\n",
       7:         dentry->d_parent->d_name.name, dentry->d_name.name,
       8:         dentry->d_flags);
       9:  
      10:     /* Unhash any dentry with a stale inode */
      11:     if (dentry->d_inode != NULL && NFS_STALE(dentry->d_inode))
      12:         return 1;
      13:  
      14:     if (dentry->d_flags & DCACHE_NFSFS_RENAMED) {
      15:         /* Unhash it, so that ->d_iput() would be called */
      16:         return 1;
      17:     }
      18:     if (!(dentry->d_sb->s_flags & MS_ACTIVE)) {
      19:         /* Unhash it, so that ancestors of killed async unlink
      20:          * files will be cleaned up during umount */
      21:         return 1;
      22:     }
      23:     return 0;
      24:  
      25: }

    可见,该函数是进行一些内部的判断,决定是否需要将该dentry从全局的hash表中删除掉。

    if (dentry->d_flags & DCACHE_OP_DELETE) {
            if (dentry->d_op->d_delete(dentry))
                goto kill_it;
        }

       1: /*
       2:  * Finish off a dentry we've decided to kill.
       3:  * dentry->d_lock must be held, returns with it unlocked.
       4:  * If ref is non-zero, then decrement the refcount too.
       5:  * Returns dentry requiring refcount drop, or NULL if we're done.
       6:  */
       7: static inline struct dentry *dentry_kill(struct dentry *dentry, int ref)
       8:     __releases(dentry->d_lock)
       9: {
      10:     struct inode *inode;
      11:     struct dentry *parent;
      12:  
      13:     inode = dentry->d_inode;
      14:     if (inode && !spin_trylock(&inode->i_lock)) {
      15: relock:
      16:         spin_unlock(&dentry->d_lock);
      17:         cpu_relax();
      18:         return dentry; /* try again with same dentry */
      19:     }
      20:     if (IS_ROOT(dentry))
      21:         parent = NULL;
      22:     else
      23:         parent = dentry->d_parent;
      24:     if (parent && !spin_trylock(&parent->d_lock)) {
      25:         if (inode)
      26:             spin_unlock(&inode->i_lock);
      27:         goto relock;
      28:     }
      29:  
      30:     if (ref)
      31:         dentry->d_count--;
      32:     /* if dentry was on the d_lru list delete it from there */
      33:     dentry_lru_del(dentry);
      34:     /* if it was on the hash then remove it */
      35:     __d_drop(dentry);
      36:     return d_kill(dentry, parent);
      37: }
  • 相关阅读:
    Dynamics AX 2012 R2 配置E-Mail模板
    Dynamics AX 2012 R2 设置E-Mail
    Dynamics AX 2012 R2 为运行失败的批处理任务设置预警
    Dynamics AX 2012 R2 耗尽用户
    Dynamics AX 2012 R2 创建一个专用的批处理服务器
    Dynamics AX 2012 R2 创建一个带有负载均衡的服务器集群
    Dynamics AX 2012 R2 安装额外的AOS
    Dynamics AX 2012 R2 将系统用户账号连接到工作人员记录
    Dynamics AX 2012 R2 从代码中调用SSRS Report
    Dynamics AX 2012 R2 IIS WebSite Unauthorized 401
  • 原文地址:https://www.cnblogs.com/long123king/p/3536486.html
Copyright © 2011-2022 走看看