zoukankan      html  css  js  c++  java
  • vfs open系统调用flow之link_path_walk()

    vfs open系统调用flow之link_path_walk()

    do_filp_open()里将pathname保存到nameidata里,pathname是open file的完整路径名,调用path_openat,此时是flags是带了LOOKUP_RCU flag的

    struct file *do_filp_open(int dfd, struct filename *pathname,
            const struct open_flags *op)
    {
        struct nameidata nd;
        int flags = op->lookup_flags;
        struct file *filp;
    
        set_nameidata(&nd, dfd, pathname);
        filp = path_openat(&nd, op, flags | LOOKUP_RCU);
        if (unlikely(filp == ERR_PTR(-ECHILD)))
            filp = path_openat(&nd, op, flags);
        if (unlikely(filp == ERR_PTR(-ESTALE)))
            filp = path_openat(&nd, op, flags | LOOKUP_REVAL);
        restore_nameidata();
        return filp;
    }

    path_openat()里主要call了3个函数path_init()、link_path_walk()、do_last()

    调用path_init()设置nameidata的path结构体,对于常规文件来说,比如/data/test/test.txt,设置path结构体‘指向’根目录/,即设置path.mnt/path.dentry‘指向’根目录/,为后续的目录解析做准备。path_init()的返回值s即指向open file的完整路径名字串开头。

    static struct file *path_openat(struct nameidata *nd,
                const struct open_flags *op, unsigned flags)
    {
        struct file *file;
        int error;
    
        file = alloc_empty_file(op->open_flag, current_cred());
        if (IS_ERR(file))
            return file;
    
        if (unlikely(file->f_flags & __O_TMPFILE)) {
            error = do_tmpfile(nd, flags, op, file);
        } else if (unlikely(file->f_flags & O_PATH)) {
            error = do_o_path(nd, flags, file);
        } else {
            const char *s = path_init(nd, flags);
            while (!(error = link_path_walk(s, nd)) &&
                (error = do_last(nd, file, op)) > 0) {
                nd->flags &= ~(LOOKUP_OPEN|LOOKUP_CREATE|LOOKUP_EXCL);
                s = trailing_symlink(nd);
            }
            terminate_walk(nd);
        }
        if (likely(!error)) {
            if (likely(file->f_mode & FMODE_OPENED))
                return file;
            WARN_ON(1);
            error = -EINVAL;
        }
        fput(file);
        if (error == -EOPENSTALE) {
            if (flags & LOOKUP_RCU)
                error = -ECHILD;
            else
                error = -ESTALE;
        }
        return ERR_PTR(error);
    }
    static int link_path_walk(const char *name, struct nameidata *nd)
    {
        int err;
    
        if (IS_ERR(name))
            return PTR_ERR(name);
        while (*name=='/')
            name++;
        if (!*name)
            return 0;
    
        /* At this point we know we have a real path component. */
        for(;;) {
            u64 hash_len;
            int type;
    
            err = may_lookup(nd);
            if (err)
                return err;
    
            hash_len = hash_name(nd->path.dentry, name); //计算当前目录的hash_len,这个变量高4 byte是当前目录name字串长度,低4byte是当前目录(路径)的hash值,hash值的计算是基于当前目录的父目录dentry(nd->path.dentry)来计算的,所以它跟其目录(路径)dentry是关联的
    
            type = LAST_NORM;
            if (name[0] == '.') switch (hashlen_len(hash_len)) {
                case 2:
                    if (name[1] == '.') {
                        type = LAST_DOTDOT;
                        nd->flags |= LOOKUP_JUMPED;
                    }
                    break;
                case 1:
                    type = LAST_DOT;
            }
            if (likely(type == LAST_NORM)) {
                struct dentry *parent = nd->path.dentry;
                nd->flags &= ~LOOKUP_JUMPED;
                if (unlikely(parent->d_flags & DCACHE_OP_HASH)) {  //to do
                    struct qstr this = { { .hash_len = hash_len }, .name = name };
                    err = parent->d_op->d_hash(parent, &this);
                    if (err < 0)
                        return err;
                    hash_len = this.hash_len;
                    name = this.name;
                }
            }
    
            nd->last.hash_len = hash_len; //更新nameidata last结构体,last.hash_len,name
            nd->last.name = name;
            nd->last_type = type;
    
            name += hashlen_len(hash_len); //name指向下一级目录
            if (!*name)
                goto OK;
            /*
             * If it wasn't NUL, we know it was '/'. Skip that
             * slash, and continue until no more slashes.
             */
            do {
                name++;
            } while (unlikely(*name == '/'));
            if (unlikely(!*name)) { //假设open file文件名路径上没有任何symlink,则如果这个条件满足,说明整个路径都解析完了,还剩最后的filename留给do_last()解析,此函数将从下面的!nd->depth条件处返回。
    OK:
                /* pathname body, done */
                if (!nd->depth) //如果open file完整路径上没有任何symlink,nd->depth等于0,
                    return 0;
                name = nd->stack[nd->depth - 1].name;
                /* trailing symlink, done */
                if (!name)
                    return 0;
                /* last component of nested symlink */
                err = walk_component(nd, WALK_FOLLOW); //symlink case
            } else {
                /* not the last component */
                err = walk_component(nd, WALK_FOLLOW | WALK_MORE); //常规目录case,即非symlink case
            }
            if (err < 0)
                return err;
    
            if (err) {
                const char *s = get_link(nd);
    
                if (IS_ERR(s))
                    return PTR_ERR(s);
                err = 0;
                if (unlikely(!s)) {
                    /* jumped */
                    put_link(nd);
                } else {
                    nd->stack[nd->depth - 1].name = name;
                    name = s;
                    continue;
                }
            }
            if (unlikely(!d_can_lookup(nd->path.dentry))) {
                if (nd->flags & LOOKUP_RCU) {
                    if (unlazy_walk(nd))
                        return -ECHILD;
                }
                return -ENOTDIR;
            }
        }
    }

    walk_component()首先会调用lookup_fast()在dcache里查找,如果没有找到,则直接返回0,然后进入lookup_slow()的slow path。

    无论是在dcache里找到了当前目录对应的dentry(fast path)或者没有找到(slow path),这两个path都会去设置path结构体里的dentry、mnt成员,将当前路径更新到path结构体。

    path结构体更新后,最后调用step_info/path_to_nameidata将path结构体更新到nd.path,这样nd里的path就指向了当前目录了,至此完成一级目录的解析查找,返回link_path_walk()将基于nd.path作为父目录解析下一级目录

    static int walk_component(struct nameidata *nd, int flags)
    {
        struct path path;
        struct inode *inode;
        unsigned seq;
        int err;
        /*
         * "." and ".." are special - ".." especially so because it has
         * to be able to know about the current root directory and
         * parent relationships.
         */
        if (unlikely(nd->last_type != LAST_NORM)) {
            err = handle_dots(nd, nd->last_type);
            if (!(flags & WALK_MORE) && nd->depth)
                put_link(nd);
            return err;
        }
        err = lookup_fast(nd, &path, &inode, &seq);
        if (unlikely(err <= 0)) { //上述look_fast()如果在dcache里没有找到当前目录(路径)的dentry,则会执行如下的lookup_slow()
            if (err < 0)
                return err;
            path.dentry = lookup_slow(&nd->last, nd->path.dentry,
                          nd->flags);
            if (IS_ERR(path.dentry))
                return PTR_ERR(path.dentry);
    
            path.mnt = nd->path.mnt;
            err = follow_managed(&path, nd);
            if (unlikely(err < 0))
                return err;
    
            if (unlikely(d_is_negative(path.dentry))) {
                path_to_nameidata(&path, nd);
                return -ENOENT;
            }
    
            seq = 0;    /* we are already out of RCU mode */
            inode = d_backing_inode(path.dentry);
        }
    
        return step_into(nd, &path, flags, inode, seq);
    }

    __lookup_slow()首先调用d_alloc_parallel给当前路径分配一个新的dentry,然后调用inode->i_op->lookup(),注意这里的inode是当前路径的父路径dentry的d_inode成员。inode->i_op是具体的文件系统inode operations函数集了,当前以ext4 fs来举例的话,就是ext4 fs的inode operations函数集,即ext4_dir_inode_operations,其lookup函数是ext4_lookup(),这个具体文件系统lookup的过程将单独在一篇文章里描述。

    具体文件系统的lookup完成后,此时的dentry已经有绑定的inode了,即已经设置了其d_inode成员了,然后调用d_lookup_done()将此dentry从lookup hash链表上移除(它是在d_alloc_parallel里被插入lookup hash),这个lookup hash链表的作用是避免其它线程也同时来查找当前目录造成重复alloc dentry的问题

    static struct dentry *__lookup_slow(const struct qstr *name,
                        struct dentry *dir,
                        unsigned int flags)
    {
        struct dentry *dentry, *old;
        struct inode *inode = dir->d_inode;
        DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);
    
        /* Don't go there if it's already dead */
        if (unlikely(IS_DEADDIR(inode)))
            return ERR_PTR(-ENOENT);
    again:
        dentry = d_alloc_parallel(dir, name, &wq);
        if (IS_ERR(dentry))
            return dentry;
        if (unlikely(!d_in_lookup(dentry))) { //这个不考虑并发的情况下这个条件不成立
            if (!(flags & LOOKUP_NO_REVAL)) {
                int error = d_revalidate(dentry, flags);
                if (unlikely(error <= 0)) {
                    if (!error) {
                        d_invalidate(dentry);
                        dput(dentry);
                        goto again;
                    }
                    dput(dentry);
                    dentry = ERR_PTR(error);
                }
            }
        } else {
            old = inode->i_op->lookup(inode, dentry, flags);
            d_lookup_done(dentry);
            if (unlikely(old)) { //如果dentry是一个目录的dentry,则有可能old是有效的;否则如果dentry是文件的dentry则old是null
                dput(dentry);
                dentry = old;
            }
        }
        return dentry;
    }

    d_alloc_parallel首先alloc一个dentry,然后调用__d_lookup_rcu在dcache里快速查找是否有当前目录对应的dentry,这个在不考虑并发的情况下,是找不到的。在并发条件下,可能不止一个线程去lookup当前目录,可能在其它cpu上的线程也在同时lookup这个目录。

    然后是在lookup hash链表上查找是否有当前目录对应的dentry,同样,在并发的条件下,有可能其它线程也在alloc当前目录,因为此时会进入具体的文件系统lookup当前目录,会去读取目录文件lookup当前目录,所以会涉及到IO操作,而IO操作是比较耗时的,有可能有其它线程已经将当前目录dentry加入了lookup hash链表但还在具体文件系统那边lookup,还没有完成整个当前目录的lookup,所以如果此时当前目录已经在lookup hash链表的话,当前lookup需要等其它线程完成它在具体文件系统那边的lookup过程,它那边完成了整个的lookup过程后,会将当前目录对应的dentry从lookup hash链表中移除,所以在d_alloc_parallel()里在lookup hash链表里查找时,如果该链表上的一个dentry的hash值、parent、name和当前目录的一致(说明当前目录已经在lookup hash链表上了),则会调用d_wait_lookup()等待当前目录的dentry从lookup hash链表上移除。d_wait_lookup()之后,基本会直接返回这个从lookup hash链表上找到的dentry,在return此dentry之前会先将上面已经alloc的dentry put。

    如果在lookup hash上没有找到当前目录的dentry,说明当前目录没有在此hash链表上,即在此时没有人也在lookup当前目录。此时会将新alloc的dentry的flags的DCACHE_PAR_LOOKUP flag置上,并把新alloc的dentry加入到lookup hash,最终return此dentry

    struct dentry *d_alloc_parallel(struct dentry *parent,
                    const struct qstr *name,
                    wait_queue_head_t *wq)
    {
        unsigned int hash = name->hash;
        struct hlist_bl_head *b = in_lookup_hash(parent, hash); //lookup hash链表
        struct hlist_bl_node *node;
        struct dentry *new = d_alloc(parent, name); //分配一个dentry
        struct dentry *dentry;
        unsigned seq, r_seq, d_seq;
    
        if (unlikely(!new))
            return ERR_PTR(-ENOMEM);
    
    retry:
        rcu_read_lock();
        seq = smp_load_acquire(&parent->d_inode->i_dir_seq);
        r_seq = read_seqbegin(&rename_lock);
        dentry = __d_lookup_rcu(parent, name, &d_seq); //再次到dcache里查找当前目录是否有对应的dentry,在不考虑并发的条件下,将查找不到
        if (unlikely(dentry)) {
            if (!lockref_get_not_dead(&dentry->d_lockref)) {
                rcu_read_unlock();
                goto retry;
            }
            if (read_seqcount_retry(&dentry->d_seq, d_seq)) {
                rcu_read_unlock();
                dput(dentry);
                goto retry;
            }
            rcu_read_unlock();
            dput(new);
            return dentry;
        }
        if (unlikely(read_seqretry(&rename_lock, r_seq))) {
            rcu_read_unlock();
            goto retry;
        }
    
        if (unlikely(seq & 1)) {
            rcu_read_unlock();
            goto retry;
        }
    
        hlist_bl_lock(b);
        if (unlikely(READ_ONCE(parent->d_inode->i_dir_seq) != seq)) {
            hlist_bl_unlock(b);
            rcu_read_unlock();
            goto retry;
        }
        /*
         * No changes for the parent since the beginning of d_lookup().
         * Since all removals from the chain happen with hlist_bl_lock(),
         * any potential in-lookup matches are going to stay here until
         * we unlock the chain.  All fields are stable in everything
         * we encounter.
         */
        hlist_bl_for_each_entry(dentry, node, b, d_u.d_in_lookup_hash) {  //从lookup hash链表上查找是否有当前路径对应的dentry,如果有找到,put上面alloc的dentry然后就直接返回找到的这个dentry了,这里有点奇怪,为什么不先查找,如果没有查找到再alloc dentry..
            if (dentry->d_name.hash != hash)
                continue;
            if (dentry->d_parent != parent)
                continue;
            if (!d_same_name(dentry, parent, name))
                continue;
            hlist_bl_unlock(b);
            /* now we can try to grab a reference */
            if (!lockref_get_not_dead(&dentry->d_lockref)) {
                rcu_read_unlock();
                goto retry;
            }
    
            rcu_read_unlock();
            /*
             * somebody is likely to be still doing lookup for it;
             * wait for them to finish
             */
            spin_lock(&dentry->d_lock);
            d_wait_lookup(dentry);
            /*
             * it's not in-lookup anymore; in principle we should repeat
             * everything from dcache lookup, but it's likely to be what
             * d_lookup() would've found anyway.  If it is, just return it;
             * otherwise we really have to repeat the whole thing.
             */
            if (unlikely(dentry->d_name.hash != hash))
                goto mismatch;
            if (unlikely(dentry->d_parent != parent))
                goto mismatch;
            if (unlikely(d_unhashed(dentry)))
                goto mismatch;
            if (unlikely(!d_same_name(dentry, parent, name)))
                goto mismatch;
            /* OK, it *is* a hashed match; return it */
            spin_unlock(&dentry->d_lock);
            dput(new);
            return dentry;
        }
        rcu_read_unlock();
        /* we can't take ->d_lock here; it's OK, though. */
        new->d_flags |= DCACHE_PAR_LOOKUP;
        new->d_wait = wq;
        hlist_bl_add_head_rcu(&new->d_u.d_in_lookup_hash, b); //将此新alloc的dentry插入lookup hash链表
        hlist_bl_unlock(b);
        return new;
    mismatch:
        spin_unlock(&dentry->d_lock);
        dput(dentry);
        goto retry;
    }
    EXPORT_SYMBOL(d_alloc_parallel);
  • 相关阅读:
    USES_CONVERSION宏(转)
    推送本地仓库至多个远程仓库(转)
    .gitignore的使用
    重命名分支
    不支持尝试执行的操作——————MFC窗口打开错误
    error C1010:在查找预编译头文件时遇到意外的文件结尾——————vs错误
    抓边(一)——————————————使用halcon测量助手
    CString, string, char *之间的相互转换(转)
    css模块化
    继承、原型链、在组件开发上的应用
  • 原文地址:https://www.cnblogs.com/aspirs/p/15733715.html
Copyright © 2011-2022 走看看