Newer
Older
/*
* linux/fs/namei.c
*
* Copyright (C) 1991, 1992 Linus Torvalds
*/
/*
* Some corrections by tytso.
*/
/* [Feb 1997 T. Schoebel-Theuer] Complete rewrite of the pathname
* lookup logic.
*/
/* [Feb-Apr 2000, AV] Rewrite to the new namespace architecture.
*/
#include <linux/init.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/fs.h>
#include <linux/namei.h>
#include <linux/pagemap.h>
#include <linux/personality.h>
#include <linux/security.h>
#include <linux/syscalls.h>
#include <linux/mount.h>
#include <linux/audit.h>
#include <linux/file.h>
#include <linux/posix_acl.h>
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
/* [Feb-1997 T. Schoebel-Theuer]
* Fundamental changes in the pathname lookup mechanisms (namei)
* were necessary because of omirr. The reason is that omirr needs
* to know the _real_ pathname, not the user-supplied one, in case
* of symlinks (and also when transname replacements occur).
*
* The new code replaces the old recursive symlink resolution with
* an iterative one (in case of non-nested symlink chains). It does
* this with calls to <fs>_follow_link().
* As a side effect, dir_namei(), _namei() and follow_link() are now
* replaced with a single function lookup_dentry() that can handle all
* the special cases of the former code.
*
* With the new dcache, the pathname is stored at each inode, at least as
* long as the refcount of the inode is positive. As a side effect, the
* size of the dcache depends on the inode cache and thus is dynamic.
*
* [29-Apr-1998 C. Scott Ananian] Updated above description of symlink
* resolution to correspond with current state of the code.
*
* Note that the symlink resolution is not *completely* iterative.
* There is still a significant amount of tail- and mid- recursion in
* the algorithm. Also, note that <fs>_readlink() is not used in
* lookup_dentry(): lookup_dentry() on the result of <fs>_readlink()
* may return different results than <fs>_follow_link(). Many virtual
* filesystems (including /proc) exhibit this behavior.
*/
/* [24-Feb-97 T. Schoebel-Theuer] Side effects caused by new implementation:
* New symlink semantics: when open() is called with flags O_CREAT | O_EXCL
* and the name already exists in form of a symlink, try to create the new
* name indicated by the symlink. The old code always complained that the
* name already exists, due to not following the symlink even if its target
* is nonexistent. The new semantics affects also mknod() and link() when
* the name is a symlink pointing to a non-existent name.
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
*
* I don't know which semantics is the right one, since I have no access
* to standards. But I found by trial that HP-UX 9.0 has the full "new"
* semantics implemented, while SunOS 4.1.1 and Solaris (SunOS 5.4) have the
* "old" one. Personally, I think the new semantics is much more logical.
* Note that "ln old new" where "new" is a symlink pointing to a non-existing
* file does succeed in both HP-UX and SunOs, but not in Solaris
* and in the old Linux semantics.
*/
/* [16-Dec-97 Kevin Buhr] For security reasons, we change some symlink
* semantics. See the comments in "open_namei" and "do_link" below.
*
* [10-Sep-98 Alan Modra] Another symlink change.
*/
/* [Feb-Apr 2000 AV] Complete rewrite. Rules for symlinks:
* inside the path - always follow.
* in the last component in creation/removal/renaming - never follow.
* if LOOKUP_FOLLOW passed - follow.
* if the pathname has trailing slashes - follow.
* otherwise - don't follow.
* (applied in that order).
*
* [Jun 2000 AV] Inconsistent behaviour of open() in case if flags==O_CREAT
* restored for 2.4. This is the last surviving part of old 4.2BSD bug.
* During the 2.4 we need to fix the userland stuff depending on it -
* hopefully we will be able to get rid of that wart in 2.5. So far only
* XEmacs seems to be relying on it...
*/
/*
* [Sep 2001 AV] Single-semaphore locking scheme (kudos to David Holland)
* implemented. Let's see if raised priority of ->s_vfs_rename_mutex gives
* any extra contention...
*/
/* In order to reduce some races, while at the same time doing additional
* checking and hopefully speeding things up, we copy filenames to the
* kernel data space before using them..
*
* POSIX.1 2.4: an empty pathname is invalid (ENOENT).
* PATH_MAX includes the nul terminator --RR.
*/
static int do_getname(const char __user *filename, char *page)
{
int retval;
unsigned long len = PATH_MAX;
if (!segment_eq(get_fs(), KERNEL_DS)) {
if ((unsigned long) filename >= TASK_SIZE)
return -EFAULT;
if (TASK_SIZE - (unsigned long) filename < PATH_MAX)
len = TASK_SIZE - (unsigned long) filename;
}
retval = strncpy_from_user(page, filename, len);
if (retval > 0) {
if (retval < len)
return 0;
return -ENAMETOOLONG;
} else if (!retval)
retval = -ENOENT;
return retval;
}

Andy Whitcroft
committed
static char *getname_flags(const char __user *filename, int flags, int *empty)
{
char *tmp, *result;
result = ERR_PTR(-ENOMEM);
tmp = __getname();
if (tmp) {
int retval = do_getname(filename, tmp);
result = tmp;
if (retval < 0) {

Andy Whitcroft
committed
if (retval == -ENOENT && empty)
*empty = 1;
if (retval != -ENOENT || !(flags & LOOKUP_EMPTY)) {
__putname(tmp);
result = ERR_PTR(retval);
}
}
}
audit_getname(result);
return result;
}
char *getname(const char __user * filename)
{

Andy Whitcroft
committed
return getname_flags(filename, 0, 0);
#ifdef CONFIG_AUDITSYSCALL
void putname(const char *name)
{
if (unlikely(!audit_dummy_context()))
audit_putname(name);
else
__putname(name);
}
EXPORT_SYMBOL(putname);
#endif
static int check_acl(struct inode *inode, int mask)
{
#ifdef CONFIG_FS_POSIX_ACL
struct posix_acl *acl;
if (mask & MAY_NOT_BLOCK) {
acl = get_cached_acl_rcu(inode, ACL_TYPE_ACCESS);
if (!acl)
/* no ->get_acl() calls in RCU mode... */
if (acl == ACL_NOT_CACHED)
return -ECHILD;
return posix_acl_permission(inode, acl, mask & ~MAY_NOT_BLOCK);
}
acl = get_cached_acl(inode, ACL_TYPE_ACCESS);
/*
* A filesystem can force a ACL callback by just never filling the
* ACL cache. But normally you'd fill the cache either at inode
* instantiation time, or on the first ->get_acl call.
* If the filesystem doesn't have a get_acl() function at all, we'll
* just create the negative cache entry.
*/
if (acl == ACL_NOT_CACHED) {
if (inode->i_op->get_acl) {
acl = inode->i_op->get_acl(inode, ACL_TYPE_ACCESS);
if (IS_ERR(acl))
return PTR_ERR(acl);
} else {
set_cached_acl(inode, ACL_TYPE_ACCESS, NULL);
return -EAGAIN;
}
}
if (acl) {
int error = posix_acl_permission(inode, acl, mask);
posix_acl_release(acl);
return error;
}
#endif
return -EAGAIN;
}
* This does the basic permission checking
static int acl_permission_check(struct inode *inode, int mask)
unsigned int mode = inode->i_mode;

Serge E. Hallyn
committed
if (current_user_ns() != inode_userns(inode))
goto other_perms;
if (likely(current_fsuid() == inode->i_uid))
if (IS_POSIXACL(inode) && (mode & S_IRWXG)) {
int error = check_acl(inode, mask);
if (error != -EAGAIN)
return error;
}
if (in_group_p(inode->i_gid))
mode >>= 3;
}

Serge E. Hallyn
committed
other_perms:
/*
* If the DACs are ok we don't need any capability check.
*/
if ((mask & ~mode & (MAY_READ | MAY_WRITE | MAY_EXEC)) == 0)
return -EACCES;
}
/**
* generic_permission - check for access rights on a Posix-like filesystem
* @inode: inode to check access rights for
* @mask: right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC, ...)
*
* Used to check for read/write/execute permissions on a file.
* We use "fsuid" for this, letting us set arbitrary permissions
* for filesystem access without changing the "normal" uids which
* are used for other things.
*
* generic_permission is rcu-walk aware. It returns -ECHILD in case an rcu-walk
* request cannot be satisfied (eg. requires blocking or too much complexity).
* It would then be called again in ref-walk mode.
int generic_permission(struct inode *inode, int mask)
{
int ret;
/*
* Do the basic permission checks.
ret = acl_permission_check(inode, mask);
if (ret != -EACCES)
return ret;
if (S_ISDIR(inode->i_mode)) {
/* DACs are overridable for directories */
if (ns_capable(inode_userns(inode), CAP_DAC_OVERRIDE))
return 0;
if (!(mask & MAY_WRITE))
if (ns_capable(inode_userns(inode), CAP_DAC_READ_SEARCH))
return 0;
return -EACCES;
}
* Executable DACs are overridable when there is
* at least one exec bit set.
if (!(mask & MAY_EXEC) || (inode->i_mode & S_IXUGO))

Serge E. Hallyn
committed
if (ns_capable(inode_userns(inode), CAP_DAC_OVERRIDE))
return 0;
/*
* Searching includes executable on directories, else just read.
*/
mask &= MAY_READ | MAY_WRITE | MAY_EXEC;
if (mask == MAY_READ)

Serge E. Hallyn
committed
if (ns_capable(inode_userns(inode), CAP_DAC_READ_SEARCH))
/*
* We _really_ want to just do "generic_permission()" without
* even looking at the inode->i_op values. So we keep a cache
* flag in inode->i_opflags, that says "this has not special
* permission function, use the fast case".
*/
static inline int do_inode_permission(struct inode *inode, int mask)
{
if (unlikely(!(inode->i_opflags & IOP_FASTPERM))) {
if (likely(inode->i_op->permission))
return inode->i_op->permission(inode, mask);
/* This gets set once for the inode lifetime */
spin_lock(&inode->i_lock);
inode->i_opflags |= IOP_FASTPERM;
spin_unlock(&inode->i_lock);
}
return generic_permission(inode, mask);
}
/**
* inode_permission - check for access rights to a given inode
* @inode: inode to check permission on
* @mask: right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC, ...)
*
* Used to check for read/write/execute permissions on an inode.
* We use "fsuid" for this, letting us set arbitrary permissions
* for filesystem access without changing the "normal" uids which
* are used for other things.
*
* When checking for MAY_APPEND, MAY_WRITE must also be set in @mask.
int inode_permission(struct inode *inode, int mask)
if (unlikely(mask & MAY_WRITE)) {
/*
* Nobody gets write access to a read-only fs.
*/
if (IS_RDONLY(inode) &&
(S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)))
return -EROFS;
/*
* Nobody gets write access to an immutable file.
*/
if (IS_IMMUTABLE(inode))
return -EACCES;
}
retval = do_inode_permission(inode, mask);
retval = devcgroup_inode_permission(inode, mask);
if (retval)
return retval;
return security_inode_permission(inode, mask);
/**
* path_get - get a reference to a path
* @path: path to get the reference to
*
* Given a path increment the reference count to the dentry and the vfsmount.
*/
void path_get(struct path *path)
{
mntget(path->mnt);
dget(path->dentry);
}
EXPORT_SYMBOL(path_get);
/**
* path_put - put a reference to a path
* @path: path to put the reference to
*
* Given a path decrement the reference count to the dentry and the vfsmount.
*/
void path_put(struct path *path)
* Path walking has 2 modes, rcu-walk and ref-walk (see
* Documentation/filesystems/path-lookup.txt). In situations when we can't
* continue in RCU mode, we attempt to drop out of rcu-walk mode and grab
* normal reference counts on dentries and vfsmounts to transition to rcu-walk
* mode. Refcounts are grabbed at the last known good point before rcu-walk
* got stuck, so ref-walk may continue from there. If this is not successful
* (eg. a seqcount has changed), then failure is returned and it's up to caller
* to restart the path walk from the beginning in ref-walk mode.
* unlazy_walk - try to switch to ref-walk mode.
* @nd: nameidata pathwalk data
* @dentry: child of nd->path.dentry or NULL
* Returns: 0 on success, -ECHILD on failure
* unlazy_walk attempts to legitimize the current nd->path, nd->root and dentry
* for ref-walk mode. @dentry must be a path found by a do_lookup call on
* @nd or NULL. Must be called from rcu-walk context.
static int unlazy_walk(struct nameidata *nd, struct dentry *dentry)
{
struct fs_struct *fs = current->fs;
struct dentry *parent = nd->path.dentry;
if (nd->root.mnt && !(nd->flags & LOOKUP_ROOT)) {
want_root = 1;
spin_lock(&fs->lock);
if (nd->root.mnt != fs->root.mnt ||
nd->root.dentry != fs->root.dentry)
goto err_root;
}
spin_lock(&parent->d_lock);
if (!dentry) {
if (!__d_rcu_to_refcount(parent, nd->seq))
goto err_parent;
BUG_ON(nd->inode != parent->d_inode);
} else {
if (dentry->d_parent != parent)
goto err_parent;
spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
if (!__d_rcu_to_refcount(dentry, nd->seq))
goto err_child;
/*
* If the sequence check on the child dentry passed, then
* the child has not been removed from its parent. This
* means the parent dentry must be valid and able to take
* a reference at this point.
*/
BUG_ON(!IS_ROOT(dentry) && dentry->d_parent != parent);
BUG_ON(!parent->d_count);
parent->d_count++;
spin_unlock(&dentry->d_lock);
}
path_get(&nd->root);
spin_unlock(&fs->lock);
}
mntget(nd->path.mnt);
rcu_read_unlock();
br_read_unlock(vfsmount_lock);
nd->flags &= ~LOOKUP_RCU;
return 0;
spin_unlock(&parent->d_lock);
err_root:
spin_unlock(&fs->lock);
return -ECHILD;
}
/**
* release_open_intent - free up open intent resources
* @nd: pointer to nameidata
*/
void release_open_intent(struct nameidata *nd)
{
struct file *file = nd->intent.open.file;
if (file && !IS_ERR(file)) {
if (file->f_path.dentry == NULL)
put_filp(file);
else
fput(file);
}
}
static inline int d_revalidate(struct dentry *dentry, struct nameidata *nd)
return dentry->d_op->d_revalidate(dentry, nd);
/**
* complete_walk - successful completion of path walk
* @nd: pointer nameidata
* If we had been in RCU mode, drop out of it and legitimize nd->path.
* Revalidate the final result, unless we'd already done that during
* the path walk or the filesystem doesn't ask for it. Return 0 on
* success, -error on failure. In case of failure caller does not
* need to drop nd->path.
static int complete_walk(struct nameidata *nd)
int status;
if (nd->flags & LOOKUP_RCU) {
nd->flags &= ~LOOKUP_RCU;
if (!(nd->flags & LOOKUP_ROOT))
nd->root.mnt = NULL;
spin_lock(&dentry->d_lock);
if (unlikely(!__d_rcu_to_refcount(dentry, nd->seq))) {
spin_unlock(&dentry->d_lock);
rcu_read_unlock();
br_read_unlock(vfsmount_lock);
return -ECHILD;
}
BUG_ON(nd->inode != dentry->d_inode);
spin_unlock(&dentry->d_lock);
mntget(nd->path.mnt);
rcu_read_unlock();
br_read_unlock(vfsmount_lock);
}
if (likely(!(nd->flags & LOOKUP_JUMPED)))
return 0;
if (likely(!(dentry->d_flags & DCACHE_OP_REVALIDATE)))
return 0;
if (likely(!(dentry->d_sb->s_type->fs_flags & FS_REVAL_DOT)))
return 0;
/* Note: we do not d_invalidate() */
status = d_revalidate(dentry, nd);
if (status > 0)
return 0;
status = -ESTALE;
return status;
}
static __always_inline void set_root(struct nameidata *nd)
{
if (!nd->root.mnt)
get_fs_root(current->fs, &nd->root);
static int link_path_walk(const char *, struct nameidata *);
static __always_inline void set_root_rcu(struct nameidata *nd)
{
if (!nd->root.mnt) {
struct fs_struct *fs = current->fs;
unsigned seq;
do {
seq = read_seqcount_begin(&fs->seq);
nd->root = fs->root;
nd->seq = __read_seqcount_begin(&nd->root.dentry->d_seq);
} while (read_seqcount_retry(&fs->seq, seq));
static __always_inline int __vfs_follow_link(struct nameidata *nd, const char *link)
if (IS_ERR(link))
goto fail;
if (*link == '/') {
ret = link_path_walk(link, nd);
return ret;
static void path_put_conditional(struct path *path, struct nameidata *nd)
{
dput(path->dentry);
if (path->mnt != nd->path.mnt)
mntput(path->mnt);
}
static inline void path_to_nameidata(const struct path *path,
struct nameidata *nd)
if (!(nd->flags & LOOKUP_RCU)) {
dput(nd->path.dentry);
if (nd->path.mnt != path->mnt)
mntput(nd->path.mnt);
nd->path.dentry = path->dentry;
static inline void put_link(struct nameidata *nd, struct path *link, void *cookie)
{
struct inode *inode = link->dentry->d_inode;
if (!IS_ERR(cookie) && inode->i_op->put_link)
inode->i_op->put_link(link->dentry, nd, cookie);
path_put(link);
}
follow_link(struct path *link, struct nameidata *nd, void **p)
struct dentry *dentry = link->dentry;
BUG_ON(nd->flags & LOOKUP_RCU);
if (link->mnt == nd->path.mnt)
mntget(link->mnt);
if (unlikely(current->total_link_count >= 40)) {
*p = ERR_PTR(-ELOOP); /* no ->put_link(), please */
path_put(&nd->path);
return -ELOOP;
}
cond_resched();
current->total_link_count++;
touch_atime(link->mnt, dentry);
error = security_inode_follow_link(link->dentry, nd);
if (error) {
*p = ERR_PTR(error); /* no ->put_link(), please */
path_put(&nd->path);
return error;
}
*p = dentry->d_inode->i_op->follow_link(dentry, nd);
error = PTR_ERR(*p);
if (!IS_ERR(*p)) {
nd->inode = nd->path.dentry->d_inode;
if (nd->inode->i_op->follow_link) {
/* stepped on a _really_ weird one */
path_put(&nd->path);
error = -ELOOP;
}
}
static int follow_up_rcu(struct path *path)
{
struct vfsmount *parent;
struct dentry *mountpoint;
parent = path->mnt->mnt_parent;
if (parent == path->mnt)
return 0;
mountpoint = path->mnt->mnt_mountpoint;
path->dentry = mountpoint;
path->mnt = parent;
return 1;
}
{
struct vfsmount *parent;
struct dentry *mountpoint;
parent = path->mnt->mnt_parent;
if (parent == path->mnt) {
mountpoint = dget(path->mnt->mnt_mountpoint);
dput(path->dentry);
path->dentry = mountpoint;
mntput(path->mnt);
path->mnt = parent;
* Perform an automount
* - return -EISDIR to tell follow_managed() to stop and return the path we
* were called with.
static int follow_automount(struct path *path, unsigned flags,
bool *need_mntput)
struct vfsmount *mnt;
int err;
if (!path->dentry->d_op || !path->dentry->d_op->d_automount)
return -EREMOTE;
/* We don't want to mount if someone's just doing a stat -
* unless they're stat'ing a directory and appended a '/' to
* the name.
*
* We do, however, want to mount if someone wants to open or
* create a file of any type under the mountpoint, wants to
* traverse through the mountpoint or wants to open the
* mounted directory. Also, autofs may mark negative dentries
* as being automount points. These will need the attentions
* of the daemon to instantiate them before they can be used.
*/
if (!(flags & (LOOKUP_PARENT | LOOKUP_DIRECTORY |
LOOKUP_OPEN | LOOKUP_CREATE | LOOKUP_AUTOMOUNT)) &&
path->dentry->d_inode)
return -EISDIR;
current->total_link_count++;
if (current->total_link_count >= 40)
return -ELOOP;
mnt = path->dentry->d_op->d_automount(path);
if (IS_ERR(mnt)) {
/*
* The filesystem is allowed to return -EISDIR here to indicate
* it doesn't want to automount. For instance, autofs would do
* this so that its userspace daemon can mount on this dentry.
*
* However, we can only permit this if it's a terminal point in
* the path being looked up; if it wasn't then the remainder of
* the path is inaccessible and we should say so.
*/
return -EREMOTE;
return PTR_ERR(mnt);
if (!mnt) /* mount collision */
return 0;
if (!*need_mntput) {
/* lock_mount() may release path->mnt on error */
mntget(path->mnt);
*need_mntput = true;
}
err = finish_automount(mnt, path);
switch (err) {
case -EBUSY:
/* Someone else made a mount here whilst we were busy */
case 0:
path->mnt = mnt;
path->dentry = dget(mnt->mnt_root);
return 0;
}
/*
* Handle a dentry that is managed in some way.
* - Flagged for transit management (autofs)
* - Flagged as mountpoint
* - Flagged as automount point
*
* This may only be called in refwalk mode.
*
* Serialization is taken care of in namespace.c
*/
static int follow_managed(struct path *path, unsigned flags)
struct vfsmount *mnt = path->mnt; /* held by caller, must be left alone */
unsigned managed;
bool need_mntput = false;
/* Given that we're not holding a lock here, we retain the value in a
* local variable for each dentry as we look at it so that we don't see
* the components of that value change under us */
while (managed = ACCESS_ONCE(path->dentry->d_flags),
managed &= DCACHE_MANAGED_DENTRY,
unlikely(managed != 0)) {
/* Allow the filesystem to manage the transit without i_mutex
* being held. */
if (managed & DCACHE_MANAGE_TRANSIT) {
BUG_ON(!path->dentry->d_op);
BUG_ON(!path->dentry->d_op->d_manage);
ret = path->dentry->d_op->d_manage(path->dentry, false);
if (ret < 0)
}
/* Transit to a mounted filesystem. */
if (managed & DCACHE_MOUNTED) {
struct vfsmount *mounted = lookup_mnt(path);
if (mounted) {
dput(path->dentry);
if (need_mntput)
mntput(path->mnt);
path->mnt = mounted;
path->dentry = dget(mounted->mnt_root);
need_mntput = true;
continue;
}
/* Something is mounted on this dentry in another
* namespace and/or whatever was mounted there in this
* namespace got unmounted before we managed to get the
* vfsmount_lock */
}
/* Handle an automount point */
if (managed & DCACHE_NEED_AUTOMOUNT) {
ret = follow_automount(path, flags, &need_mntput);
if (ret < 0)
continue;
}
/* We didn't change the current path point */
break;
if (need_mntput && path->mnt == mnt)
mntput(path->mnt);
if (ret == -EISDIR)
ret = 0;
return ret < 0 ? ret : need_mntput;
int follow_down_one(struct path *path)
dput(path->dentry);
mntput(path->mnt);
path->mnt = mounted;
path->dentry = dget(mounted->mnt_root);
static inline bool managed_dentry_might_block(struct dentry *dentry)
{
return (dentry->d_flags & DCACHE_MANAGE_TRANSIT &&
dentry->d_op->d_manage(dentry, true) < 0);
}
/*
* Try to skip to top of mountpoint pile in rcuwalk mode. Fail if
* we meet a managed dentry that would need blocking.
*/
static bool __follow_mount_rcu(struct nameidata *nd, struct path *path,
{
for (;;) {
/*
* Don't forget we might have a non-mountpoint managed dentry
* that wants to block transit.
*/
if (unlikely(managed_dentry_might_block(path->dentry)))
if (!d_mountpoint(path->dentry))
break;
mounted = __lookup_mnt(path->mnt, path->dentry, 1);
if (!mounted)
break;
path->mnt = &mounted->mnt;
path->dentry = mounted->mnt.mnt_root;
nd->flags |= LOOKUP_JUMPED;
nd->seq = read_seqcount_begin(&path->dentry->d_seq);
/*
* Update the inode too. We don't need to re-check the
* dentry sequence number here after this d_inode read,
* because a mount-point is always pinned.
*/
*inode = path->dentry->d_inode;
}
return true;
}
static void follow_mount_rcu(struct nameidata *nd)
while (d_mountpoint(nd->path.dentry)) {
mounted = __lookup_mnt(nd->path.mnt, nd->path.dentry, 1);
nd->path.mnt = &mounted->mnt;
nd->path.dentry = mounted->mnt.mnt_root;
nd->seq = read_seqcount_begin(&nd->path.dentry->d_seq);
static int follow_dotdot_rcu(struct nameidata *nd)
{
set_root_rcu(nd);
while (1) {
if (nd->path.dentry == nd->root.dentry &&
nd->path.mnt == nd->root.mnt) {
break;
}
if (nd->path.dentry != nd->path.mnt->mnt_root) {
struct dentry *old = nd->path.dentry;
struct dentry *parent = old->d_parent;
unsigned seq;
seq = read_seqcount_begin(&parent->d_seq);
if (read_seqcount_retry(&old->d_seq, nd->seq))
nd->path.dentry = parent;
nd->seq = seq;
break;
}
if (!follow_up_rcu(&nd->path))
break;
nd->seq = read_seqcount_begin(&nd->path.dentry->d_seq);
}
follow_mount_rcu(nd);
nd->inode = nd->path.dentry->d_inode;
failed:
nd->flags &= ~LOOKUP_RCU;
if (!(nd->flags & LOOKUP_ROOT))
nd->root.mnt = NULL;
rcu_read_unlock();
br_read_unlock(vfsmount_lock);
return -ECHILD;
/*
* Follow down to the covering mount currently visible to userspace. At each
* point, the filesystem owning that dentry may be queried as to whether the
* caller is permitted to proceed or not.
*/
int follow_down(struct path *path)
{
unsigned managed;
int ret;
while (managed = ACCESS_ONCE(path->dentry->d_flags),
unlikely(managed & DCACHE_MANAGED_DENTRY)) {
/* Allow the filesystem to manage the transit without i_mutex
* being held.
*
* We indicate to the filesystem if someone is trying to mount
* something here. This gives autofs the chance to deny anyone
* other than its daemon the right to mount on its
* superstructure.
*
* The filesystem may sleep at this point.
*/
if (managed & DCACHE_MANAGE_TRANSIT) {
BUG_ON(!path->dentry->d_op);
BUG_ON(!path->dentry->d_op->d_manage);
ret = path->dentry->d_op->d_manage(
if (ret < 0)
return ret == -EISDIR ? 0 : ret;
}
/* Transit to a mounted filesystem. */
if (managed & DCACHE_MOUNTED) {
struct vfsmount *mounted = lookup_mnt(path);