namei: stash the sampled ->d_seq into nameidata
New field: nd->next_seq. Set to 0 outside of RCU mode, holds the sampled value for the next dentry to be considered. Used instead of an arseload of local variables, arguments, etc. step_into() has lost seq argument; nd->next_seq is used, so dentry passed to it must be the one ->next_seq is about.

There are two requirements for RCU pathwalk: 1) it should not give a hard failure (other than -ECHILD) unless non-RCU pathwalk might fail that way given suitable timings. 2) it should not succeed unless non-RCU pathwalk might succeed with the same end location given suitable timings.

The use of seq numbers is the way we achieve that. Invariant we want to maintain is: if RCU pathwalk can reach the state with given nd->path, nd->inode and nd->seq after having traversed some part of pathname, it must be possible for non-RCU pathwalk to reach the same nd->path and nd->inode after having traversed the same part of pathname, and observe the nd->path.dentry->d_seq equal to what RCU pathwalk has in nd->seq.

For transition from parent to child, we sample child's ->d_seq and verify that parent's ->d_seq remains unchanged. Anything that disrupts parent-child relationship would've bumped ->d_seq on both.

For transitions from child to parent we sample parent's ->d_seq and verify that child's ->d_seq has not changed. Same reasoning as for the previous case applies.

For transition from mountpoint to root of mounted we sample the ->d_seq of root and verify that nobody has touched mount_lock since the beginning of pathwalk. That guarantees that mount we'd found had been there all along, with these mountpoint and root of the mounted. It would be possible for a non-RCU pathwalk to reach the previous state, find the same mount and observe its root at the moment we'd sampled ->d_seq of that root.

For transitions from root of mounted to mountpoint we sample ->d_seq of mountpoint and verify that mount_lock had not been touched since the beginning of pathwalk.
The same reasoning as in the previous case applies. Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
This commit is contained in:
parent
6e18032715
commit
03fa86e9f7
98
fs/namei.c
98
fs/namei.c
@@ -567,7 +567,7 @@ struct nameidata {
|
||||
struct path root;
|
||||
struct inode *inode; /* path.dentry.d_inode */
|
||||
unsigned int flags, state;
|
||||
unsigned seq, m_seq, r_seq;
|
||||
unsigned seq, next_seq, m_seq, r_seq;
|
||||
int last_type;
|
||||
unsigned depth;
|
||||
int total_link_count;
|
||||
@@ -668,6 +668,7 @@ static void drop_links(struct nameidata *nd)
|
||||
static void leave_rcu(struct nameidata *nd)
|
||||
{
|
||||
nd->flags &= ~LOOKUP_RCU;
|
||||
nd->seq = nd->next_seq = 0;
|
||||
rcu_read_unlock();
|
||||
}
|
||||
|
||||
@@ -792,7 +793,6 @@ out:
|
||||
* try_to_unlazy_next - try to switch to ref-walk mode.
|
||||
* @nd: nameidata pathwalk data
|
||||
* @dentry: next dentry to step into
|
||||
* @seq: seq number to check @dentry against
|
||||
* Returns: true on success, false on failure
|
||||
*
|
||||
* Similar to try_to_unlazy(), but here we have the next dentry already
|
||||
@@ -801,7 +801,7 @@ out:
|
||||
* Nothing should touch nameidata between try_to_unlazy_next() failure and
|
||||
* terminate_walk().
|
||||
*/
|
||||
static bool try_to_unlazy_next(struct nameidata *nd, struct dentry *dentry, unsigned seq)
|
||||
static bool try_to_unlazy_next(struct nameidata *nd, struct dentry *dentry)
|
||||
{
|
||||
int res;
|
||||
BUG_ON(!(nd->flags & LOOKUP_RCU));
|
||||
@@ -826,7 +826,7 @@ static bool try_to_unlazy_next(struct nameidata *nd, struct dentry *dentry, unsi
|
||||
*/
|
||||
if (unlikely(!lockref_get_not_dead(&dentry->d_lockref)))
|
||||
goto out;
|
||||
if (read_seqcount_retry(&dentry->d_seq, seq))
|
||||
if (read_seqcount_retry(&dentry->d_seq, nd->next_seq))
|
||||
goto out_dput;
|
||||
/*
|
||||
* Sequence counts matched. Now make sure that the root is
|
||||
@@ -1475,7 +1475,7 @@ EXPORT_SYMBOL(follow_down);
|
||||
* we meet a managed dentry that would need blocking.
|
||||
*/
|
||||
static bool __follow_mount_rcu(struct nameidata *nd, struct path *path,
|
||||
struct inode **inode, unsigned *seqp)
|
||||
struct inode **inode)
|
||||
{
|
||||
struct dentry *dentry = path->dentry;
|
||||
unsigned int flags = dentry->d_flags;
|
||||
@@ -1504,7 +1504,7 @@ static bool __follow_mount_rcu(struct nameidata *nd, struct path *path,
|
||||
path->mnt = &mounted->mnt;
|
||||
dentry = path->dentry = mounted->mnt.mnt_root;
|
||||
nd->state |= ND_JUMPED;
|
||||
*seqp = read_seqcount_begin(&dentry->d_seq);
|
||||
nd->next_seq = read_seqcount_begin(&dentry->d_seq);
|
||||
*inode = dentry->d_inode;
|
||||
/*
|
||||
* We don't need to re-check ->d_seq after this
|
||||
@@ -1513,6 +1513,8 @@ static bool __follow_mount_rcu(struct nameidata *nd, struct path *path,
|
||||
* becoming unpinned.
|
||||
*/
|
||||
flags = dentry->d_flags;
|
||||
// makes sure that non-RCU pathwalk could reach
|
||||
// this state.
|
||||
if (read_seqretry(&mount_lock, nd->m_seq))
|
||||
return false;
|
||||
continue;
|
||||
@@ -1525,8 +1527,7 @@ static bool __follow_mount_rcu(struct nameidata *nd, struct path *path,
|
||||
}
|
||||
|
||||
static inline int handle_mounts(struct nameidata *nd, struct dentry *dentry,
|
||||
struct path *path, struct inode **inode,
|
||||
unsigned int *seqp)
|
||||
struct path *path, struct inode **inode)
|
||||
{
|
||||
bool jumped;
|
||||
int ret;
|
||||
@@ -1534,16 +1535,17 @@ static inline int handle_mounts(struct nameidata *nd, struct dentry *dentry,
|
||||
path->mnt = nd->path.mnt;
|
||||
path->dentry = dentry;
|
||||
if (nd->flags & LOOKUP_RCU) {
|
||||
unsigned int seq = *seqp;
|
||||
unsigned int seq = nd->next_seq;
|
||||
if (unlikely(!*inode))
|
||||
return -ENOENT;
|
||||
if (likely(__follow_mount_rcu(nd, path, inode, seqp)))
|
||||
if (likely(__follow_mount_rcu(nd, path, inode)))
|
||||
return 0;
|
||||
if (!try_to_unlazy_next(nd, dentry, seq))
|
||||
return -ECHILD;
|
||||
// *path might've been clobbered by __follow_mount_rcu()
|
||||
// *path and nd->next_seq might've been clobbered
|
||||
path->mnt = nd->path.mnt;
|
||||
path->dentry = dentry;
|
||||
nd->next_seq = seq;
|
||||
if (!try_to_unlazy_next(nd, dentry))
|
||||
return -ECHILD;
|
||||
}
|
||||
ret = traverse_mounts(path, &jumped, &nd->total_link_count, nd->flags);
|
||||
if (jumped) {
|
||||
@@ -1558,7 +1560,6 @@ static inline int handle_mounts(struct nameidata *nd, struct dentry *dentry,
|
||||
mntput(path->mnt);
|
||||
} else {
|
||||
*inode = d_backing_inode(path->dentry);
|
||||
*seqp = 0; /* out of RCU mode, so the value doesn't matter */
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
@@ -1618,8 +1619,7 @@ static struct dentry *__lookup_hash(const struct qstr *name,
|
||||
}
|
||||
|
||||
static struct dentry *lookup_fast(struct nameidata *nd,
|
||||
struct inode **inode,
|
||||
unsigned *seqp)
|
||||
struct inode **inode)
|
||||
{
|
||||
struct dentry *dentry, *parent = nd->path.dentry;
|
||||
int status = 1;
|
||||
@@ -1630,8 +1630,7 @@ static struct dentry *lookup_fast(struct nameidata *nd,
|
||||
* going to fall back to non-racy lookup.
|
||||
*/
|
||||
if (nd->flags & LOOKUP_RCU) {
|
||||
unsigned seq;
|
||||
dentry = __d_lookup_rcu(parent, &nd->last, &seq);
|
||||
dentry = __d_lookup_rcu(parent, &nd->last, &nd->next_seq);
|
||||
if (unlikely(!dentry)) {
|
||||
if (!try_to_unlazy(nd))
|
||||
return ERR_PTR(-ECHILD);
|
||||
@@ -1643,7 +1642,7 @@ static struct dentry *lookup_fast(struct nameidata *nd,
|
||||
* the dentry name information from lookup.
|
||||
*/
|
||||
*inode = d_backing_inode(dentry);
|
||||
if (read_seqcount_retry(&dentry->d_seq, seq))
|
||||
if (read_seqcount_retry(&dentry->d_seq, nd->next_seq))
|
||||
return ERR_PTR(-ECHILD);
|
||||
|
||||
/*
|
||||
@@ -1656,11 +1655,10 @@ static struct dentry *lookup_fast(struct nameidata *nd,
|
||||
if (__read_seqcount_retry(&parent->d_seq, nd->seq))
|
||||
return ERR_PTR(-ECHILD);
|
||||
|
||||
*seqp = seq;
|
||||
status = d_revalidate(dentry, nd->flags);
|
||||
if (likely(status > 0))
|
||||
return dentry;
|
||||
if (!try_to_unlazy_next(nd, dentry, seq))
|
||||
if (!try_to_unlazy_next(nd, dentry))
|
||||
return ERR_PTR(-ECHILD);
|
||||
if (status == -ECHILD)
|
||||
/* we'd been told to redo it in non-rcu mode */
|
||||
@@ -1741,7 +1739,7 @@ static inline int may_lookup(struct user_namespace *mnt_userns,
|
||||
return inode_permission(mnt_userns, nd->inode, MAY_EXEC);
|
||||
}
|
||||
|
||||
static int reserve_stack(struct nameidata *nd, struct path *link, unsigned seq)
|
||||
static int reserve_stack(struct nameidata *nd, struct path *link)
|
||||
{
|
||||
if (unlikely(nd->total_link_count++ >= MAXSYMLINKS))
|
||||
return -ELOOP;
|
||||
@@ -1756,7 +1754,7 @@ static int reserve_stack(struct nameidata *nd, struct path *link, unsigned seq)
|
||||
if (nd->flags & LOOKUP_RCU) {
|
||||
// we need to grab link before we do unlazy. And we can't skip
|
||||
// unlazy even if we fail to grab the link - cleanup needs it
|
||||
bool grabbed_link = legitimize_path(nd, link, seq);
|
||||
bool grabbed_link = legitimize_path(nd, link, nd->next_seq);
|
||||
|
||||
if (!try_to_unlazy(nd) || !grabbed_link)
|
||||
return -ECHILD;
|
||||
@@ -1770,11 +1768,11 @@ static int reserve_stack(struct nameidata *nd, struct path *link, unsigned seq)
|
||||
enum {WALK_TRAILING = 1, WALK_MORE = 2, WALK_NOFOLLOW = 4};
|
||||
|
||||
static const char *pick_link(struct nameidata *nd, struct path *link,
|
||||
struct inode *inode, unsigned seq, int flags)
|
||||
struct inode *inode, int flags)
|
||||
{
|
||||
struct saved *last;
|
||||
const char *res;
|
||||
int error = reserve_stack(nd, link, seq);
|
||||
int error = reserve_stack(nd, link);
|
||||
|
||||
if (unlikely(error)) {
|
||||
if (!(nd->flags & LOOKUP_RCU))
|
||||
@@ -1784,7 +1782,7 @@ static const char *pick_link(struct nameidata *nd, struct path *link,
|
||||
last = nd->stack + nd->depth++;
|
||||
last->link = *link;
|
||||
clear_delayed_call(&last->done);
|
||||
last->seq = seq;
|
||||
last->seq = nd->next_seq;
|
||||
|
||||
if (flags & WALK_TRAILING) {
|
||||
error = may_follow_link(nd, inode);
|
||||
@@ -1846,12 +1844,14 @@ all_done: // pure jump
|
||||
* to do this check without having to look at inode->i_op,
|
||||
* so we keep a cache of "no, this doesn't need follow_link"
|
||||
* for the common case.
|
||||
*
|
||||
* NOTE: dentry must be what nd->next_seq had been sampled from.
|
||||
*/
|
||||
static const char *step_into(struct nameidata *nd, int flags,
|
||||
struct dentry *dentry, struct inode *inode, unsigned seq)
|
||||
struct dentry *dentry, struct inode *inode)
|
||||
{
|
||||
struct path path;
|
||||
int err = handle_mounts(nd, dentry, &path, &inode, &seq);
|
||||
int err = handle_mounts(nd, dentry, &path, &inode);
|
||||
|
||||
if (err < 0)
|
||||
return ERR_PTR(err);
|
||||
@@ -1866,23 +1866,22 @@ static const char *step_into(struct nameidata *nd, int flags,
|
||||
}
|
||||
nd->path = path;
|
||||
nd->inode = inode;
|
||||
nd->seq = seq;
|
||||
nd->seq = nd->next_seq;
|
||||
return NULL;
|
||||
}
|
||||
if (nd->flags & LOOKUP_RCU) {
|
||||
/* make sure that d_is_symlink above matches inode */
|
||||
if (read_seqcount_retry(&path.dentry->d_seq, seq))
|
||||
if (read_seqcount_retry(&path.dentry->d_seq, nd->next_seq))
|
||||
return ERR_PTR(-ECHILD);
|
||||
} else {
|
||||
if (path.mnt == nd->path.mnt)
|
||||
mntget(path.mnt);
|
||||
}
|
||||
return pick_link(nd, &path, inode, seq, flags);
|
||||
return pick_link(nd, &path, inode, flags);
|
||||
}
|
||||
|
||||
static struct dentry *follow_dotdot_rcu(struct nameidata *nd,
|
||||
struct inode **inodep,
|
||||
unsigned *seqp)
|
||||
struct inode **inodep)
|
||||
{
|
||||
struct dentry *parent, *old;
|
||||
|
||||
@@ -1899,6 +1898,7 @@ static struct dentry *follow_dotdot_rcu(struct nameidata *nd,
|
||||
nd->path = path;
|
||||
nd->inode = path.dentry->d_inode;
|
||||
nd->seq = seq;
|
||||
// makes sure that non-RCU pathwalk could reach this state
|
||||
if (read_seqretry(&mount_lock, nd->m_seq))
|
||||
return ERR_PTR(-ECHILD);
|
||||
/* we know that mountpoint was pinned */
|
||||
@@ -1906,7 +1906,8 @@ static struct dentry *follow_dotdot_rcu(struct nameidata *nd,
|
||||
old = nd->path.dentry;
|
||||
parent = old->d_parent;
|
||||
*inodep = parent->d_inode;
|
||||
*seqp = read_seqcount_begin(&parent->d_seq);
|
||||
nd->next_seq = read_seqcount_begin(&parent->d_seq);
|
||||
// makes sure that non-RCU pathwalk could reach this state
|
||||
if (read_seqcount_retry(&old->d_seq, nd->seq))
|
||||
return ERR_PTR(-ECHILD);
|
||||
if (unlikely(!path_connected(nd->path.mnt, parent)))
|
||||
@@ -1917,14 +1918,13 @@ in_root:
|
||||
return ERR_PTR(-ECHILD);
|
||||
if (unlikely(nd->flags & LOOKUP_BENEATH))
|
||||
return ERR_PTR(-ECHILD);
|
||||
*seqp = nd->seq;
|
||||
nd->next_seq = nd->seq;
|
||||
*inodep = nd->path.dentry->d_inode;
|
||||
return nd->path.dentry;
|
||||
}
|
||||
|
||||
static struct dentry *follow_dotdot(struct nameidata *nd,
|
||||
struct inode **inodep,
|
||||
unsigned *seqp)
|
||||
struct inode **inodep)
|
||||
{
|
||||
struct dentry *parent;
|
||||
|
||||
@@ -1948,14 +1948,12 @@ static struct dentry *follow_dotdot(struct nameidata *nd,
|
||||
dput(parent);
|
||||
return ERR_PTR(-ENOENT);
|
||||
}
|
||||
*seqp = 0;
|
||||
*inodep = parent->d_inode;
|
||||
return parent;
|
||||
|
||||
in_root:
|
||||
if (unlikely(nd->flags & LOOKUP_BENEATH))
|
||||
return ERR_PTR(-EXDEV);
|
||||
*seqp = 0;
|
||||
*inodep = nd->path.dentry->d_inode;
|
||||
return dget(nd->path.dentry);
|
||||
}
|
||||
@@ -1966,7 +1964,6 @@ static const char *handle_dots(struct nameidata *nd, int type)
|
||||
const char *error = NULL;
|
||||
struct dentry *parent;
|
||||
struct inode *inode;
|
||||
unsigned seq;
|
||||
|
||||
if (!nd->root.mnt) {
|
||||
error = ERR_PTR(set_root(nd));
|
||||
@@ -1974,12 +1971,12 @@ static const char *handle_dots(struct nameidata *nd, int type)
|
||||
return error;
|
||||
}
|
||||
if (nd->flags & LOOKUP_RCU)
|
||||
parent = follow_dotdot_rcu(nd, &inode, &seq);
|
||||
parent = follow_dotdot_rcu(nd, &inode);
|
||||
else
|
||||
parent = follow_dotdot(nd, &inode, &seq);
|
||||
parent = follow_dotdot(nd, &inode);
|
||||
if (IS_ERR(parent))
|
||||
return ERR_CAST(parent);
|
||||
error = step_into(nd, WALK_NOFOLLOW, parent, inode, seq);
|
||||
error = step_into(nd, WALK_NOFOLLOW, parent, inode);
|
||||
if (unlikely(error))
|
||||
return error;
|
||||
|
||||
@@ -2004,7 +2001,6 @@ static const char *walk_component(struct nameidata *nd, int flags)
|
||||
{
|
||||
struct dentry *dentry;
|
||||
struct inode *inode;
|
||||
unsigned seq;
|
||||
/*
|
||||
* "." and ".." are special - ".." especially so because it has
|
||||
* to be able to know about the current root directory and
|
||||
@@ -2015,7 +2011,7 @@ static const char *walk_component(struct nameidata *nd, int flags)
|
||||
put_link(nd);
|
||||
return handle_dots(nd, nd->last_type);
|
||||
}
|
||||
dentry = lookup_fast(nd, &inode, &seq);
|
||||
dentry = lookup_fast(nd, &inode);
|
||||
if (IS_ERR(dentry))
|
||||
return ERR_CAST(dentry);
|
||||
if (unlikely(!dentry)) {
|
||||
@@ -2025,7 +2021,7 @@ static const char *walk_component(struct nameidata *nd, int flags)
|
||||
}
|
||||
if (!(flags & WALK_MORE) && nd->depth)
|
||||
put_link(nd);
|
||||
return step_into(nd, flags, dentry, inode, seq);
|
||||
return step_into(nd, flags, dentry, inode);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -2380,6 +2376,8 @@ static const char *path_init(struct nameidata *nd, unsigned flags)
|
||||
flags &= ~LOOKUP_RCU;
|
||||
if (flags & LOOKUP_RCU)
|
||||
rcu_read_lock();
|
||||
else
|
||||
nd->seq = nd->next_seq = 0;
|
||||
|
||||
nd->flags = flags;
|
||||
nd->state |= ND_JUMPED;
|
||||
@@ -2481,8 +2479,9 @@ static int handle_lookup_down(struct nameidata *nd)
|
||||
{
|
||||
if (!(nd->flags & LOOKUP_RCU))
|
||||
dget(nd->path.dentry);
|
||||
nd->next_seq = nd->seq;
|
||||
return PTR_ERR(step_into(nd, WALK_NOFOLLOW,
|
||||
nd->path.dentry, nd->inode, nd->seq));
|
||||
nd->path.dentry, nd->inode));
|
||||
}
|
||||
|
||||
/* Returns 0 and nd will be valid on success; Retuns error, otherwise. */
|
||||
@@ -3401,7 +3400,6 @@ static const char *open_last_lookups(struct nameidata *nd,
|
||||
struct dentry *dir = nd->path.dentry;
|
||||
int open_flag = op->open_flag;
|
||||
bool got_write = false;
|
||||
unsigned seq;
|
||||
struct inode *inode;
|
||||
struct dentry *dentry;
|
||||
const char *res;
|
||||
@@ -3418,7 +3416,7 @@ static const char *open_last_lookups(struct nameidata *nd,
|
||||
if (nd->last.name[nd->last.len])
|
||||
nd->flags |= LOOKUP_FOLLOW | LOOKUP_DIRECTORY;
|
||||
/* we _can_ be in RCU mode here */
|
||||
dentry = lookup_fast(nd, &inode, &seq);
|
||||
dentry = lookup_fast(nd, &inode);
|
||||
if (IS_ERR(dentry))
|
||||
return ERR_CAST(dentry);
|
||||
if (likely(dentry))
|
||||
@@ -3472,7 +3470,7 @@ static const char *open_last_lookups(struct nameidata *nd,
|
||||
finish_lookup:
|
||||
if (nd->depth)
|
||||
put_link(nd);
|
||||
res = step_into(nd, WALK_TRAILING, dentry, inode, seq);
|
||||
res = step_into(nd, WALK_TRAILING, dentry, inode);
|
||||
if (unlikely(res))
|
||||
nd->flags &= ~(LOOKUP_OPEN|LOOKUP_CREATE|LOOKUP_EXCL);
|
||||
return res;
|
||||
|
Loading…
x
Reference in New Issue
Block a user