diff --git a/Documentation/filesystems/overlayfs.txt b/Documentation/filesystems/overlayfs.txt index c9e884b52698..36f528a7fdd6 100644 --- a/Documentation/filesystems/overlayfs.txt +++ b/Documentation/filesystems/overlayfs.txt @@ -201,6 +201,40 @@ rightmost one and going left. In the above example lower1 will be the top, lower2 the middle and lower3 the bottom layer. +Sharing and copying layers +-------------------------- + +Lower layers may be shared among several overlay mounts and that is indeed +a very common practice. An overlay mount may use the same lower layer +path as another overlay mount and it may use a lower layer path that is +beneath or above the path of another overlay lower layer path. + +Using an upper layer path and/or a workdir path that are already used by +another overlay mount is not allowed and will fail with EBUSY. Using +partially overlapping paths is not allowed but will not fail with EBUSY. + +Mounting an overlay using an upper layer path, where the upper layer path +was previously used by another mounted overlay in combination with a +different lower layer path, is allowed, unless the "inodes index" feature +is enabled. + +With the "inodes index" feature, on the first time mount, an NFS file +handle of the lower layer root directory, along with the UUID of the lower +filesystem, are encoded and stored in the "trusted.overlay.origin" extended +attribute on the upper layer root directory. On subsequent mount attempts, +the lower root directory file handle and lower filesystem UUID are compared +to the stored origin in upper root directory. On failure to verify the +lower root origin, mount will fail with ESTALE. An overlayfs mount with +"inodes index" enabled will fail with EOPNOTSUPP if the lower filesystem +does not support NFS export, lower filesystem does not have a valid UUID or +if the upper filesystem does not support extended attributes. + +It is quite a common practice to copy overlay layers to a different +directory tree on the same or different underlying filesystem, and even +to a different machine. With the "inodes index" feature, trying to mount +the copied layers will fail the verification of the lower root file handle. + + Non-standard behavior --------------------- diff --git a/fs/overlayfs/Kconfig b/fs/overlayfs/Kconfig index c0c9683934b7..cbfc196e5dc5 100644 --- a/fs/overlayfs/Kconfig +++ b/fs/overlayfs/Kconfig @@ -23,3 +23,23 @@ config OVERLAY_FS_REDIRECT_DIR Note, that redirects are not backward compatible. That is, mounting an overlay which has redirects on a kernel that doesn't support this feature will have unexpected results. + +config OVERLAY_FS_INDEX + bool "Overlayfs: turn on inodes index feature by default" + depends on OVERLAY_FS + help + If this config option is enabled then overlay filesystems will use + the inodes index dir to map lower inodes to upper inodes by default. + In this case it is still possible to turn off index globally with the + "index=off" module option or on a filesystem instance basis with the + "index=off" mount option. + + The inodes index feature prevents breaking of lower hardlinks on copy + up. + + Note, that the inodes index feature is read-only backward compatible. + That is, mounting an overlay which has an index dir on a kernel that + doesn't support this feature read-only, will not have any negative + outcomes. However, mounting the same overlay with an old kernel + read-write and then mounting it again with a new kernel, will have + unexpected results. diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c index e5869f91b3ab..acb6f97deb97 100644 --- a/fs/overlayfs/copy_up.c +++ b/fs/overlayfs/copy_up.c @@ -233,12 +233,13 @@ int ovl_set_attr(struct dentry *upperdentry, struct kstat *stat) return err; } -static struct ovl_fh *ovl_encode_fh(struct dentry *lower, uuid_t *uuid) +struct ovl_fh *ovl_encode_fh(struct dentry *lower, bool is_upper) { struct ovl_fh *fh; int fh_type, fh_len, dwords; void *buf; int buflen = MAX_HANDLE_SZ; + uuid_t *uuid = &lower->d_sb->s_uuid; buf = kmalloc(buflen, GFP_TEMPORARY); if (!buf) @@ -271,6 +272,14 @@ static struct ovl_fh *ovl_encode_fh(struct dentry *lower, uuid_t *uuid) fh->magic = OVL_FH_MAGIC; fh->type = fh_type; fh->flags = OVL_FH_FLAG_CPU_ENDIAN; + /* + * When we will want to decode an overlay dentry from this handle + * and all layers are on the same fs, if we get a disconncted real + * dentry when we decode fid, the only way to tell if we should assign + * it to upperdentry or to lowerstack is by checking this flag. + */ + if (is_upper) + fh->flags |= OVL_FH_FLAG_PATH_UPPER; fh->len = fh_len; fh->uuid = *uuid; memcpy(fh->fid, buf, buflen); @@ -283,7 +292,6 @@ out: static int ovl_set_origin(struct dentry *dentry, struct dentry *lower, struct dentry *upper) { - struct super_block *sb = lower->d_sb; const struct ovl_fh *fh = NULL; int err; @@ -292,9 +300,8 @@ static int ovl_set_origin(struct dentry *dentry, struct dentry *lower, * so we can use the overlay.origin xattr to distignuish between a copy * up and a pure upper inode. */ - if (sb->s_export_op && sb->s_export_op->fh_to_dentry && - !uuid_is_null(&sb->s_uuid)) { - fh = ovl_encode_fh(lower, &sb->s_uuid); + if (ovl_can_decode_fh(lower->d_sb)) { + fh = ovl_encode_fh(lower, false); if (IS_ERR(fh)) return PTR_ERR(fh); } @@ -309,84 +316,156 @@ static int ovl_set_origin(struct dentry *dentry, struct dentry *lower, return err; } -static int ovl_copy_up_locked(struct dentry *workdir, struct dentry *upperdir, - struct dentry *dentry, struct path *lowerpath, - struct kstat *stat, const char *link, - struct kstat *pstat, bool tmpfile) +struct ovl_copy_up_ctx { + struct dentry *parent; + struct dentry *dentry; + struct path lowerpath; + struct kstat stat; + struct kstat pstat; + const char *link; + struct dentry *destdir; + struct qstr destname; + struct dentry *workdir; + bool tmpfile; + bool origin; +}; + +static int ovl_link_up(struct ovl_copy_up_ctx *c) { - struct inode *wdir = workdir->d_inode; - struct inode *udir = upperdir->d_inode; - struct dentry *newdentry = NULL; - struct dentry *upper = NULL; - struct dentry *temp = NULL; int err; + struct dentry *upper; + struct dentry *upperdir = ovl_dentry_upper(c->parent); + struct inode *udir = d_inode(upperdir); + + /* Mark parent "impure" because it may now contain non-pure upper */ + err = ovl_set_impure(c->parent, upperdir); + if (err) + return err; + + err = ovl_set_nlink_lower(c->dentry); + if (err) + return err; + + inode_lock_nested(udir, I_MUTEX_PARENT); + upper = lookup_one_len(c->dentry->d_name.name, upperdir, + c->dentry->d_name.len); + err = PTR_ERR(upper); + if (!IS_ERR(upper)) { + err = ovl_do_link(ovl_dentry_upper(c->dentry), udir, upper, + true); + dput(upper); + + if (!err) { + /* Restore timestamps on parent (best effort) */ + ovl_set_timestamps(upperdir, &c->pstat); + ovl_dentry_set_upper_alias(c->dentry); + } + } + inode_unlock(udir); + ovl_set_nlink_upper(c->dentry); + + return err; +} + +static int ovl_install_temp(struct ovl_copy_up_ctx *c, struct dentry *temp, + struct dentry **newdentry) +{ + int err; + struct dentry *upper; + struct inode *udir = d_inode(c->destdir); + + upper = lookup_one_len(c->destname.name, c->destdir, c->destname.len); + if (IS_ERR(upper)) + return PTR_ERR(upper); + + if (c->tmpfile) + err = ovl_do_link(temp, udir, upper, true); + else + err = ovl_do_rename(d_inode(c->workdir), temp, udir, upper, 0); + + if (!err) + *newdentry = dget(c->tmpfile ? upper : temp); + dput(upper); + + return err; +} + +static int ovl_get_tmpfile(struct ovl_copy_up_ctx *c, struct dentry **tempp) +{ + int err; + struct dentry *temp; const struct cred *old_creds = NULL; struct cred *new_creds = NULL; struct cattr cattr = { /* Can't properly set mode on creation because of the umask */ - .mode = stat->mode & S_IFMT, - .rdev = stat->rdev, - .link = link + .mode = c->stat.mode & S_IFMT, + .rdev = c->stat.rdev, + .link = c->link }; - err = security_inode_copy_up(dentry, &new_creds); + err = security_inode_copy_up(c->dentry, &new_creds); if (err < 0) goto out; if (new_creds) old_creds = override_creds(new_creds); - if (tmpfile) - temp = ovl_do_tmpfile(upperdir, stat->mode); - else - temp = ovl_lookup_temp(workdir); - err = 0; - if (IS_ERR(temp)) { - err = PTR_ERR(temp); - temp = NULL; + if (c->tmpfile) { + temp = ovl_do_tmpfile(c->workdir, c->stat.mode); + if (IS_ERR(temp)) + goto temp_err; + } else { + temp = ovl_lookup_temp(c->workdir); + if (IS_ERR(temp)) + goto temp_err; + + err = ovl_create_real(d_inode(c->workdir), temp, &cattr, + NULL, true); + if (err) { + dput(temp); + goto out; + } } - - if (!err && !tmpfile) - err = ovl_create_real(wdir, temp, &cattr, NULL, true); - + err = 0; + *tempp = temp; +out: if (new_creds) { revert_creds(old_creds); put_cred(new_creds); } - if (err) - goto out; + return err; - if (S_ISREG(stat->mode)) { +temp_err: + err = PTR_ERR(temp); + goto out; +} + +static int ovl_copy_up_inode(struct ovl_copy_up_ctx *c, struct dentry *temp) +{ + int err; + + if (S_ISREG(c->stat.mode)) { struct path upperpath; - ovl_path_upper(dentry, &upperpath); + ovl_path_upper(c->dentry, &upperpath); BUG_ON(upperpath.dentry != NULL); upperpath.dentry = temp; - if (tmpfile) { - inode_unlock(udir); - err = ovl_copy_up_data(lowerpath, &upperpath, - stat->size); - inode_lock_nested(udir, I_MUTEX_PARENT); - } else { - err = ovl_copy_up_data(lowerpath, &upperpath, - stat->size); - } - + err = ovl_copy_up_data(&c->lowerpath, &upperpath, c->stat.size); if (err) - goto out_cleanup; + return err; } - err = ovl_copy_xattr(lowerpath->dentry, temp); + err = ovl_copy_xattr(c->lowerpath.dentry, temp); if (err) - goto out_cleanup; + return err; inode_lock(temp->d_inode); - err = ovl_set_attr(temp, stat); + err = ovl_set_attr(temp, &c->stat); inode_unlock(temp->d_inode); if (err) - goto out_cleanup; + return err; /* * Store identifier of lower inode in upper inode xattr to @@ -395,41 +474,48 @@ static int ovl_copy_up_locked(struct dentry *workdir, struct dentry *upperdir, * Don't set origin when we are breaking the association with a lower * hard link. */ - if (S_ISDIR(stat->mode) || stat->nlink == 1) { - err = ovl_set_origin(dentry, lowerpath->dentry, temp); + if (c->origin) { + err = ovl_set_origin(c->dentry, c->lowerpath.dentry, temp); if (err) - goto out_cleanup; + return err; } - upper = lookup_one_len(dentry->d_name.name, upperdir, - dentry->d_name.len); - if (IS_ERR(upper)) { - err = PTR_ERR(upper); - upper = NULL; - goto out_cleanup; - } + return 0; +} - if (tmpfile) - err = ovl_do_link(temp, udir, upper, true); - else - err = ovl_do_rename(wdir, temp, udir, upper, 0); +static int ovl_copy_up_locked(struct ovl_copy_up_ctx *c) +{ + struct inode *udir = c->destdir->d_inode; + struct dentry *newdentry = NULL; + struct dentry *temp = NULL; + int err; + + err = ovl_get_tmpfile(c, &temp); + if (err) + goto out; + + err = ovl_copy_up_inode(c, temp); if (err) goto out_cleanup; - newdentry = dget(tmpfile ? upper : temp); - ovl_dentry_update(dentry, newdentry); - ovl_inode_update(d_inode(dentry), d_inode(newdentry)); + if (c->tmpfile) { + inode_lock_nested(udir, I_MUTEX_PARENT); + err = ovl_install_temp(c, temp, &newdentry); + inode_unlock(udir); + } else { + err = ovl_install_temp(c, temp, &newdentry); + } + if (err) + goto out_cleanup; - /* Restore timestamps on parent (best effort) */ - ovl_set_timestamps(upperdir, pstat); + ovl_inode_update(d_inode(c->dentry), newdentry); out: dput(temp); - dput(upper); return err; out_cleanup: - if (!tmpfile) - ovl_cleanup(wdir, temp); + if (!c->tmpfile) + ovl_cleanup(d_inode(c->workdir), temp); goto out; } @@ -442,78 +528,119 @@ out_cleanup: * is possible that the copy up will lock the old parent. At that point * the file will have already been copied up anyway. */ -static int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry, - struct path *lowerpath, struct kstat *stat) +static int ovl_do_copy_up(struct ovl_copy_up_ctx *c) { - DEFINE_DELAYED_CALL(done); - struct dentry *workdir = ovl_workdir(dentry); int err; - struct kstat pstat; - struct path parentpath; - struct dentry *lowerdentry = lowerpath->dentry; - struct dentry *upperdir; - const char *link = NULL; - struct ovl_fs *ofs = dentry->d_sb->s_fs_info; + struct ovl_fs *ofs = c->dentry->d_sb->s_fs_info; + bool indexed = false; - if (WARN_ON(!workdir)) + if (ovl_indexdir(c->dentry->d_sb) && !S_ISDIR(c->stat.mode) && + c->stat.nlink > 1) + indexed = true; + + if (S_ISDIR(c->stat.mode) || c->stat.nlink == 1 || indexed) + c->origin = true; + + if (indexed) { + c->destdir = ovl_indexdir(c->dentry->d_sb); + err = ovl_get_index_name(c->lowerpath.dentry, &c->destname); + if (err) + return err; + } else { + /* + * Mark parent "impure" because it may now contain non-pure + * upper + */ + err = ovl_set_impure(c->parent, c->destdir); + if (err) + return err; + } + + /* Should we copyup with O_TMPFILE or with workdir? */ + if (S_ISREG(c->stat.mode) && ofs->tmpfile) { + c->tmpfile = true; + err = ovl_copy_up_locked(c); + } else { + err = -EIO; + if (lock_rename(c->workdir, c->destdir) != NULL) { + pr_err("overlayfs: failed to lock workdir+upperdir\n"); + } else { + err = ovl_copy_up_locked(c); + unlock_rename(c->workdir, c->destdir); + } + } + + if (indexed) { + if (!err) + ovl_set_flag(OVL_INDEX, d_inode(c->dentry)); + kfree(c->destname.name); + } else if (!err) { + struct inode *udir = d_inode(c->destdir); + + /* Restore timestamps on parent (best effort) */ + inode_lock(udir); + ovl_set_timestamps(c->destdir, &c->pstat); + inode_unlock(udir); + + ovl_dentry_set_upper_alias(c->dentry); + } + + return err; +} + +static int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry, + int flags) +{ + int err; + DEFINE_DELAYED_CALL(done); + struct path parentpath; + struct ovl_copy_up_ctx ctx = { + .parent = parent, + .dentry = dentry, + .workdir = ovl_workdir(dentry), + }; + + if (WARN_ON(!ctx.workdir)) return -EROFS; - ovl_do_check_copy_up(lowerdentry); - - ovl_path_upper(parent, &parentpath); - upperdir = parentpath.dentry; - - /* Mark parent "impure" because it may now contain non-pure upper */ - err = ovl_set_impure(parent, upperdir); + ovl_path_lower(dentry, &ctx.lowerpath); + err = vfs_getattr(&ctx.lowerpath, &ctx.stat, + STATX_BASIC_STATS, AT_STATX_SYNC_AS_STAT); if (err) return err; - err = vfs_getattr(&parentpath, &pstat, + ovl_path_upper(parent, &parentpath); + ctx.destdir = parentpath.dentry; + ctx.destname = dentry->d_name; + + err = vfs_getattr(&parentpath, &ctx.pstat, STATX_ATIME | STATX_MTIME, AT_STATX_SYNC_AS_STAT); if (err) return err; - if (S_ISLNK(stat->mode)) { - link = vfs_get_link(lowerdentry, &done); - if (IS_ERR(link)) - return PTR_ERR(link); + /* maybe truncate regular file. this has no effect on dirs */ + if (flags & O_TRUNC) + ctx.stat.size = 0; + + if (S_ISLNK(ctx.stat.mode)) { + ctx.link = vfs_get_link(ctx.lowerpath.dentry, &done); + if (IS_ERR(ctx.link)) + return PTR_ERR(ctx.link); } + ovl_do_check_copy_up(ctx.lowerpath.dentry); - /* Should we copyup with O_TMPFILE or with workdir? */ - if (S_ISREG(stat->mode) && ofs->tmpfile) { - err = ovl_copy_up_start(dentry); - /* err < 0: interrupted, err > 0: raced with another copy-up */ - if (unlikely(err)) { - pr_debug("ovl_copy_up_start(%pd2) = %i\n", dentry, err); - if (err > 0) - err = 0; - goto out_done; - } - - inode_lock_nested(upperdir->d_inode, I_MUTEX_PARENT); - err = ovl_copy_up_locked(workdir, upperdir, dentry, lowerpath, - stat, link, &pstat, true); - inode_unlock(upperdir->d_inode); + err = ovl_copy_up_start(dentry); + /* err < 0: interrupted, err > 0: raced with another copy-up */ + if (unlikely(err)) { + if (err > 0) + err = 0; + } else { + if (!ovl_dentry_upper(dentry)) + err = ovl_do_copy_up(&ctx); + if (!err && !ovl_dentry_has_upper_alias(dentry)) + err = ovl_link_up(&ctx); ovl_copy_up_end(dentry); - goto out_done; } - - err = -EIO; - if (lock_rename(workdir, upperdir) != NULL) { - pr_err("overlayfs: failed to lock workdir+upperdir\n"); - goto out_unlock; - } - if (ovl_dentry_upper(dentry)) { - /* Raced with another copy-up? Nothing to do, then... */ - err = 0; - goto out_unlock; - } - - err = ovl_copy_up_locked(workdir, upperdir, dentry, lowerpath, - stat, link, &pstat, false); -out_unlock: - unlock_rename(workdir, upperdir); -out_done: do_delayed_call(&done); return err; @@ -527,11 +654,22 @@ int ovl_copy_up_flags(struct dentry *dentry, int flags) while (!err) { struct dentry *next; struct dentry *parent; - struct path lowerpath; - struct kstat stat; - enum ovl_path_type type = ovl_path_type(dentry); - if (OVL_TYPE_UPPER(type)) + /* + * Check if copy-up has happened as well as for upper alias (in + * case of hard links) is there. + * + * Both checks are lockless: + * - false negatives: will recheck under oi->lock + * - false positives: + * + ovl_dentry_upper() uses memory barriers to ensure the + * upper dentry is up-to-date + * + ovl_dentry_has_upper_alias() relies on locking of + * upper parent i_rwsem to prevent reordering copy-up + * with rename. + */ + if (ovl_dentry_upper(dentry) && + ovl_dentry_has_upper_alias(dentry)) break; next = dget(dentry); @@ -539,22 +677,14 @@ int ovl_copy_up_flags(struct dentry *dentry, int flags) for (;;) { parent = dget_parent(next); - type = ovl_path_type(parent); - if (OVL_TYPE_UPPER(type)) + if (ovl_dentry_upper(parent)) break; dput(next); next = parent; } - ovl_path_lower(next, &lowerpath); - err = vfs_getattr(&lowerpath, &stat, - STATX_BASIC_STATS, AT_STATX_SYNC_AS_STAT); - /* maybe truncate regular file. this has no effect on dirs */ - if (flags & O_TRUNC) - stat.size = 0; - if (!err) - err = ovl_copy_up_one(parent, next, &lowerpath, &stat); + err = ovl_copy_up_one(parent, next, flags); dput(parent); dput(next); diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c index a63a71656e9b..641d9ee97f91 100644 --- a/fs/overlayfs/dir.c +++ b/fs/overlayfs/dir.c @@ -24,7 +24,7 @@ module_param_named(redirect_max, ovl_redirect_max, ushort, 0644); MODULE_PARM_DESC(ovl_redirect_max, "Maximum length of absolute redirect xattr value"); -void ovl_cleanup(struct inode *wdir, struct dentry *wdentry) +int ovl_cleanup(struct inode *wdir, struct dentry *wdentry) { int err; @@ -39,6 +39,8 @@ void ovl_cleanup(struct inode *wdir, struct dentry *wdentry) pr_err("overlayfs: cleanup of '%pd2' failed (%i)\n", wdentry, err); } + + return err; } struct dentry *ovl_lookup_temp(struct dentry *workdir) @@ -154,12 +156,13 @@ static void ovl_instantiate(struct dentry *dentry, struct inode *inode, struct dentry *newdentry, bool hardlink) { ovl_dentry_version_inc(dentry->d_parent); - ovl_dentry_update(dentry, newdentry); + ovl_dentry_set_upper_alias(dentry); if (!hardlink) { - ovl_inode_update(inode, d_inode(newdentry)); + ovl_inode_update(inode, newdentry); ovl_copyattr(newdentry->d_inode, inode); } else { - WARN_ON(ovl_inode_real(inode, NULL) != d_inode(newdentry)); + WARN_ON(ovl_inode_real(inode) != d_inode(newdentry)); + dput(newdentry); inc_nlink(inode); } d_instantiate(dentry, inode); @@ -588,6 +591,7 @@ static int ovl_link(struct dentry *old, struct inode *newdir, struct dentry *new) { int err; + bool locked = false; struct inode *inode; err = ovl_want_write(old); @@ -598,6 +602,10 @@ static int ovl_link(struct dentry *old, struct inode *newdir, if (err) goto out_drop_write; + err = ovl_nlink_start(old, &locked); + if (err) + goto out_drop_write; + inode = d_inode(old); ihold(inode); @@ -605,12 +613,18 @@ static int ovl_link(struct dentry *old, struct inode *newdir, if (err) iput(inode); + ovl_nlink_end(old, locked); out_drop_write: ovl_drop_write(old); out: return err; } +static bool ovl_matches_upper(struct dentry *dentry, struct dentry *upper) +{ + return d_inode(ovl_dentry_upper(dentry)) == d_inode(upper); +} + static int ovl_remove_and_whiteout(struct dentry *dentry, bool is_dir) { struct dentry *workdir = ovl_workdir(dentry); @@ -646,7 +660,7 @@ static int ovl_remove_and_whiteout(struct dentry *dentry, bool is_dir) err = -ESTALE; if ((opaquedir && upper != opaquedir) || (!opaquedir && ovl_dentry_upper(dentry) && - upper != ovl_dentry_upper(dentry))) { + !ovl_matches_upper(dentry, upper))) { goto out_dput_upper; } @@ -707,7 +721,7 @@ static int ovl_remove_upper(struct dentry *dentry, bool is_dir) err = -ESTALE; if ((opaquedir && upper != opaquedir) || - (!opaquedir && upper != ovl_dentry_upper(dentry))) + (!opaquedir && !ovl_matches_upper(dentry, upper))) goto out_dput_upper; if (is_dir) @@ -735,8 +749,8 @@ out: static int ovl_do_remove(struct dentry *dentry, bool is_dir) { - enum ovl_path_type type; int err; + bool locked = false; const struct cred *old_cred; err = ovl_want_write(dentry); @@ -747,7 +761,9 @@ static int ovl_do_remove(struct dentry *dentry, bool is_dir) if (err) goto out_drop_write; - type = ovl_path_type(dentry); + err = ovl_nlink_start(dentry, &locked); + if (err) + goto out_drop_write; old_cred = ovl_override_creds(dentry->d_sb); if (!ovl_lower_positive(dentry)) @@ -761,6 +777,7 @@ static int ovl_do_remove(struct dentry *dentry, bool is_dir) else drop_nlink(dentry->d_inode); } + ovl_nlink_end(dentry, locked); out_drop_write: ovl_drop_write(dentry); out: @@ -883,6 +900,7 @@ static int ovl_rename(struct inode *olddir, struct dentry *old, unsigned int flags) { int err; + bool locked = false; struct dentry *old_upperdir; struct dentry *new_upperdir; struct dentry *olddentry; @@ -926,6 +944,10 @@ static int ovl_rename(struct inode *olddir, struct dentry *old, err = ovl_copy_up(new); if (err) goto out_drop_write; + } else { + err = ovl_nlink_start(new, &locked); + if (err) + goto out_drop_write; } old_cred = ovl_override_creds(old->d_sb); @@ -985,7 +1007,7 @@ static int ovl_rename(struct inode *olddir, struct dentry *old, goto out_unlock; err = -ESTALE; - if (olddentry != ovl_dentry_upper(old)) + if (!ovl_matches_upper(old, olddentry)) goto out_dput_old; newdentry = lookup_one_len(new->d_name.name, new_upperdir, @@ -998,12 +1020,12 @@ static int ovl_rename(struct inode *olddir, struct dentry *old, new_opaque = ovl_dentry_is_opaque(new); err = -ESTALE; - if (ovl_dentry_upper(new)) { + if (d_inode(new) && ovl_dentry_upper(new)) { if (opaquedir) { if (newdentry != opaquedir) goto out_dput; } else { - if (newdentry != ovl_dentry_upper(new)) + if (!ovl_matches_upper(new, newdentry)) goto out_dput; } } else { @@ -1046,6 +1068,13 @@ static int ovl_rename(struct inode *olddir, struct dentry *old, if (cleanup_whiteout) ovl_cleanup(old_upperdir->d_inode, newdentry); + if (overwrite && d_inode(new)) { + if (new_is_dir) + clear_nlink(d_inode(new)); + else + drop_nlink(d_inode(new)); + } + ovl_dentry_version_inc(old->d_parent); ovl_dentry_version_inc(new->d_parent); @@ -1057,6 +1086,7 @@ out_unlock: unlock_rename(new_upperdir, old_upperdir); out_revert_creds: revert_creds(old_cred); + ovl_nlink_end(new, locked); out_drop_write: ovl_drop_write(old); out: diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c index d613e2c41242..69f4fc26ee39 100644 --- a/fs/overlayfs/inode.c +++ b/fs/overlayfs/inode.c @@ -12,6 +12,7 @@ #include #include #include +#include #include "overlayfs.h" int ovl_setattr(struct dentry *dentry, struct iattr *attr) @@ -96,11 +97,15 @@ int ovl_getattr(const struct path *path, struct kstat *stat, WARN_ON_ONCE(stat->dev != lowerstat.dev); /* - * Lower hardlinks are broken on copy up to different + * Lower hardlinks may be broken on copy up to different * upper files, so we cannot use the lower origin st_ino * for those different files, even for the same fs case. + * With inodes index enabled, it is safe to use st_ino + * of an indexed hardlinked origin. The index validates + * that the upper hardlink is not broken. */ - if (is_dir || lowerstat.nlink == 1) + if (is_dir || lowerstat.nlink == 1 || + ovl_test_flag(OVL_INDEX, d_inode(dentry))) stat->ino = lowerstat.ino; } stat->dev = dentry->d_sb->s_dev; @@ -126,6 +131,15 @@ int ovl_getattr(const struct path *path, struct kstat *stat, if (is_dir && OVL_TYPE_MERGE(type)) stat->nlink = 1; + /* + * Return the overlay inode nlinks for indexed upper inodes. + * Overlay inode nlink counts the union of the upper hardlinks + * and non-covered lower hardlinks. It does not include the upper + * index hardlink. + */ + if (!is_dir && ovl_test_flag(OVL_INDEX, d_inode(dentry))) + stat->nlink = dentry->d_inode->i_nlink; + out: revert_creds(old_cred); @@ -134,8 +148,8 @@ out: int ovl_permission(struct inode *inode, int mask) { - bool is_upper; - struct inode *realinode = ovl_inode_real(inode, &is_upper); + struct inode *upperinode = ovl_inode_upper(inode); + struct inode *realinode = upperinode ?: ovl_inode_lower(inode); const struct cred *old_cred; int err; @@ -154,7 +168,8 @@ int ovl_permission(struct inode *inode, int mask) return err; old_cred = ovl_override_creds(inode->i_sb); - if (!is_upper && !special_file(realinode->i_mode) && mask & MAY_WRITE) { + if (!upperinode && + !special_file(realinode->i_mode) && mask & MAY_WRITE) { mask &= ~(MAY_WRITE | MAY_APPEND); /* Make sure mounter can read file for copy up later */ mask |= MAY_READ; @@ -286,7 +301,7 @@ ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size) struct posix_acl *ovl_get_acl(struct inode *inode, int type) { - struct inode *realinode = ovl_inode_real(inode, NULL); + struct inode *realinode = ovl_inode_real(inode); const struct cred *old_cred; struct posix_acl *acl; @@ -300,13 +315,13 @@ struct posix_acl *ovl_get_acl(struct inode *inode, int type) return acl; } -static bool ovl_open_need_copy_up(int flags, enum ovl_path_type type, - struct dentry *realdentry) +static bool ovl_open_need_copy_up(struct dentry *dentry, int flags) { - if (OVL_TYPE_UPPER(type)) + if (ovl_dentry_upper(dentry) && + ovl_dentry_has_upper_alias(dentry)) return false; - if (special_file(realdentry->d_inode->i_mode)) + if (special_file(d_inode(dentry)->i_mode)) return false; if (!(OPEN_FMODE(flags) & FMODE_WRITE) && !(flags & O_TRUNC)) @@ -318,11 +333,8 @@ static bool ovl_open_need_copy_up(int flags, enum ovl_path_type type, int ovl_open_maybe_copy_up(struct dentry *dentry, unsigned int file_flags) { int err = 0; - struct path realpath; - enum ovl_path_type type; - type = ovl_path_real(dentry, &realpath); - if (ovl_open_need_copy_up(file_flags, type, realpath.dentry)) { + if (ovl_open_need_copy_up(dentry, file_flags)) { err = ovl_want_write(dentry); if (!err) { err = ovl_copy_up_flags(dentry, file_flags); @@ -440,6 +452,103 @@ static void ovl_fill_inode(struct inode *inode, umode_t mode, dev_t rdev) } } +/* + * With inodes index enabled, an overlay inode nlink counts the union of upper + * hardlinks and non-covered lower hardlinks. During the lifetime of a non-pure + * upper inode, the following nlink modifying operations can happen: + * + * 1. Lower hardlink copy up + * 2. Upper hardlink created, unlinked or renamed over + * 3. Lower hardlink whiteout or renamed over + * + * For the first, copy up case, the union nlink does not change, whether the + * operation succeeds or fails, but the upper inode nlink may change. + * Therefore, before copy up, we store the union nlink value relative to the + * lower inode nlink in the index inode xattr trusted.overlay.nlink. + * + * For the second, upper hardlink case, the union nlink should be incremented + * or decremented IFF the operation succeeds, aligned with nlink change of the + * upper inode. Therefore, before link/unlink/rename, we store the union nlink + * value relative to the upper inode nlink in the index inode. + * + * For the last, lower cover up case, we simplify things by preceding the + * whiteout or cover up with copy up. This makes sure that there is an index + * upper inode where the nlink xattr can be stored before the copied up upper + * entry is unlink. + */ +#define OVL_NLINK_ADD_UPPER (1 << 0) + +/* + * On-disk format for indexed nlink: + * + * nlink relative to the upper inode - "U[+-]NUM" + * nlink relative to the lower inode - "L[+-]NUM" + */ + +static int ovl_set_nlink_common(struct dentry *dentry, + struct dentry *realdentry, const char *format) +{ + struct inode *inode = d_inode(dentry); + struct inode *realinode = d_inode(realdentry); + char buf[13]; + int len; + + len = snprintf(buf, sizeof(buf), format, + (int) (inode->i_nlink - realinode->i_nlink)); + + return ovl_do_setxattr(ovl_dentry_upper(dentry), + OVL_XATTR_NLINK, buf, len, 0); +} + +int ovl_set_nlink_upper(struct dentry *dentry) +{ + return ovl_set_nlink_common(dentry, ovl_dentry_upper(dentry), "U%+i"); +} + +int ovl_set_nlink_lower(struct dentry *dentry) +{ + return ovl_set_nlink_common(dentry, ovl_dentry_lower(dentry), "L%+i"); +} + +unsigned int ovl_get_nlink(struct dentry *lowerdentry, + struct dentry *upperdentry, + unsigned int fallback) +{ + int nlink_diff; + int nlink; + char buf[13]; + int err; + + if (!lowerdentry || !upperdentry || d_inode(lowerdentry)->i_nlink == 1) + return fallback; + + err = vfs_getxattr(upperdentry, OVL_XATTR_NLINK, &buf, sizeof(buf) - 1); + if (err < 0) + goto fail; + + buf[err] = '\0'; + if ((buf[0] != 'L' && buf[0] != 'U') || + (buf[1] != '+' && buf[1] != '-')) + goto fail; + + err = kstrtoint(buf + 1, 10, &nlink_diff); + if (err < 0) + goto fail; + + nlink = d_inode(buf[0] == 'L' ? lowerdentry : upperdentry)->i_nlink; + nlink += nlink_diff; + + if (nlink <= 0) + goto fail; + + return nlink; + +fail: + pr_warn_ratelimited("overlayfs: failed to get index nlink (%pd2, err=%i)\n", + upperdentry, err); + return fallback; +} + struct inode *ovl_new_inode(struct super_block *sb, umode_t mode, dev_t rdev) { struct inode *inode; @@ -453,27 +562,87 @@ struct inode *ovl_new_inode(struct super_block *sb, umode_t mode, dev_t rdev) static int ovl_inode_test(struct inode *inode, void *data) { - return ovl_inode_real(inode, NULL) == data; + return inode->i_private == data; } static int ovl_inode_set(struct inode *inode, void *data) { - inode->i_private = (void *) (((unsigned long) data) | OVL_ISUPPER_MASK); + inode->i_private = data; return 0; } -struct inode *ovl_get_inode(struct super_block *sb, struct inode *realinode) - +static bool ovl_verify_inode(struct inode *inode, struct dentry *lowerdentry, + struct dentry *upperdentry) { + struct inode *lowerinode = lowerdentry ? d_inode(lowerdentry) : NULL; + + /* Lower (origin) inode must match, even if NULL */ + if (ovl_inode_lower(inode) != lowerinode) + return false; + + /* + * Allow non-NULL __upperdentry in inode even if upperdentry is NULL. + * This happens when finding a lower alias for a copied up hard link. + */ + if (upperdentry && ovl_inode_upper(inode) != d_inode(upperdentry)) + return false; + + return true; +} + +struct inode *ovl_get_inode(struct dentry *dentry, struct dentry *upperdentry) +{ + struct dentry *lowerdentry = ovl_dentry_lower(dentry); + struct inode *realinode = upperdentry ? d_inode(upperdentry) : NULL; struct inode *inode; - inode = iget5_locked(sb, (unsigned long) realinode, - ovl_inode_test, ovl_inode_set, realinode); - if (inode && inode->i_state & I_NEW) { - ovl_fill_inode(inode, realinode->i_mode, realinode->i_rdev); - set_nlink(inode, realinode->i_nlink); - unlock_new_inode(inode); - } + if (!realinode) + realinode = d_inode(lowerdentry); + if (!S_ISDIR(realinode->i_mode) && + (upperdentry || (lowerdentry && ovl_indexdir(dentry->d_sb)))) { + struct inode *key = d_inode(lowerdentry ?: upperdentry); + unsigned int nlink; + + inode = iget5_locked(dentry->d_sb, (unsigned long) key, + ovl_inode_test, ovl_inode_set, key); + if (!inode) + goto out_nomem; + if (!(inode->i_state & I_NEW)) { + /* + * Verify that the underlying files stored in the inode + * match those in the dentry. + */ + if (!ovl_verify_inode(inode, lowerdentry, upperdentry)) { + iput(inode); + inode = ERR_PTR(-ESTALE); + goto out; + } + + dput(upperdentry); + goto out; + } + + nlink = ovl_get_nlink(lowerdentry, upperdentry, + realinode->i_nlink); + set_nlink(inode, nlink); + } else { + inode = new_inode(dentry->d_sb); + if (!inode) + goto out_nomem; + } + ovl_fill_inode(inode, realinode->i_mode, realinode->i_rdev); + ovl_inode_init(inode, upperdentry, lowerdentry); + + if (upperdentry && ovl_is_impuredir(upperdentry)) + ovl_set_flag(OVL_IMPURE, inode); + + if (inode->i_state & I_NEW) + unlock_new_inode(inode); +out: return inode; + +out_nomem: + inode = ERR_PTR(-ENOMEM); + goto out; } diff --git a/fs/overlayfs/namei.c b/fs/overlayfs/namei.c index de0d4f742f36..9bc0e580a5b3 100644 --- a/fs/overlayfs/namei.c +++ b/fs/overlayfs/namei.c @@ -88,13 +88,10 @@ static int ovl_acceptable(void *ctx, struct dentry *dentry) return 1; } -static struct dentry *ovl_get_origin(struct dentry *dentry, - struct vfsmount *mnt) +static struct ovl_fh *ovl_get_origin_fh(struct dentry *dentry) { int res; struct ovl_fh *fh = NULL; - struct dentry *origin = NULL; - int bytes; res = vfs_getxattr(dentry, OVL_XATTR_ORIGIN, NULL, 0); if (res < 0) { @@ -106,7 +103,7 @@ static struct dentry *ovl_get_origin(struct dentry *dentry, if (res == 0) return NULL; - fh = kzalloc(res, GFP_TEMPORARY); + fh = kzalloc(res, GFP_TEMPORARY); if (!fh) return ERR_PTR(-ENOMEM); @@ -129,7 +126,29 @@ static struct dentry *ovl_get_origin(struct dentry *dentry, (fh->flags & OVL_FH_FLAG_BIG_ENDIAN) != OVL_FH_FLAG_CPU_ENDIAN) goto out; - bytes = (fh->len - offsetof(struct ovl_fh, fid)); + return fh; + +out: + kfree(fh); + return NULL; + +fail: + pr_warn_ratelimited("overlayfs: failed to get origin (%i)\n", res); + goto out; +invalid: + pr_warn_ratelimited("overlayfs: invalid origin (%*phN)\n", res, fh); + goto out; +} + +static struct dentry *ovl_get_origin(struct dentry *dentry, + struct vfsmount *mnt) +{ + struct dentry *origin = NULL; + struct ovl_fh *fh = ovl_get_origin_fh(dentry); + int bytes; + + if (IS_ERR_OR_NULL(fh)) + return (struct dentry *)fh; /* * Make sure that the stored uuid matches the uuid of the lower @@ -138,6 +157,7 @@ static struct dentry *ovl_get_origin(struct dentry *dentry, if (!uuid_equal(&fh->uuid, &mnt->mnt_sb->s_uuid)) goto out; + bytes = (fh->len - offsetof(struct ovl_fh, fid)); origin = exportfs_decode_fh(mnt, (struct fid *)fh->fid, bytes >> 2, (int)fh->type, ovl_acceptable, NULL); @@ -149,21 +169,17 @@ static struct dentry *ovl_get_origin(struct dentry *dentry, } if (ovl_dentry_weird(origin) || - ((d_inode(origin)->i_mode ^ d_inode(dentry)->i_mode) & S_IFMT)) { - dput(origin); - origin = NULL; + ((d_inode(origin)->i_mode ^ d_inode(dentry)->i_mode) & S_IFMT)) goto invalid; - } out: kfree(fh); return origin; -fail: - pr_warn_ratelimited("overlayfs: failed to get origin (%i)\n", res); - goto out; invalid: - pr_warn_ratelimited("overlayfs: invalid origin (%*phN)\n", res, fh); + pr_warn_ratelimited("overlayfs: invalid origin (%pd2)\n", origin); + dput(origin); + origin = NULL; goto out; } @@ -269,34 +285,31 @@ static int ovl_lookup_layer(struct dentry *base, struct ovl_lookup_data *d, } -static int ovl_check_origin(struct dentry *dentry, struct dentry *upperdentry, +static int ovl_check_origin(struct dentry *upperdentry, + struct path *lowerstack, unsigned int numlower, struct path **stackp, unsigned int *ctrp) { - struct super_block *same_sb = ovl_same_sb(dentry->d_sb); - struct ovl_entry *roe = dentry->d_sb->s_root->d_fsdata; struct vfsmount *mnt; - struct dentry *origin; + struct dentry *origin = NULL; + int i; - if (!same_sb || !roe->numlower) + + for (i = 0; i < numlower; i++) { + mnt = lowerstack[i].mnt; + origin = ovl_get_origin(upperdentry, mnt); + if (IS_ERR(origin)) + return PTR_ERR(origin); + + if (origin) + break; + } + + if (!origin) return 0; - /* - * Since all layers are on the same fs, we use the first layer for - * decoding the file handle. We may get a disconnected dentry, - * which is fine, because we only need to hold the origin inode in - * cache and use its inode number. We may even get a connected dentry, - * that is not under the first layer's root. That is also fine for - * using it's inode number - it's the same as if we held a reference - * to a dentry in first layer that was moved under us. - */ - mnt = roe->lowerstack[0].mnt; - - origin = ovl_get_origin(upperdentry, mnt); - if (IS_ERR_OR_NULL(origin)) - return PTR_ERR(origin); - - BUG_ON(*stackp || *ctrp); - *stackp = kmalloc(sizeof(struct path), GFP_TEMPORARY); + BUG_ON(*ctrp); + if (!*stackp) + *stackp = kmalloc(sizeof(struct path), GFP_TEMPORARY); if (!*stackp) { dput(origin); return -ENOMEM; @@ -307,6 +320,215 @@ static int ovl_check_origin(struct dentry *dentry, struct dentry *upperdentry, return 0; } +/* + * Verify that @fh matches the origin file handle stored in OVL_XATTR_ORIGIN. + * Return 0 on match, -ESTALE on mismatch, < 0 on error. + */ +static int ovl_verify_origin_fh(struct dentry *dentry, const struct ovl_fh *fh) +{ + struct ovl_fh *ofh = ovl_get_origin_fh(dentry); + int err = 0; + + if (!ofh) + return -ENODATA; + + if (IS_ERR(ofh)) + return PTR_ERR(ofh); + + if (fh->len != ofh->len || memcmp(fh, ofh, fh->len)) + err = -ESTALE; + + kfree(ofh); + return err; +} + +/* + * Verify that an inode matches the origin file handle stored in upper inode. + * + * If @set is true and there is no stored file handle, encode and store origin + * file handle in OVL_XATTR_ORIGIN. + * + * Return 0 on match, -ESTALE on mismatch, < 0 on error. + */ +int ovl_verify_origin(struct dentry *dentry, struct vfsmount *mnt, + struct dentry *origin, bool is_upper, bool set) +{ + struct inode *inode; + struct ovl_fh *fh; + int err; + + fh = ovl_encode_fh(origin, is_upper); + err = PTR_ERR(fh); + if (IS_ERR(fh)) + goto fail; + + err = ovl_verify_origin_fh(dentry, fh); + if (set && err == -ENODATA) + err = ovl_do_setxattr(dentry, OVL_XATTR_ORIGIN, fh, fh->len, 0); + if (err) + goto fail; + +out: + kfree(fh); + return err; + +fail: + inode = d_inode(origin); + pr_warn_ratelimited("overlayfs: failed to verify origin (%pd2, ino=%lu, err=%i)\n", + origin, inode ? inode->i_ino : 0, err); + goto out; +} + +/* + * Verify that an index entry name matches the origin file handle stored in + * OVL_XATTR_ORIGIN and that origin file handle can be decoded to lower path. + * Return 0 on match, -ESTALE on mismatch or stale origin, < 0 on error. + */ +int ovl_verify_index(struct dentry *index, struct path *lowerstack, + unsigned int numlower) +{ + struct ovl_fh *fh = NULL; + size_t len; + struct path origin = { }; + struct path *stack = &origin; + unsigned int ctr = 0; + int err; + + if (!d_inode(index)) + return 0; + + err = -EISDIR; + if (d_is_dir(index)) + goto fail; + + err = -EINVAL; + if (index->d_name.len < sizeof(struct ovl_fh)*2) + goto fail; + + err = -ENOMEM; + len = index->d_name.len / 2; + fh = kzalloc(len, GFP_TEMPORARY); + if (!fh) + goto fail; + + err = -EINVAL; + if (hex2bin((u8 *)fh, index->d_name.name, len) || len != fh->len) + goto fail; + + err = ovl_verify_origin_fh(index, fh); + if (err) + goto fail; + + err = ovl_check_origin(index, lowerstack, numlower, &stack, &ctr); + if (!err && !ctr) + err = -ESTALE; + if (err) + goto fail; + + /* Check if index is orphan and don't warn before cleaning it */ + if (d_inode(index)->i_nlink == 1 && + ovl_get_nlink(index, origin.dentry, 0) == 0) + err = -ENOENT; + + dput(origin.dentry); +out: + kfree(fh); + return err; + +fail: + pr_warn_ratelimited("overlayfs: failed to verify index (%pd2, err=%i)\n", + index, err); + goto out; +} + +/* + * Lookup in indexdir for the index entry of a lower real inode or a copy up + * origin inode. The index entry name is the hex representation of the lower + * inode file handle. + * + * If the index dentry in negative, then either no lower aliases have been + * copied up yet, or aliases have been copied up in older kernels and are + * not indexed. + * + * If the index dentry for a copy up origin inode is positive, but points + * to an inode different than the upper inode, then either the upper inode + * has been copied up and not indexed or it was indexed, but since then + * index dir was cleared. Either way, that index cannot be used to indentify + * the overlay inode. + */ +int ovl_get_index_name(struct dentry *origin, struct qstr *name) +{ + int err; + struct ovl_fh *fh; + char *n, *s; + + fh = ovl_encode_fh(origin, false); + if (IS_ERR(fh)) + return PTR_ERR(fh); + + err = -ENOMEM; + n = kzalloc(fh->len * 2, GFP_TEMPORARY); + if (n) { + s = bin2hex(n, fh, fh->len); + *name = (struct qstr) QSTR_INIT(n, s - n); + err = 0; + } + kfree(fh); + + return err; + +} + +static struct dentry *ovl_lookup_index(struct dentry *dentry, + struct dentry *upper, + struct dentry *origin) +{ + struct ovl_fs *ofs = dentry->d_sb->s_fs_info; + struct dentry *index; + struct inode *inode; + struct qstr name; + int err; + + err = ovl_get_index_name(origin, &name); + if (err) + return ERR_PTR(err); + + index = lookup_one_len_unlocked(name.name, ofs->indexdir, name.len); + if (IS_ERR(index)) { + pr_warn_ratelimited("overlayfs: failed inode index lookup (ino=%lu, key=%*s, err=%i);\n" + "overlayfs: mount with '-o index=off' to disable inodes index.\n", + d_inode(origin)->i_ino, name.len, name.name, + err); + goto out; + } + + if (d_is_negative(index)) { + if (upper && d_inode(origin)->i_nlink > 1) { + pr_warn_ratelimited("overlayfs: hard link with origin but no index (ino=%lu).\n", + d_inode(origin)->i_ino); + goto fail; + } + + dput(index); + index = NULL; + } else if (upper && d_inode(index) != d_inode(upper)) { + inode = d_inode(index); + pr_warn_ratelimited("overlayfs: wrong index found (index ino: %lu, upper ino: %lu).\n", + d_inode(index)->i_ino, + d_inode(upper)->i_ino); + goto fail; + } + +out: + kfree(name.name); + return index; + +fail: + dput(index); + index = ERR_PTR(-EIO); + goto out; +} + /* * Returns next layer in stack starting from top. * Returns -1 if this is the last layer. @@ -338,10 +560,10 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry, struct ovl_entry *roe = dentry->d_sb->s_root->d_fsdata; struct path *stack = NULL; struct dentry *upperdir, *upperdentry = NULL; + struct dentry *index = NULL; unsigned int ctr = 0; struct inode *inode = NULL; bool upperopaque = false; - bool upperimpure = false; char *upperredirect = NULL; struct dentry *this; unsigned int i; @@ -359,7 +581,7 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry, return ERR_PTR(-ENAMETOOLONG); old_cred = ovl_override_creds(dentry->d_sb); - upperdir = ovl_upperdentry_dereference(poe); + upperdir = ovl_dentry_upper(dentry->d_parent); if (upperdir) { err = ovl_lookup_layer(upperdir, &d, &upperdentry); if (err) @@ -372,8 +594,18 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry, } if (upperdentry && !d.is_dir) { BUG_ON(!d.stop || d.redirect); - err = ovl_check_origin(dentry, upperdentry, - &stack, &ctr); + /* + * Lookup copy up origin by decoding origin file handle. + * We may get a disconnected dentry, which is fine, + * because we only need to hold the origin inode in + * cache and use its inode number. We may even get a + * connected dentry, that is not under any of the lower + * layers root. That is also fine for using it's inode + * number - it's the same as if we held a reference + * to a dentry in lower layer that was moved under us. + */ + err = ovl_check_origin(upperdentry, roe->lowerstack, + roe->numlower, &stack, &ctr); if (err) goto out; } @@ -386,8 +618,6 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry, poe = roe; } upperopaque = d.opaque; - if (upperdentry && d.is_dir) - upperimpure = ovl_is_impuredir(upperdentry); } if (!d.stop && poe->numlower) { @@ -428,48 +658,56 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry, } } + /* Lookup index by lower inode and verify it matches upper inode */ + if (ctr && !d.is_dir && ovl_indexdir(dentry->d_sb)) { + struct dentry *origin = stack[0].dentry; + + index = ovl_lookup_index(dentry, upperdentry, origin); + if (IS_ERR(index)) { + err = PTR_ERR(index); + index = NULL; + goto out_put; + } + } + oe = ovl_alloc_entry(ctr); err = -ENOMEM; if (!oe) goto out_put; + oe->opaque = upperopaque; + memcpy(oe->lowerstack, stack, sizeof(struct path) * ctr); + dentry->d_fsdata = oe; + + if (upperdentry) + ovl_dentry_set_upper_alias(dentry); + else if (index) + upperdentry = dget(index); + if (upperdentry || ctr) { - struct dentry *realdentry; - struct inode *realinode; - - realdentry = upperdentry ? upperdentry : stack[0].dentry; - realinode = d_inode(realdentry); - - err = -ENOMEM; - if (upperdentry && !d_is_dir(upperdentry)) { - inode = ovl_get_inode(dentry->d_sb, realinode); - } else { - inode = ovl_new_inode(dentry->d_sb, realinode->i_mode, - realinode->i_rdev); - if (inode) - ovl_inode_init(inode, realinode, !!upperdentry); - } - if (!inode) + inode = ovl_get_inode(dentry, upperdentry); + err = PTR_ERR(inode); + if (IS_ERR(inode)) goto out_free_oe; - ovl_copyattr(realdentry->d_inode, inode); + + OVL_I(inode)->redirect = upperredirect; + if (index) + ovl_set_flag(OVL_INDEX, inode); } revert_creds(old_cred); - oe->opaque = upperopaque; - oe->impure = upperimpure; - oe->redirect = upperredirect; - oe->__upperdentry = upperdentry; - memcpy(oe->lowerstack, stack, sizeof(struct path) * ctr); + dput(index); kfree(stack); kfree(d.redirect); - dentry->d_fsdata = oe; d_add(dentry, inode); return NULL; out_free_oe: + dentry->d_fsdata = NULL; kfree(oe); out_put: + dput(index); for (i = 0; i < ctr; i++) dput(stack[i].dentry); kfree(stack); @@ -499,7 +737,7 @@ bool ovl_lower_positive(struct dentry *dentry) return oe->opaque; /* Negative upper -> positive lower */ - if (!oe->__upperdentry) + if (!ovl_dentry_upper(dentry)) return true; /* Positive upper -> have to look up lower to see whether it exists */ diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h index 10863b4105fa..60d26605e039 100644 --- a/fs/overlayfs/overlayfs.h +++ b/fs/overlayfs/overlayfs.h @@ -25,6 +25,12 @@ enum ovl_path_type { #define OVL_XATTR_REDIRECT OVL_XATTR_PREFIX "redirect" #define OVL_XATTR_ORIGIN OVL_XATTR_PREFIX "origin" #define OVL_XATTR_IMPURE OVL_XATTR_PREFIX "impure" +#define OVL_XATTR_NLINK OVL_XATTR_PREFIX "nlink" + +enum ovl_flag { + OVL_IMPURE, + OVL_INDEX, +}; /* * The tuple (fh,uuid) is a universal unique identifier for a copy up origin, @@ -38,6 +44,8 @@ enum ovl_path_type { /* CPU byte order required for fid decoding: */ #define OVL_FH_FLAG_BIG_ENDIAN (1 << 0) #define OVL_FH_FLAG_ANY_ENDIAN (1 << 1) +/* Is the real inode encoded in fid an upper inode? */ +#define OVL_FH_FLAG_PATH_UPPER (1 << 2) #define OVL_FH_FLAG_ALL (OVL_FH_FLAG_BIG_ENDIAN | OVL_FH_FLAG_ANY_ENDIAN) @@ -60,8 +68,6 @@ struct ovl_fh { u8 fid[0]; /* file identifier */ } __packed; -#define OVL_ISUPPER_MASK 1UL - static inline int ovl_do_rmdir(struct inode *dir, struct dentry *dentry) { int err = vfs_rmdir(dir, dentry); @@ -175,22 +181,14 @@ static inline struct dentry *ovl_do_tmpfile(struct dentry *dentry, umode_t mode) return ret; } -static inline struct inode *ovl_inode_real(struct inode *inode, bool *is_upper) -{ - unsigned long x = (unsigned long) READ_ONCE(inode->i_private); - - if (is_upper) - *is_upper = x & OVL_ISUPPER_MASK; - - return (struct inode *) (x & ~OVL_ISUPPER_MASK); -} - /* util.c */ int ovl_want_write(struct dentry *dentry); void ovl_drop_write(struct dentry *dentry); struct dentry *ovl_workdir(struct dentry *dentry); const struct cred *ovl_override_creds(struct super_block *sb); struct super_block *ovl_same_sb(struct super_block *sb); +bool ovl_can_decode_fh(struct super_block *sb); +struct dentry *ovl_indexdir(struct super_block *sb); struct ovl_entry *ovl_alloc_entry(unsigned int numlower); bool ovl_dentry_remote(struct dentry *dentry); bool ovl_dentry_weird(struct dentry *dentry); @@ -201,19 +199,22 @@ enum ovl_path_type ovl_path_real(struct dentry *dentry, struct path *path); struct dentry *ovl_dentry_upper(struct dentry *dentry); struct dentry *ovl_dentry_lower(struct dentry *dentry); struct dentry *ovl_dentry_real(struct dentry *dentry); +struct inode *ovl_inode_upper(struct inode *inode); +struct inode *ovl_inode_lower(struct inode *inode); +struct inode *ovl_inode_real(struct inode *inode); struct ovl_dir_cache *ovl_dir_cache(struct dentry *dentry); void ovl_set_dir_cache(struct dentry *dentry, struct ovl_dir_cache *cache); bool ovl_dentry_is_opaque(struct dentry *dentry); -bool ovl_dentry_is_impure(struct dentry *dentry); bool ovl_dentry_is_whiteout(struct dentry *dentry); void ovl_dentry_set_opaque(struct dentry *dentry); +bool ovl_dentry_has_upper_alias(struct dentry *dentry); +void ovl_dentry_set_upper_alias(struct dentry *dentry); bool ovl_redirect_dir(struct super_block *sb); const char *ovl_dentry_get_redirect(struct dentry *dentry); void ovl_dentry_set_redirect(struct dentry *dentry, const char *redirect); -void ovl_dentry_update(struct dentry *dentry, struct dentry *upperdentry); -void ovl_inode_init(struct inode *inode, struct inode *realinode, - bool is_upper); -void ovl_inode_update(struct inode *inode, struct inode *upperinode); +void ovl_inode_init(struct inode *inode, struct dentry *upperdentry, + struct dentry *lowerdentry); +void ovl_inode_update(struct inode *inode, struct dentry *upperdentry); void ovl_dentry_version_inc(struct dentry *dentry); u64 ovl_dentry_version_get(struct dentry *dentry); bool ovl_is_whiteout(struct dentry *dentry); @@ -225,6 +226,12 @@ int ovl_check_setxattr(struct dentry *dentry, struct dentry *upperdentry, const char *name, const void *value, size_t size, int xerr); int ovl_set_impure(struct dentry *dentry, struct dentry *upperdentry); +void ovl_set_flag(unsigned long flag, struct inode *inode); +bool ovl_test_flag(unsigned long flag, struct inode *inode); +bool ovl_inuse_trylock(struct dentry *dentry); +void ovl_inuse_unlock(struct dentry *dentry); +int ovl_nlink_start(struct dentry *dentry, bool *locked); +void ovl_nlink_end(struct dentry *dentry, bool locked); static inline bool ovl_is_impuredir(struct dentry *dentry) { @@ -233,6 +240,11 @@ static inline bool ovl_is_impuredir(struct dentry *dentry) /* namei.c */ +int ovl_verify_origin(struct dentry *dentry, struct vfsmount *mnt, + struct dentry *origin, bool is_upper, bool set); +int ovl_verify_index(struct dentry *index, struct path *lowerstack, + unsigned int numlower); +int ovl_get_index_name(struct dentry *origin, struct qstr *name); int ovl_path_next(int idx, struct dentry *dentry, struct path *path); struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags); bool ovl_lower_positive(struct dentry *dentry); @@ -245,8 +257,15 @@ void ovl_cache_free(struct list_head *list); int ovl_check_d_type_supported(struct path *realpath); void ovl_workdir_cleanup(struct inode *dir, struct vfsmount *mnt, struct dentry *dentry, int level); +int ovl_indexdir_cleanup(struct dentry *dentry, struct vfsmount *mnt, + struct path *lowerstack, unsigned int numlower); /* inode.c */ +int ovl_set_nlink_upper(struct dentry *dentry); +int ovl_set_nlink_lower(struct dentry *dentry); +unsigned int ovl_get_nlink(struct dentry *lowerdentry, + struct dentry *upperdentry, + unsigned int fallback); int ovl_setattr(struct dentry *dentry, struct iattr *attr); int ovl_getattr(const struct path *path, struct kstat *stat, u32 request_mask, unsigned int flags); @@ -262,7 +281,7 @@ int ovl_update_time(struct inode *inode, struct timespec *ts, int flags); bool ovl_is_private_xattr(const char *name); struct inode *ovl_new_inode(struct super_block *sb, umode_t mode, dev_t rdev); -struct inode *ovl_get_inode(struct super_block *sb, struct inode *realinode); +struct inode *ovl_get_inode(struct dentry *dentry, struct dentry *upperdentry); static inline void ovl_copyattr(struct inode *from, struct inode *to) { to->i_uid = from->i_uid; @@ -284,10 +303,11 @@ struct cattr { int ovl_create_real(struct inode *dir, struct dentry *newdentry, struct cattr *attr, struct dentry *hardlink, bool debug); -void ovl_cleanup(struct inode *dir, struct dentry *dentry); +int ovl_cleanup(struct inode *dir, struct dentry *dentry); /* copy_up.c */ int ovl_copy_up(struct dentry *dentry); int ovl_copy_up_flags(struct dentry *dentry, int flags); int ovl_copy_xattr(struct dentry *old, struct dentry *new); int ovl_set_attr(struct dentry *upper, struct kstat *stat); +struct ovl_fh *ovl_encode_fh(struct dentry *lower, bool is_upper); diff --git a/fs/overlayfs/ovl_entry.h b/fs/overlayfs/ovl_entry.h index 34bc4a9f5c61..878a750986dd 100644 --- a/fs/overlayfs/ovl_entry.h +++ b/fs/overlayfs/ovl_entry.h @@ -14,6 +14,7 @@ struct ovl_config { char *workdir; bool default_permissions; bool redirect_dir; + bool index; }; /* private information held for overlayfs's superblock */ @@ -21,7 +22,12 @@ struct ovl_fs { struct vfsmount *upper_mnt; unsigned numlower; struct vfsmount **lower_mnt; + /* workbasedir is the path at workdir= mount option */ + struct dentry *workbasedir; + /* workdir is the 'work' directory under workbasedir */ struct dentry *workdir; + /* index directory listing overlay inodes by origin file handle */ + struct dentry *indexdir; long namelen; /* pathnames of lower and upper dirs, for show_options */ struct ovl_config config; @@ -29,22 +35,16 @@ struct ovl_fs { const struct cred *creator_cred; bool tmpfile; bool noxattr; - wait_queue_head_t copyup_wq; /* sb common to all layers */ struct super_block *same_sb; }; /* private information held for every overlayfs dentry */ struct ovl_entry { - struct dentry *__upperdentry; - struct ovl_dir_cache *cache; union { struct { - u64 version; - const char *redirect; + unsigned long has_upper; bool opaque; - bool impure; - bool copying; }; struct rcu_head rcu; }; @@ -54,7 +54,25 @@ struct ovl_entry { struct ovl_entry *ovl_alloc_entry(unsigned int numlower); -static inline struct dentry *ovl_upperdentry_dereference(struct ovl_entry *oe) +struct ovl_inode { + struct ovl_dir_cache *cache; + const char *redirect; + u64 version; + unsigned long flags; + struct inode vfs_inode; + struct dentry *__upperdentry; + struct inode *lower; + + /* synchronize copy up and more */ + struct mutex lock; +}; + +static inline struct ovl_inode *OVL_I(struct inode *inode) { - return lockless_dereference(oe->__upperdentry); + return container_of(inode, struct ovl_inode, vfs_inode); +} + +static inline struct dentry *ovl_upperdentry_dereference(struct ovl_inode *oi) +{ + return lockless_dereference(oi->__upperdentry); } diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c index f241b4ee3d8a..0298463cf9c3 100644 --- a/fs/overlayfs/readdir.c +++ b/fs/overlayfs/readdir.c @@ -667,3 +667,53 @@ void ovl_workdir_cleanup(struct inode *dir, struct vfsmount *mnt, ovl_cleanup(dir, dentry); } } + +int ovl_indexdir_cleanup(struct dentry *dentry, struct vfsmount *mnt, + struct path *lowerstack, unsigned int numlower) +{ + int err; + struct inode *dir = dentry->d_inode; + struct path path = { .mnt = mnt, .dentry = dentry }; + LIST_HEAD(list); + struct ovl_cache_entry *p; + struct ovl_readdir_data rdd = { + .ctx.actor = ovl_fill_merge, + .dentry = NULL, + .list = &list, + .root = RB_ROOT, + .is_lowest = false, + }; + + err = ovl_dir_read(&path, &rdd); + if (err) + goto out; + + inode_lock_nested(dir, I_MUTEX_PARENT); + list_for_each_entry(p, &list, l_node) { + struct dentry *index; + + if (p->name[0] == '.') { + if (p->len == 1) + continue; + if (p->len == 2 && p->name[1] == '.') + continue; + } + index = lookup_one_len(p->name, dentry, p->len); + if (IS_ERR(index)) { + err = PTR_ERR(index); + break; + } + if (ovl_verify_index(index, lowerstack, numlower)) { + err = ovl_cleanup(dir, index); + if (err) + break; + } + dput(index); + } + inode_unlock(dir); +out: + ovl_cache_free(&list); + if (err) + pr_err("overlayfs: failed index dir cleanup (%i)\n", err); + return err; +} diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c index 4882ffb37bae..44dc2d6ffe0f 100644 --- a/fs/overlayfs/super.c +++ b/fs/overlayfs/super.c @@ -34,6 +34,11 @@ module_param_named(redirect_dir, ovl_redirect_dir_def, bool, 0644); MODULE_PARM_DESC(ovl_redirect_dir_def, "Default to on or off for the redirect_dir feature"); +static bool ovl_index_def = IS_ENABLED(CONFIG_OVERLAY_FS_INDEX); +module_param_named(index, ovl_index_def, bool, 0644); +MODULE_PARM_DESC(ovl_index_def, + "Default to on or off for the inodes index feature"); + static void ovl_dentry_release(struct dentry *dentry) { struct ovl_entry *oe = dentry->d_fsdata; @@ -41,8 +46,6 @@ static void ovl_dentry_release(struct dentry *dentry) if (oe) { unsigned int i; - dput(oe->__upperdentry); - kfree(oe->redirect); for (i = 0; i < oe->numlower; i++) dput(oe->lowerstack[i].dentry); kfree_rcu(oe, rcu); @@ -165,12 +168,52 @@ static const struct dentry_operations ovl_reval_dentry_operations = { .d_weak_revalidate = ovl_dentry_weak_revalidate, }; +static struct kmem_cache *ovl_inode_cachep; + +static struct inode *ovl_alloc_inode(struct super_block *sb) +{ + struct ovl_inode *oi = kmem_cache_alloc(ovl_inode_cachep, GFP_KERNEL); + + oi->cache = NULL; + oi->redirect = NULL; + oi->version = 0; + oi->flags = 0; + oi->__upperdentry = NULL; + oi->lower = NULL; + mutex_init(&oi->lock); + + return &oi->vfs_inode; +} + +static void ovl_i_callback(struct rcu_head *head) +{ + struct inode *inode = container_of(head, struct inode, i_rcu); + + kmem_cache_free(ovl_inode_cachep, OVL_I(inode)); +} + +static void ovl_destroy_inode(struct inode *inode) +{ + struct ovl_inode *oi = OVL_I(inode); + + dput(oi->__upperdentry); + kfree(oi->redirect); + mutex_destroy(&oi->lock); + + call_rcu(&inode->i_rcu, ovl_i_callback); +} + static void ovl_put_super(struct super_block *sb) { struct ovl_fs *ufs = sb->s_fs_info; unsigned i; + dput(ufs->indexdir); dput(ufs->workdir); + ovl_inuse_unlock(ufs->workbasedir); + dput(ufs->workbasedir); + if (ufs->upper_mnt) + ovl_inuse_unlock(ufs->upper_mnt->mnt_root); mntput(ufs->upper_mnt); for (i = 0; i < ufs->numlower; i++) mntput(ufs->lower_mnt[i]); @@ -228,6 +271,12 @@ static int ovl_statfs(struct dentry *dentry, struct kstatfs *buf) return err; } +/* Will this overlay be forced to mount/remount ro? */ +static bool ovl_force_readonly(struct ovl_fs *ufs) +{ + return (!ufs->upper_mnt || !ufs->workdir); +} + /** * ovl_show_options * @@ -249,6 +298,9 @@ static int ovl_show_options(struct seq_file *m, struct dentry *dentry) if (ufs->config.redirect_dir != ovl_redirect_dir_def) seq_printf(m, ",redirect_dir=%s", ufs->config.redirect_dir ? "on" : "off"); + if (ufs->config.index != ovl_index_def) + seq_printf(m, ",index=%s", + ufs->config.index ? "on" : "off"); return 0; } @@ -256,19 +308,21 @@ static int ovl_remount(struct super_block *sb, int *flags, char *data) { struct ovl_fs *ufs = sb->s_fs_info; - if (!(*flags & MS_RDONLY) && (!ufs->upper_mnt || !ufs->workdir)) + if (!(*flags & MS_RDONLY) && ovl_force_readonly(ufs)) return -EROFS; return 0; } static const struct super_operations ovl_super_operations = { + .alloc_inode = ovl_alloc_inode, + .destroy_inode = ovl_destroy_inode, + .drop_inode = generic_delete_inode, .put_super = ovl_put_super, .sync_fs = ovl_sync_fs, .statfs = ovl_statfs, .show_options = ovl_show_options, .remount_fs = ovl_remount, - .drop_inode = generic_delete_inode, }; enum { @@ -278,6 +332,8 @@ enum { OPT_DEFAULT_PERMISSIONS, OPT_REDIRECT_DIR_ON, OPT_REDIRECT_DIR_OFF, + OPT_INDEX_ON, + OPT_INDEX_OFF, OPT_ERR, }; @@ -288,6 +344,8 @@ static const match_table_t ovl_tokens = { {OPT_DEFAULT_PERMISSIONS, "default_permissions"}, {OPT_REDIRECT_DIR_ON, "redirect_dir=on"}, {OPT_REDIRECT_DIR_OFF, "redirect_dir=off"}, + {OPT_INDEX_ON, "index=on"}, + {OPT_INDEX_OFF, "index=off"}, {OPT_ERR, NULL} }; @@ -360,6 +418,14 @@ static int ovl_parse_opt(char *opt, struct ovl_config *config) config->redirect_dir = false; break; + case OPT_INDEX_ON: + config->index = true; + break; + + case OPT_INDEX_OFF: + config->index = false; + break; + default: pr_err("overlayfs: unrecognized mount option \"%s\" or missing value\n", p); return -EINVAL; @@ -378,23 +444,29 @@ static int ovl_parse_opt(char *opt, struct ovl_config *config) } #define OVL_WORKDIR_NAME "work" +#define OVL_INDEXDIR_NAME "index" -static struct dentry *ovl_workdir_create(struct vfsmount *mnt, - struct dentry *dentry) +static struct dentry *ovl_workdir_create(struct super_block *sb, + struct ovl_fs *ufs, + struct dentry *dentry, + const char *name, bool persist) { struct inode *dir = dentry->d_inode; + struct vfsmount *mnt = ufs->upper_mnt; struct dentry *work; int err; bool retried = false; + bool locked = false; err = mnt_want_write(mnt); if (err) - return ERR_PTR(err); + goto out_err; inode_lock_nested(dir, I_MUTEX_PARENT); + locked = true; + retry: - work = lookup_one_len(OVL_WORKDIR_NAME, dentry, - strlen(OVL_WORKDIR_NAME)); + work = lookup_one_len(name, dentry, strlen(name)); if (!IS_ERR(work)) { struct iattr attr = { @@ -407,6 +479,9 @@ retry: if (retried) goto out_dput; + if (persist) + goto out_unlock; + retried = true; ovl_workdir_cleanup(dir, mnt, work, 0); dput(work); @@ -446,16 +521,24 @@ retry: inode_unlock(work->d_inode); if (err) goto out_dput; + } else { + err = PTR_ERR(work); + goto out_err; } out_unlock: - inode_unlock(dir); mnt_drop_write(mnt); + if (locked) + inode_unlock(dir); return work; out_dput: dput(work); - work = ERR_PTR(err); +out_err: + pr_warn("overlayfs: failed to create directory %s/%s (errno: %i); mounting read-only\n", + ufs->config.workdir, name, -err); + sb->s_flags |= MS_RDONLY; + work = NULL; goto out_unlock; } @@ -555,6 +638,15 @@ static int ovl_lower_dir(const char *name, struct path *path, if (ovl_dentry_remote(path->dentry)) *remote = true; + /* + * The inodes index feature needs to encode and decode file + * handles, so it requires that all layers support them. + */ + if (ofs->config.index && !ovl_can_decode_fh(path->dentry->d_sb)) { + ofs->config.index = false; + pr_warn("overlayfs: fs on '%s' does not support file handles, falling back to index=off.\n", name); + } + return 0; out_put: @@ -610,7 +702,7 @@ ovl_posix_acl_xattr_set(const struct xattr_handler *handler, size_t size, int flags) { struct dentry *workdir = ovl_workdir(dentry); - struct inode *realinode = ovl_inode_real(inode, NULL); + struct inode *realinode = ovl_inode_real(inode); struct posix_acl *acl = NULL; int err; @@ -652,7 +744,7 @@ ovl_posix_acl_xattr_set(const struct xattr_handler *handler, err = ovl_xattr_set(dentry, handler->name, value, size, flags); if (!err) - ovl_copyattr(ovl_inode_real(inode, NULL), inode); + ovl_copyattr(ovl_inode_real(inode), inode); return err; @@ -734,7 +826,6 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) struct path upperpath = { }; struct path workpath = { }; struct dentry *root_dentry; - struct inode *realinode; struct ovl_entry *oe; struct ovl_fs *ufs; struct path *stack = NULL; @@ -752,8 +843,8 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) if (!ufs) goto out; - init_waitqueue_head(&ufs->copyup_wq); ufs->config.redirect_dir = ovl_redirect_dir_def; + ufs->config.index = ovl_index_def; err = ovl_parse_opt((char *) data, &ufs->config); if (err) goto out_free_config; @@ -788,9 +879,15 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) if (err) goto out_put_upperpath; + err = -EBUSY; + if (!ovl_inuse_trylock(upperpath.dentry)) { + pr_err("overlayfs: upperdir is in-use by another mount\n"); + goto out_put_upperpath; + } + err = ovl_mount_dir(ufs->config.workdir, &workpath); if (err) - goto out_put_upperpath; + goto out_unlock_upperdentry; err = -EINVAL; if (upperpath.mnt != workpath.mnt) { @@ -801,12 +898,20 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) pr_err("overlayfs: workdir and upperdir must be separate subtrees\n"); goto out_put_workpath; } + + err = -EBUSY; + if (!ovl_inuse_trylock(workpath.dentry)) { + pr_err("overlayfs: workdir is in-use by another mount\n"); + goto out_put_workpath; + } + + ufs->workbasedir = workpath.dentry; sb->s_stack_depth = upperpath.mnt->mnt_sb->s_stack_depth; } err = -ENOMEM; lowertmp = kstrdup(ufs->config.lowerdir, GFP_KERNEL); if (!lowertmp) - goto out_put_workpath; + goto out_unlock_workdentry; err = -EINVAL; stacklen = ovl_split_lowerdirs(lowertmp); @@ -849,20 +954,14 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) pr_err("overlayfs: failed to clone upperpath\n"); goto out_put_lowerpath; } + /* Don't inherit atime flags */ ufs->upper_mnt->mnt_flags &= ~(MNT_NOATIME | MNT_NODIRATIME | MNT_RELATIME); sb->s_time_gran = ufs->upper_mnt->mnt_sb->s_time_gran; - ufs->workdir = ovl_workdir_create(ufs->upper_mnt, workpath.dentry); - err = PTR_ERR(ufs->workdir); - if (IS_ERR(ufs->workdir)) { - pr_warn("overlayfs: failed to create directory %s/%s (errno: %i); mounting read-only\n", - ufs->config.workdir, OVL_WORKDIR_NAME, -err); - sb->s_flags |= MS_RDONLY; - ufs->workdir = NULL; - } - + ufs->workdir = ovl_workdir_create(sb, ufs, workpath.dentry, + OVL_WORKDIR_NAME, false); /* * Upper should support d_type, else whiteouts are visible. * Given workdir and upper are on same fs, we can do @@ -904,6 +1003,13 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) } else { vfs_removexattr(ufs->workdir, OVL_XATTR_OPAQUE); } + + /* Check if upper/work fs supports file handles */ + if (ufs->config.index && + !ovl_can_decode_fh(ufs->workdir->d_sb)) { + ufs->config.index = false; + pr_warn("overlayfs: upper fs does not support file handles, falling back to index=off.\n"); + } } } @@ -941,6 +1047,44 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) else if (ufs->upper_mnt->mnt_sb != ufs->same_sb) ufs->same_sb = NULL; + if (!(ovl_force_readonly(ufs)) && ufs->config.index) { + /* Verify lower root is upper root origin */ + err = ovl_verify_origin(upperpath.dentry, ufs->lower_mnt[0], + stack[0].dentry, false, true); + if (err) { + pr_err("overlayfs: failed to verify upper root origin\n"); + goto out_put_lower_mnt; + } + + ufs->indexdir = ovl_workdir_create(sb, ufs, workpath.dentry, + OVL_INDEXDIR_NAME, true); + err = PTR_ERR(ufs->indexdir); + if (IS_ERR(ufs->indexdir)) + goto out_put_lower_mnt; + + if (ufs->indexdir) { + /* Verify upper root is index dir origin */ + err = ovl_verify_origin(ufs->indexdir, ufs->upper_mnt, + upperpath.dentry, true, true); + if (err) + pr_err("overlayfs: failed to verify index dir origin\n"); + + /* Cleanup bad/stale/orphan index entries */ + if (!err) + err = ovl_indexdir_cleanup(ufs->indexdir, + ufs->upper_mnt, + stack, numlower); + } + if (err || !ufs->indexdir) + pr_warn("overlayfs: try deleting index dir or mounting with '-o index=off' to disable inodes index.\n"); + if (err) + goto out_put_indexdir; + } + + /* Show index=off/on in /proc/mounts for any of the reasons above */ + if (!ufs->indexdir) + ufs->config.index = false; + if (remote) sb->s_d_op = &ovl_reval_dentry_operations; else @@ -948,7 +1092,7 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) ufs->creator_cred = cred = prepare_creds(); if (!cred) - goto out_put_lower_mnt; + goto out_put_indexdir; /* Never override disk quota limits or use reserved space */ cap_lower(cred->cap_effective, CAP_SYS_RESOURCE); @@ -971,12 +1115,13 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) mntput(upperpath.mnt); for (i = 0; i < numlower; i++) mntput(stack[i].mnt); - path_put(&workpath); + mntput(workpath.mnt); kfree(lowertmp); if (upperpath.dentry) { - oe->__upperdentry = upperpath.dentry; - oe->impure = ovl_is_impuredir(upperpath.dentry); + oe->has_upper = true; + if (ovl_is_impuredir(upperpath.dentry)) + ovl_set_flag(OVL_IMPURE, d_inode(root_dentry)); } for (i = 0; i < numlower; i++) { oe->lowerstack[i].dentry = stack[i].dentry; @@ -986,9 +1131,8 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) root_dentry->d_fsdata = oe; - realinode = d_inode(ovl_dentry_real(root_dentry)); - ovl_inode_init(d_inode(root_dentry), realinode, !!upperpath.dentry); - ovl_copyattr(realinode, d_inode(root_dentry)); + ovl_inode_init(d_inode(root_dentry), upperpath.dentry, + ovl_dentry_lower(root_dentry)); sb->s_root = root_dentry; @@ -998,6 +1142,8 @@ out_free_oe: kfree(oe); out_put_cred: put_cred(ufs->creator_cred); +out_put_indexdir: + dput(ufs->indexdir); out_put_lower_mnt: for (i = 0; i < ufs->numlower; i++) mntput(ufs->lower_mnt[i]); @@ -1011,8 +1157,12 @@ out_put_lowerpath: kfree(stack); out_free_lowertmp: kfree(lowertmp); +out_unlock_workdentry: + ovl_inuse_unlock(workpath.dentry); out_put_workpath: path_put(&workpath); +out_unlock_upperdentry: + ovl_inuse_unlock(upperpath.dentry); out_put_upperpath: path_put(&upperpath); out_free_config: @@ -1038,14 +1188,43 @@ static struct file_system_type ovl_fs_type = { }; MODULE_ALIAS_FS("overlay"); +static void ovl_inode_init_once(void *foo) +{ + struct ovl_inode *oi = foo; + + inode_init_once(&oi->vfs_inode); +} + static int __init ovl_init(void) { - return register_filesystem(&ovl_fs_type); + int err; + + ovl_inode_cachep = kmem_cache_create("ovl_inode", + sizeof(struct ovl_inode), 0, + (SLAB_RECLAIM_ACCOUNT| + SLAB_MEM_SPREAD|SLAB_ACCOUNT), + ovl_inode_init_once); + if (ovl_inode_cachep == NULL) + return -ENOMEM; + + err = register_filesystem(&ovl_fs_type); + if (err) + kmem_cache_destroy(ovl_inode_cachep); + + return err; } static void __exit ovl_exit(void) { unregister_filesystem(&ovl_fs_type); + + /* + * Make sure all delayed rcu free inodes are flushed before we + * destroy cache. + */ + rcu_barrier(); + kmem_cache_destroy(ovl_inode_cachep); + } module_init(ovl_init); diff --git a/fs/overlayfs/util.c b/fs/overlayfs/util.c index 809048913889..c492ba75c659 100644 --- a/fs/overlayfs/util.c +++ b/fs/overlayfs/util.c @@ -12,6 +12,10 @@ #include #include #include +#include +#include +#include +#include #include "overlayfs.h" #include "ovl_entry.h" @@ -47,6 +51,19 @@ struct super_block *ovl_same_sb(struct super_block *sb) return ofs->same_sb; } +bool ovl_can_decode_fh(struct super_block *sb) +{ + return (sb->s_export_op && sb->s_export_op->fh_to_dentry && + !uuid_is_null(&sb->s_uuid)); +} + +struct dentry *ovl_indexdir(struct super_block *sb) +{ + struct ovl_fs *ofs = sb->s_fs_info; + + return ofs->indexdir; +} + struct ovl_entry *ovl_alloc_entry(unsigned int numlower) { size_t size = offsetof(struct ovl_entry, lowerstack[numlower]); @@ -78,7 +95,7 @@ enum ovl_path_type ovl_path_type(struct dentry *dentry) struct ovl_entry *oe = dentry->d_fsdata; enum ovl_path_type type = 0; - if (oe->__upperdentry) { + if (ovl_dentry_upper(dentry)) { type = __OVL_PATH_UPPER; /* @@ -99,10 +116,9 @@ enum ovl_path_type ovl_path_type(struct dentry *dentry) void ovl_path_upper(struct dentry *dentry, struct path *path) { struct ovl_fs *ofs = dentry->d_sb->s_fs_info; - struct ovl_entry *oe = dentry->d_fsdata; path->mnt = ofs->upper_mnt; - path->dentry = ovl_upperdentry_dereference(oe); + path->dentry = ovl_dentry_upper(dentry); } void ovl_path_lower(struct dentry *dentry, struct path *path) @@ -126,47 +142,47 @@ enum ovl_path_type ovl_path_real(struct dentry *dentry, struct path *path) struct dentry *ovl_dentry_upper(struct dentry *dentry) { - struct ovl_entry *oe = dentry->d_fsdata; - - return ovl_upperdentry_dereference(oe); -} - -static struct dentry *__ovl_dentry_lower(struct ovl_entry *oe) -{ - return oe->numlower ? oe->lowerstack[0].dentry : NULL; + return ovl_upperdentry_dereference(OVL_I(d_inode(dentry))); } struct dentry *ovl_dentry_lower(struct dentry *dentry) { struct ovl_entry *oe = dentry->d_fsdata; - return __ovl_dentry_lower(oe); + return oe->numlower ? oe->lowerstack[0].dentry : NULL; } struct dentry *ovl_dentry_real(struct dentry *dentry) { - struct ovl_entry *oe = dentry->d_fsdata; - struct dentry *realdentry; - - realdentry = ovl_upperdentry_dereference(oe); - if (!realdentry) - realdentry = __ovl_dentry_lower(oe); - - return realdentry; + return ovl_dentry_upper(dentry) ?: ovl_dentry_lower(dentry); } +struct inode *ovl_inode_upper(struct inode *inode) +{ + struct dentry *upperdentry = ovl_upperdentry_dereference(OVL_I(inode)); + + return upperdentry ? d_inode(upperdentry) : NULL; +} + +struct inode *ovl_inode_lower(struct inode *inode) +{ + return OVL_I(inode)->lower; +} + +struct inode *ovl_inode_real(struct inode *inode) +{ + return ovl_inode_upper(inode) ?: ovl_inode_lower(inode); +} + + struct ovl_dir_cache *ovl_dir_cache(struct dentry *dentry) { - struct ovl_entry *oe = dentry->d_fsdata; - - return oe->cache; + return OVL_I(d_inode(dentry))->cache; } void ovl_set_dir_cache(struct dentry *dentry, struct ovl_dir_cache *cache) { - struct ovl_entry *oe = dentry->d_fsdata; - - oe->cache = cache; + OVL_I(d_inode(dentry))->cache = cache; } bool ovl_dentry_is_opaque(struct dentry *dentry) @@ -175,13 +191,6 @@ bool ovl_dentry_is_opaque(struct dentry *dentry) return oe->opaque; } -bool ovl_dentry_is_impure(struct dentry *dentry) -{ - struct ovl_entry *oe = dentry->d_fsdata; - - return oe->impure; -} - bool ovl_dentry_is_whiteout(struct dentry *dentry) { return !dentry->d_inode && ovl_dentry_is_opaque(dentry); @@ -194,6 +203,25 @@ void ovl_dentry_set_opaque(struct dentry *dentry) oe->opaque = true; } +/* + * For hard links it's possible for ovl_dentry_upper() to return positive, while + * there's no actual upper alias for the inode. Copy up code needs to know + * about the existence of the upper alias, so it can't use ovl_dentry_upper(). + */ +bool ovl_dentry_has_upper_alias(struct dentry *dentry) +{ + struct ovl_entry *oe = dentry->d_fsdata; + + return oe->has_upper; +} + +void ovl_dentry_set_upper_alias(struct dentry *dentry) +{ + struct ovl_entry *oe = dentry->d_fsdata; + + oe->has_upper = true; +} + bool ovl_redirect_dir(struct super_block *sb) { struct ovl_fs *ofs = sb->s_fs_info; @@ -203,63 +231,59 @@ bool ovl_redirect_dir(struct super_block *sb) const char *ovl_dentry_get_redirect(struct dentry *dentry) { - struct ovl_entry *oe = dentry->d_fsdata; - - return oe->redirect; + return OVL_I(d_inode(dentry))->redirect; } void ovl_dentry_set_redirect(struct dentry *dentry, const char *redirect) { - struct ovl_entry *oe = dentry->d_fsdata; + struct ovl_inode *oi = OVL_I(d_inode(dentry)); - kfree(oe->redirect); - oe->redirect = redirect; + kfree(oi->redirect); + oi->redirect = redirect; } -void ovl_dentry_update(struct dentry *dentry, struct dentry *upperdentry) +void ovl_inode_init(struct inode *inode, struct dentry *upperdentry, + struct dentry *lowerdentry) { - struct ovl_entry *oe = dentry->d_fsdata; + if (upperdentry) + OVL_I(inode)->__upperdentry = upperdentry; + if (lowerdentry) + OVL_I(inode)->lower = d_inode(lowerdentry); + + ovl_copyattr(d_inode(upperdentry ?: lowerdentry), inode); +} + +void ovl_inode_update(struct inode *inode, struct dentry *upperdentry) +{ + struct inode *upperinode = d_inode(upperdentry); + + WARN_ON(OVL_I(inode)->__upperdentry); - WARN_ON(!inode_is_locked(upperdentry->d_parent->d_inode)); - WARN_ON(oe->__upperdentry); /* - * Make sure upperdentry is consistent before making it visible to - * ovl_upperdentry_dereference(). + * Make sure upperdentry is consistent before making it visible */ smp_wmb(); - oe->__upperdentry = upperdentry; -} - -void ovl_inode_init(struct inode *inode, struct inode *realinode, bool is_upper) -{ - WRITE_ONCE(inode->i_private, (unsigned long) realinode | - (is_upper ? OVL_ISUPPER_MASK : 0)); -} - -void ovl_inode_update(struct inode *inode, struct inode *upperinode) -{ - WARN_ON(!upperinode); - WARN_ON(!inode_unhashed(inode)); - WRITE_ONCE(inode->i_private, - (unsigned long) upperinode | OVL_ISUPPER_MASK); - if (!S_ISDIR(upperinode->i_mode)) + OVL_I(inode)->__upperdentry = upperdentry; + if (!S_ISDIR(upperinode->i_mode) && inode_unhashed(inode)) { + inode->i_private = upperinode; __insert_inode_hash(inode, (unsigned long) upperinode); + } } void ovl_dentry_version_inc(struct dentry *dentry) { - struct ovl_entry *oe = dentry->d_fsdata; + struct inode *inode = d_inode(dentry); - WARN_ON(!inode_is_locked(dentry->d_inode)); - oe->version++; + WARN_ON(!inode_is_locked(inode)); + OVL_I(inode)->version++; } u64 ovl_dentry_version_get(struct dentry *dentry) { - struct ovl_entry *oe = dentry->d_fsdata; + struct inode *inode = d_inode(dentry); - WARN_ON(!inode_is_locked(dentry->d_inode)); - return oe->version; + WARN_ON(!inode_is_locked(inode)); + return OVL_I(inode)->version; } bool ovl_is_whiteout(struct dentry *dentry) @@ -276,32 +300,21 @@ struct file *ovl_path_open(struct path *path, int flags) int ovl_copy_up_start(struct dentry *dentry) { - struct ovl_fs *ofs = dentry->d_sb->s_fs_info; - struct ovl_entry *oe = dentry->d_fsdata; + struct ovl_inode *oi = OVL_I(d_inode(dentry)); int err; - spin_lock(&ofs->copyup_wq.lock); - err = wait_event_interruptible_locked(ofs->copyup_wq, !oe->copying); - if (!err) { - if (oe->__upperdentry) - err = 1; /* Already copied up */ - else - oe->copying = true; + err = mutex_lock_interruptible(&oi->lock); + if (!err && ovl_dentry_has_upper_alias(dentry)) { + err = 1; /* Already copied up */ + mutex_unlock(&oi->lock); } - spin_unlock(&ofs->copyup_wq.lock); return err; } void ovl_copy_up_end(struct dentry *dentry) { - struct ovl_fs *ofs = dentry->d_sb->s_fs_info; - struct ovl_entry *oe = dentry->d_fsdata; - - spin_lock(&ofs->copyup_wq.lock); - oe->copying = false; - wake_up_locked(&ofs->copyup_wq); - spin_unlock(&ofs->copyup_wq.lock); + mutex_unlock(&OVL_I(d_inode(dentry))->lock); } bool ovl_check_dir_xattr(struct dentry *dentry, const char *name) @@ -343,9 +356,8 @@ int ovl_check_setxattr(struct dentry *dentry, struct dentry *upperdentry, int ovl_set_impure(struct dentry *dentry, struct dentry *upperdentry) { int err; - struct ovl_entry *oe = dentry->d_fsdata; - if (oe->impure) + if (ovl_test_flag(OVL_IMPURE, d_inode(dentry))) return 0; /* @@ -355,7 +367,176 @@ int ovl_set_impure(struct dentry *dentry, struct dentry *upperdentry) err = ovl_check_setxattr(dentry, upperdentry, OVL_XATTR_IMPURE, "y", 1, 0); if (!err) - oe->impure = true; + ovl_set_flag(OVL_IMPURE, d_inode(dentry)); return err; } + +void ovl_set_flag(unsigned long flag, struct inode *inode) +{ + set_bit(flag, &OVL_I(inode)->flags); +} + +bool ovl_test_flag(unsigned long flag, struct inode *inode) +{ + return test_bit(flag, &OVL_I(inode)->flags); +} + +/** + * Caller must hold a reference to inode to prevent it from being freed while + * it is marked inuse. + */ +bool ovl_inuse_trylock(struct dentry *dentry) +{ + struct inode *inode = d_inode(dentry); + bool locked = false; + + spin_lock(&inode->i_lock); + if (!(inode->i_state & I_OVL_INUSE)) { + inode->i_state |= I_OVL_INUSE; + locked = true; + } + spin_unlock(&inode->i_lock); + + return locked; +} + +void ovl_inuse_unlock(struct dentry *dentry) +{ + if (dentry) { + struct inode *inode = d_inode(dentry); + + spin_lock(&inode->i_lock); + WARN_ON(!(inode->i_state & I_OVL_INUSE)); + inode->i_state &= ~I_OVL_INUSE; + spin_unlock(&inode->i_lock); + } +} + +/* Called must hold OVL_I(inode)->oi_lock */ +static void ovl_cleanup_index(struct dentry *dentry) +{ + struct inode *dir = ovl_indexdir(dentry->d_sb)->d_inode; + struct dentry *lowerdentry = ovl_dentry_lower(dentry); + struct dentry *upperdentry = ovl_dentry_upper(dentry); + struct dentry *index = NULL; + struct inode *inode; + struct qstr name; + int err; + + err = ovl_get_index_name(lowerdentry, &name); + if (err) + goto fail; + + inode = d_inode(upperdentry); + if (inode->i_nlink != 1) { + pr_warn_ratelimited("overlayfs: cleanup linked index (%pd2, ino=%lu, nlink=%u)\n", + upperdentry, inode->i_ino, inode->i_nlink); + /* + * We either have a bug with persistent union nlink or a lower + * hardlink was added while overlay is mounted. Adding a lower + * hardlink and then unlinking all overlay hardlinks would drop + * overlay nlink to zero before all upper inodes are unlinked. + * As a safety measure, when that situation is detected, set + * the overlay nlink to the index inode nlink minus one for the + * index entry itself. + */ + set_nlink(d_inode(dentry), inode->i_nlink - 1); + ovl_set_nlink_upper(dentry); + goto out; + } + + inode_lock_nested(dir, I_MUTEX_PARENT); + /* TODO: whiteout instead of cleanup to block future open by handle */ + index = lookup_one_len(name.name, ovl_indexdir(dentry->d_sb), name.len); + err = PTR_ERR(index); + if (!IS_ERR(index)) + err = ovl_cleanup(dir, index); + inode_unlock(dir); + if (err) + goto fail; + +out: + dput(index); + return; + +fail: + pr_err("overlayfs: cleanup index of '%pd2' failed (%i)\n", dentry, err); + goto out; +} + +/* + * Operations that change overlay inode and upper inode nlink need to be + * synchronized with copy up for persistent nlink accounting. + */ +int ovl_nlink_start(struct dentry *dentry, bool *locked) +{ + struct ovl_inode *oi = OVL_I(d_inode(dentry)); + const struct cred *old_cred; + int err; + + if (!d_inode(dentry) || d_is_dir(dentry)) + return 0; + + /* + * With inodes index is enabled, we store the union overlay nlink + * in an xattr on the index inode. When whiting out lower hardlinks + * we need to decrement the overlay persistent nlink, but before the + * first copy up, we have no upper index inode to store the xattr. + * + * As a workaround, before whiteout/rename over of a lower hardlink, + * copy up to create the upper index. Creating the upper index will + * initialize the overlay nlink, so it could be dropped if unlink + * or rename succeeds. + * + * TODO: implement metadata only index copy up when called with + * ovl_copy_up_flags(dentry, O_PATH). + */ + if (ovl_indexdir(dentry->d_sb) && !ovl_dentry_has_upper_alias(dentry) && + d_inode(ovl_dentry_lower(dentry))->i_nlink > 1) { + err = ovl_copy_up(dentry); + if (err) + return err; + } + + err = mutex_lock_interruptible(&oi->lock); + if (err) + return err; + + if (!ovl_test_flag(OVL_INDEX, d_inode(dentry))) + goto out; + + old_cred = ovl_override_creds(dentry->d_sb); + /* + * The overlay inode nlink should be incremented/decremented IFF the + * upper operation succeeds, along with nlink change of upper inode. + * Therefore, before link/unlink/rename, we store the union nlink + * value relative to the upper inode nlink in an upper inode xattr. + */ + err = ovl_set_nlink_upper(dentry); + revert_creds(old_cred); + +out: + if (err) + mutex_unlock(&oi->lock); + else + *locked = true; + + return err; +} + +void ovl_nlink_end(struct dentry *dentry, bool locked) +{ + if (locked) { + if (ovl_test_flag(OVL_INDEX, d_inode(dentry)) && + d_inode(dentry)->i_nlink == 0) { + const struct cred *old_cred; + + old_cred = ovl_override_creds(dentry->d_sb); + ovl_cleanup_index(dentry); + revert_creds(old_cred); + } + + mutex_unlock(&OVL_I(d_inode(dentry))->lock); + } +} diff --git a/include/linux/fs.h b/include/linux/fs.h index 78e1dbbe4cfd..976aaa1af82a 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1955,6 +1955,9 @@ static inline void init_sync_kiocb(struct kiocb *kiocb, struct file *filp) * wb stat updates to grab mapping->tree_lock. See * inode_switch_wb_work_fn() for details. * + * I_OVL_INUSE Used by overlayfs to get exclusive ownership on upper + * and work dirs among overlayfs mounts. + * * Q: What is the difference between I_WILL_FREE and I_FREEING? */ #define I_DIRTY_SYNC (1 << 0) @@ -1975,6 +1978,7 @@ static inline void init_sync_kiocb(struct kiocb *kiocb, struct file *filp) #define __I_DIRTY_TIME_EXPIRED 12 #define I_DIRTY_TIME_EXPIRED (1 << __I_DIRTY_TIME_EXPIRED) #define I_WB_SWITCH (1 << 13) +#define I_OVL_INUSE (1 << 14) #define I_DIRTY (I_DIRTY_SYNC | I_DIRTY_DATASYNC | I_DIRTY_PAGES) #define I_DIRTY_ALL (I_DIRTY | I_DIRTY_TIME)