d796e77f1d
As a writable mount, it is not expected for overlayfs to return EINVAL/EROFS for fsync, even if dir/file is not changed. This commit fixes the case of fsync of directory, which is easier to address, because overlayfs already implements fsync file operation for directories. The problem reported by Raphael is that new PostgreSQL 10.0 with a database in overlayfs where the lower layer is squashfs fails to start. The failure is due to an fsync error, when PostgreSQL does fsync on all existing db directories on startup and a specific directory exists in the lower layer with no changes. Reported-by: Raphael Hertzog <raphael@ouaza.com> Cc: <stable@vger.kernel.org> # v3.18 Signed-off-by: Amir Goldstein <amir73il@gmail.com> Tested-by: Raphaël Hertzog <hertzog@debian.org> Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
1077 lines
24 KiB
C
1077 lines
24 KiB
C
/*
|
|
*
|
|
* Copyright (C) 2011 Novell Inc.
|
|
*
|
|
* This program is free software; you can redistribute it and/or modify it
|
|
* under the terms of the GNU General Public License version 2 as published by
|
|
* the Free Software Foundation.
|
|
*/
|
|
|
|
#include <linux/fs.h>
|
|
#include <linux/slab.h>
|
|
#include <linux/namei.h>
|
|
#include <linux/file.h>
|
|
#include <linux/xattr.h>
|
|
#include <linux/rbtree.h>
|
|
#include <linux/security.h>
|
|
#include <linux/cred.h>
|
|
#include <linux/ratelimit.h>
|
|
#include "overlayfs.h"
|
|
|
|
struct ovl_cache_entry {
|
|
unsigned int len;
|
|
unsigned int type;
|
|
u64 real_ino;
|
|
u64 ino;
|
|
struct list_head l_node;
|
|
struct rb_node node;
|
|
struct ovl_cache_entry *next_maybe_whiteout;
|
|
bool is_upper;
|
|
bool is_whiteout;
|
|
char name[];
|
|
};
|
|
|
|
struct ovl_dir_cache {
|
|
long refcount;
|
|
u64 version;
|
|
struct list_head entries;
|
|
struct rb_root root;
|
|
};
|
|
|
|
struct ovl_readdir_data {
|
|
struct dir_context ctx;
|
|
struct dentry *dentry;
|
|
bool is_lowest;
|
|
struct rb_root *root;
|
|
struct list_head *list;
|
|
struct list_head middle;
|
|
struct ovl_cache_entry *first_maybe_whiteout;
|
|
int count;
|
|
int err;
|
|
bool is_upper;
|
|
bool d_type_supported;
|
|
};
|
|
|
|
struct ovl_dir_file {
|
|
bool is_real;
|
|
bool is_upper;
|
|
struct ovl_dir_cache *cache;
|
|
struct list_head *cursor;
|
|
struct file *realfile;
|
|
struct file *upperfile;
|
|
};
|
|
|
|
static struct ovl_cache_entry *ovl_cache_entry_from_node(struct rb_node *n)
|
|
{
|
|
return rb_entry(n, struct ovl_cache_entry, node);
|
|
}
|
|
|
|
static bool ovl_cache_entry_find_link(const char *name, int len,
|
|
struct rb_node ***link,
|
|
struct rb_node **parent)
|
|
{
|
|
bool found = false;
|
|
struct rb_node **newp = *link;
|
|
|
|
while (!found && *newp) {
|
|
int cmp;
|
|
struct ovl_cache_entry *tmp;
|
|
|
|
*parent = *newp;
|
|
tmp = ovl_cache_entry_from_node(*newp);
|
|
cmp = strncmp(name, tmp->name, len);
|
|
if (cmp > 0)
|
|
newp = &tmp->node.rb_right;
|
|
else if (cmp < 0 || len < tmp->len)
|
|
newp = &tmp->node.rb_left;
|
|
else
|
|
found = true;
|
|
}
|
|
*link = newp;
|
|
|
|
return found;
|
|
}
|
|
|
|
static struct ovl_cache_entry *ovl_cache_entry_find(struct rb_root *root,
|
|
const char *name, int len)
|
|
{
|
|
struct rb_node *node = root->rb_node;
|
|
int cmp;
|
|
|
|
while (node) {
|
|
struct ovl_cache_entry *p = ovl_cache_entry_from_node(node);
|
|
|
|
cmp = strncmp(name, p->name, len);
|
|
if (cmp > 0)
|
|
node = p->node.rb_right;
|
|
else if (cmp < 0 || len < p->len)
|
|
node = p->node.rb_left;
|
|
else
|
|
return p;
|
|
}
|
|
|
|
return NULL;
|
|
}
|
|
|
|
static bool ovl_calc_d_ino(struct ovl_readdir_data *rdd,
|
|
struct ovl_cache_entry *p)
|
|
{
|
|
/* Don't care if not doing ovl_iter() */
|
|
if (!rdd->dentry)
|
|
return false;
|
|
|
|
/* Always recalc d_ino for parent */
|
|
if (strcmp(p->name, "..") == 0)
|
|
return true;
|
|
|
|
/* If this is lower, then native d_ino will do */
|
|
if (!rdd->is_upper)
|
|
return false;
|
|
|
|
/*
|
|
* Recalc d_ino for '.' and for all entries if dir is impure (contains
|
|
* copied up entries)
|
|
*/
|
|
if ((p->name[0] == '.' && p->len == 1) ||
|
|
ovl_test_flag(OVL_IMPURE, d_inode(rdd->dentry)))
|
|
return true;
|
|
|
|
return false;
|
|
}
|
|
|
|
static struct ovl_cache_entry *ovl_cache_entry_new(struct ovl_readdir_data *rdd,
|
|
const char *name, int len,
|
|
u64 ino, unsigned int d_type)
|
|
{
|
|
struct ovl_cache_entry *p;
|
|
size_t size = offsetof(struct ovl_cache_entry, name[len + 1]);
|
|
|
|
p = kmalloc(size, GFP_KERNEL);
|
|
if (!p)
|
|
return NULL;
|
|
|
|
memcpy(p->name, name, len);
|
|
p->name[len] = '\0';
|
|
p->len = len;
|
|
p->type = d_type;
|
|
p->real_ino = ino;
|
|
p->ino = ino;
|
|
/* Defer setting d_ino for upper entry to ovl_iterate() */
|
|
if (ovl_calc_d_ino(rdd, p))
|
|
p->ino = 0;
|
|
p->is_upper = rdd->is_upper;
|
|
p->is_whiteout = false;
|
|
|
|
if (d_type == DT_CHR) {
|
|
p->next_maybe_whiteout = rdd->first_maybe_whiteout;
|
|
rdd->first_maybe_whiteout = p;
|
|
}
|
|
return p;
|
|
}
|
|
|
|
/*
 * Add @name to the list and rb-tree of @rdd unless an entry with the
 * same name is already present.  Returns 0, or -ENOMEM (also stored in
 * rdd->err) when the entry cannot be allocated.
 */
static int ovl_cache_entry_add_rb(struct ovl_readdir_data *rdd,
				  const char *name, int len, u64 ino,
				  unsigned int d_type)
{
	struct rb_node **slot = &rdd->root->rb_node;
	struct rb_node *parent = NULL;
	struct ovl_cache_entry *entry;

	/* Name already seen in an earlier layer: nothing to add */
	if (ovl_cache_entry_find_link(name, len, &slot, &parent))
		return 0;

	entry = ovl_cache_entry_new(rdd, name, len, ino, d_type);
	if (!entry) {
		rdd->err = -ENOMEM;
		return -ENOMEM;
	}

	list_add_tail(&entry->l_node, rdd->list);
	rb_link_node(&entry->node, parent, slot);
	rb_insert_color(&entry->node, rdd->root);

	return 0;
}
|
|
|
|
static int ovl_fill_lowest(struct ovl_readdir_data *rdd,
|
|
const char *name, int namelen,
|
|
loff_t offset, u64 ino, unsigned int d_type)
|
|
{
|
|
struct ovl_cache_entry *p;
|
|
|
|
p = ovl_cache_entry_find(rdd->root, name, namelen);
|
|
if (p) {
|
|
list_move_tail(&p->l_node, &rdd->middle);
|
|
} else {
|
|
p = ovl_cache_entry_new(rdd, name, namelen, ino, d_type);
|
|
if (p == NULL)
|
|
rdd->err = -ENOMEM;
|
|
else
|
|
list_add_tail(&p->l_node, &rdd->middle);
|
|
}
|
|
|
|
return rdd->err;
|
|
}
|
|
|
|
void ovl_cache_free(struct list_head *list)
|
|
{
|
|
struct ovl_cache_entry *p;
|
|
struct ovl_cache_entry *n;
|
|
|
|
list_for_each_entry_safe(p, n, list, l_node)
|
|
kfree(p);
|
|
|
|
INIT_LIST_HEAD(list);
|
|
}
|
|
|
|
void ovl_dir_cache_free(struct inode *inode)
|
|
{
|
|
struct ovl_dir_cache *cache = ovl_dir_cache(inode);
|
|
|
|
if (cache) {
|
|
ovl_cache_free(&cache->entries);
|
|
kfree(cache);
|
|
}
|
|
}
|
|
|
|
static void ovl_cache_put(struct ovl_dir_file *od, struct dentry *dentry)
|
|
{
|
|
struct ovl_dir_cache *cache = od->cache;
|
|
|
|
WARN_ON(cache->refcount <= 0);
|
|
cache->refcount--;
|
|
if (!cache->refcount) {
|
|
if (ovl_dir_cache(d_inode(dentry)) == cache)
|
|
ovl_set_dir_cache(d_inode(dentry), NULL);
|
|
|
|
ovl_cache_free(&cache->entries);
|
|
kfree(cache);
|
|
}
|
|
}
|
|
|
|
/*
 * dir_context actor used when building a merged listing: counts the
 * entry and hands it to the lowest-layer or the regular insert path.
 */
static int ovl_fill_merge(struct dir_context *ctx, const char *name,
			  int namelen, loff_t offset, u64 ino,
			  unsigned int d_type)
{
	struct ovl_readdir_data *rdd;

	rdd = container_of(ctx, struct ovl_readdir_data, ctx);
	rdd->count++;

	if (rdd->is_lowest)
		return ovl_fill_lowest(rdd, name, namelen, offset, ino, d_type);

	return ovl_cache_entry_add_rb(rdd, name, namelen, ino, d_type);
}
|
|
|
|
static int ovl_check_whiteouts(struct dentry *dir, struct ovl_readdir_data *rdd)
|
|
{
|
|
int err;
|
|
struct ovl_cache_entry *p;
|
|
struct dentry *dentry;
|
|
const struct cred *old_cred;
|
|
|
|
old_cred = ovl_override_creds(rdd->dentry->d_sb);
|
|
|
|
err = down_write_killable(&dir->d_inode->i_rwsem);
|
|
if (!err) {
|
|
while (rdd->first_maybe_whiteout) {
|
|
p = rdd->first_maybe_whiteout;
|
|
rdd->first_maybe_whiteout = p->next_maybe_whiteout;
|
|
dentry = lookup_one_len(p->name, dir, p->len);
|
|
if (!IS_ERR(dentry)) {
|
|
p->is_whiteout = ovl_is_whiteout(dentry);
|
|
dput(dentry);
|
|
}
|
|
}
|
|
inode_unlock(dir->d_inode);
|
|
}
|
|
revert_creds(old_cred);
|
|
|
|
return err;
|
|
}
|
|
|
|
static inline int ovl_dir_read(struct path *realpath,
|
|
struct ovl_readdir_data *rdd)
|
|
{
|
|
struct file *realfile;
|
|
int err;
|
|
|
|
realfile = ovl_path_open(realpath, O_RDONLY | O_DIRECTORY);
|
|
if (IS_ERR(realfile))
|
|
return PTR_ERR(realfile);
|
|
|
|
rdd->first_maybe_whiteout = NULL;
|
|
rdd->ctx.pos = 0;
|
|
do {
|
|
rdd->count = 0;
|
|
rdd->err = 0;
|
|
err = iterate_dir(realfile, &rdd->ctx);
|
|
if (err >= 0)
|
|
err = rdd->err;
|
|
} while (!err && rdd->count);
|
|
|
|
if (!err && rdd->first_maybe_whiteout && rdd->dentry)
|
|
err = ovl_check_whiteouts(realpath->dentry, rdd);
|
|
|
|
fput(realfile);
|
|
|
|
return err;
|
|
}
|
|
|
|
/*
|
|
* Can we iterate real dir directly?
|
|
*
|
|
* Non-merge dir may contain whiteouts from a time it was a merge upper, before
|
|
* lower dir was removed under it and possibly before it was rotated from upper
|
|
* to lower layer.
|
|
*/
|
|
static bool ovl_dir_is_real(struct dentry *dir)
|
|
{
|
|
return !ovl_test_flag(OVL_WHITEOUTS, d_inode(dir));
|
|
}
|
|
|
|
static void ovl_dir_reset(struct file *file)
|
|
{
|
|
struct ovl_dir_file *od = file->private_data;
|
|
struct ovl_dir_cache *cache = od->cache;
|
|
struct dentry *dentry = file->f_path.dentry;
|
|
bool is_real;
|
|
|
|
if (cache && ovl_dentry_version_get(dentry) != cache->version) {
|
|
ovl_cache_put(od, dentry);
|
|
od->cache = NULL;
|
|
od->cursor = NULL;
|
|
}
|
|
is_real = ovl_dir_is_real(dentry);
|
|
if (od->is_real != is_real) {
|
|
/* is_real can only become false when dir is copied up */
|
|
if (WARN_ON(is_real))
|
|
return;
|
|
od->is_real = false;
|
|
}
|
|
}
|
|
|
|
static int ovl_dir_read_merged(struct dentry *dentry, struct list_head *list,
|
|
struct rb_root *root)
|
|
{
|
|
int err;
|
|
struct path realpath;
|
|
struct ovl_readdir_data rdd = {
|
|
.ctx.actor = ovl_fill_merge,
|
|
.dentry = dentry,
|
|
.list = list,
|
|
.root = root,
|
|
.is_lowest = false,
|
|
};
|
|
int idx, next;
|
|
|
|
for (idx = 0; idx != -1; idx = next) {
|
|
next = ovl_path_next(idx, dentry, &realpath);
|
|
rdd.is_upper = ovl_dentry_upper(dentry) == realpath.dentry;
|
|
|
|
if (next != -1) {
|
|
err = ovl_dir_read(&realpath, &rdd);
|
|
if (err)
|
|
break;
|
|
} else {
|
|
/*
|
|
* Insert lowest layer entries before upper ones, this
|
|
* allows offsets to be reasonably constant
|
|
*/
|
|
list_add(&rdd.middle, rdd.list);
|
|
rdd.is_lowest = true;
|
|
err = ovl_dir_read(&realpath, &rdd);
|
|
list_del(&rdd.middle);
|
|
}
|
|
}
|
|
return err;
|
|
}
|
|
|
|
/*
 * Point od->cursor at the @pos'th node of the cached entry list, or at
 * the list head itself when the list has fewer than @pos entries.
 */
static void ovl_seek_cursor(struct ovl_dir_file *od, loff_t pos)
{
	struct list_head *head = &od->cache->entries;
	struct list_head *cur = head->next;
	loff_t skipped;

	for (skipped = 0; cur != head && skipped < pos; skipped++)
		cur = cur->next;

	/* Cursor is safe since the cache is stable */
	od->cursor = cur;
}
|
|
|
|
static struct ovl_dir_cache *ovl_cache_get(struct dentry *dentry)
|
|
{
|
|
int res;
|
|
struct ovl_dir_cache *cache;
|
|
|
|
cache = ovl_dir_cache(d_inode(dentry));
|
|
if (cache && ovl_dentry_version_get(dentry) == cache->version) {
|
|
WARN_ON(!cache->refcount);
|
|
cache->refcount++;
|
|
return cache;
|
|
}
|
|
ovl_set_dir_cache(d_inode(dentry), NULL);
|
|
|
|
cache = kzalloc(sizeof(struct ovl_dir_cache), GFP_KERNEL);
|
|
if (!cache)
|
|
return ERR_PTR(-ENOMEM);
|
|
|
|
cache->refcount = 1;
|
|
INIT_LIST_HEAD(&cache->entries);
|
|
cache->root = RB_ROOT;
|
|
|
|
res = ovl_dir_read_merged(dentry, &cache->entries, &cache->root);
|
|
if (res) {
|
|
ovl_cache_free(&cache->entries);
|
|
kfree(cache);
|
|
return ERR_PTR(res);
|
|
}
|
|
|
|
cache->version = ovl_dentry_version_get(dentry);
|
|
ovl_set_dir_cache(d_inode(dentry), cache);
|
|
|
|
return cache;
|
|
}
|
|
|
|
/*
|
|
* Set d_ino for upper entries. Non-upper entries should always report
|
|
* the uppermost real inode ino and should not call this function.
|
|
*
|
|
* When not all layer are on same fs, report real ino also for upper.
|
|
*
|
|
* When all layers are on the same fs, and upper has a reference to
|
|
* copy up origin, call vfs_getattr() on the overlay entry to make
|
|
* sure that d_ino will be consistent with st_ino from stat(2).
|
|
*/
|
|
static int ovl_cache_update_ino(struct path *path, struct ovl_cache_entry *p)
|
|
|
|
{
|
|
struct dentry *dir = path->dentry;
|
|
struct dentry *this = NULL;
|
|
enum ovl_path_type type;
|
|
u64 ino = p->real_ino;
|
|
int err = 0;
|
|
|
|
if (!ovl_same_sb(dir->d_sb))
|
|
goto out;
|
|
|
|
if (p->name[0] == '.') {
|
|
if (p->len == 1) {
|
|
this = dget(dir);
|
|
goto get;
|
|
}
|
|
if (p->len == 2 && p->name[1] == '.') {
|
|
/* we shall not be moved */
|
|
this = dget(dir->d_parent);
|
|
goto get;
|
|
}
|
|
}
|
|
this = lookup_one_len(p->name, dir, p->len);
|
|
if (IS_ERR_OR_NULL(this) || !this->d_inode) {
|
|
if (IS_ERR(this)) {
|
|
err = PTR_ERR(this);
|
|
this = NULL;
|
|
goto fail;
|
|
}
|
|
goto out;
|
|
}
|
|
|
|
get:
|
|
type = ovl_path_type(this);
|
|
if (OVL_TYPE_ORIGIN(type)) {
|
|
struct kstat stat;
|
|
struct path statpath = *path;
|
|
|
|
statpath.dentry = this;
|
|
err = vfs_getattr(&statpath, &stat, STATX_INO, 0);
|
|
if (err)
|
|
goto fail;
|
|
|
|
WARN_ON_ONCE(dir->d_sb->s_dev != stat.dev);
|
|
ino = stat.ino;
|
|
}
|
|
|
|
out:
|
|
p->ino = ino;
|
|
dput(this);
|
|
return err;
|
|
|
|
fail:
|
|
pr_warn_ratelimited("overlayfs: failed to look up (%s) for ino (%i)\n",
|
|
p->name, err);
|
|
goto out;
|
|
}
|
|
|
|
/*
 * dir_context actor that appends every entry to rdd->list without any
 * duplicate check or rb-tree insertion.
 */
static int ovl_fill_plain(struct dir_context *ctx, const char *name,
			  int namelen, loff_t offset, u64 ino,
			  unsigned int d_type)
{
	struct ovl_readdir_data *rdd;
	struct ovl_cache_entry *entry;

	rdd = container_of(ctx, struct ovl_readdir_data, ctx);
	rdd->count++;

	entry = ovl_cache_entry_new(rdd, name, namelen, ino, d_type);
	if (!entry) {
		rdd->err = -ENOMEM;
		return -ENOMEM;
	}
	list_add_tail(&entry->l_node, rdd->list);

	return 0;
}
|
|
|
|
static int ovl_dir_read_impure(struct path *path, struct list_head *list,
|
|
struct rb_root *root)
|
|
{
|
|
int err;
|
|
struct path realpath;
|
|
struct ovl_cache_entry *p, *n;
|
|
struct ovl_readdir_data rdd = {
|
|
.ctx.actor = ovl_fill_plain,
|
|
.list = list,
|
|
.root = root,
|
|
};
|
|
|
|
INIT_LIST_HEAD(list);
|
|
*root = RB_ROOT;
|
|
ovl_path_upper(path->dentry, &realpath);
|
|
|
|
err = ovl_dir_read(&realpath, &rdd);
|
|
if (err)
|
|
return err;
|
|
|
|
list_for_each_entry_safe(p, n, list, l_node) {
|
|
if (strcmp(p->name, ".") != 0 &&
|
|
strcmp(p->name, "..") != 0) {
|
|
err = ovl_cache_update_ino(path, p);
|
|
if (err)
|
|
return err;
|
|
}
|
|
if (p->ino == p->real_ino) {
|
|
list_del(&p->l_node);
|
|
kfree(p);
|
|
} else {
|
|
struct rb_node **newp = &root->rb_node;
|
|
struct rb_node *parent = NULL;
|
|
|
|
if (WARN_ON(ovl_cache_entry_find_link(p->name, p->len,
|
|
&newp, &parent)))
|
|
return -EIO;
|
|
|
|
rb_link_node(&p->node, parent, newp);
|
|
rb_insert_color(&p->node, root);
|
|
}
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
static struct ovl_dir_cache *ovl_cache_get_impure(struct path *path)
|
|
{
|
|
int res;
|
|
struct dentry *dentry = path->dentry;
|
|
struct ovl_dir_cache *cache;
|
|
|
|
cache = ovl_dir_cache(d_inode(dentry));
|
|
if (cache && ovl_dentry_version_get(dentry) == cache->version)
|
|
return cache;
|
|
|
|
/* Impure cache is not refcounted, free it here */
|
|
ovl_dir_cache_free(d_inode(dentry));
|
|
ovl_set_dir_cache(d_inode(dentry), NULL);
|
|
|
|
cache = kzalloc(sizeof(struct ovl_dir_cache), GFP_KERNEL);
|
|
if (!cache)
|
|
return ERR_PTR(-ENOMEM);
|
|
|
|
res = ovl_dir_read_impure(path, &cache->entries, &cache->root);
|
|
if (res) {
|
|
ovl_cache_free(&cache->entries);
|
|
kfree(cache);
|
|
return ERR_PTR(res);
|
|
}
|
|
if (list_empty(&cache->entries)) {
|
|
/* Good oportunity to get rid of an unnecessary "impure" flag */
|
|
ovl_do_removexattr(ovl_dentry_upper(dentry), OVL_XATTR_IMPURE);
|
|
ovl_clear_flag(OVL_IMPURE, d_inode(dentry));
|
|
kfree(cache);
|
|
return NULL;
|
|
}
|
|
|
|
cache->version = ovl_dentry_version_get(dentry);
|
|
ovl_set_dir_cache(d_inode(dentry), cache);
|
|
|
|
return cache;
|
|
}
|
|
|
|
struct ovl_readdir_translate {
|
|
struct dir_context *orig_ctx;
|
|
struct ovl_dir_cache *cache;
|
|
struct dir_context ctx;
|
|
u64 parent_ino;
|
|
};
|
|
|
|
/*
 * dir_context actor that substitutes d_ino before forwarding the entry
 * to the original actor: ".." gets rdt->parent_ino when that is set,
 * other names get the ino stored in the impure cache when found there.
 */
static int ovl_fill_real(struct dir_context *ctx, const char *name,
			 int namelen, loff_t offset, u64 ino,
			 unsigned int d_type)
{
	struct ovl_readdir_translate *rdt;
	struct dir_context *orig_ctx;

	rdt = container_of(ctx, struct ovl_readdir_translate, ctx);
	orig_ctx = rdt->orig_ctx;

	if (rdt->parent_ino && !strcmp(name, "..")) {
		ino = rdt->parent_ino;
	} else if (rdt->cache) {
		struct ovl_cache_entry *hit;

		hit = ovl_cache_entry_find(&rdt->cache->root, name, namelen);
		if (hit)
			ino = hit->ino;
	}

	return orig_ctx->actor(orig_ctx, name, namelen, offset, ino, d_type);
}
|
|
|
|
static int ovl_iterate_real(struct file *file, struct dir_context *ctx)
|
|
{
|
|
int err;
|
|
struct ovl_dir_file *od = file->private_data;
|
|
struct dentry *dir = file->f_path.dentry;
|
|
struct ovl_readdir_translate rdt = {
|
|
.ctx.actor = ovl_fill_real,
|
|
.orig_ctx = ctx,
|
|
};
|
|
|
|
if (OVL_TYPE_MERGE(ovl_path_type(dir->d_parent))) {
|
|
struct kstat stat;
|
|
struct path statpath = file->f_path;
|
|
|
|
statpath.dentry = dir->d_parent;
|
|
err = vfs_getattr(&statpath, &stat, STATX_INO, 0);
|
|
if (err)
|
|
return err;
|
|
|
|
WARN_ON_ONCE(dir->d_sb->s_dev != stat.dev);
|
|
rdt.parent_ino = stat.ino;
|
|
}
|
|
|
|
if (ovl_test_flag(OVL_IMPURE, d_inode(dir))) {
|
|
rdt.cache = ovl_cache_get_impure(&file->f_path);
|
|
if (IS_ERR(rdt.cache))
|
|
return PTR_ERR(rdt.cache);
|
|
}
|
|
|
|
err = iterate_dir(od->realfile, &rdt.ctx);
|
|
ctx->pos = rdt.ctx.pos;
|
|
|
|
return err;
|
|
}
|
|
|
|
|
|
static int ovl_iterate(struct file *file, struct dir_context *ctx)
|
|
{
|
|
struct ovl_dir_file *od = file->private_data;
|
|
struct dentry *dentry = file->f_path.dentry;
|
|
struct ovl_cache_entry *p;
|
|
int err;
|
|
|
|
if (!ctx->pos)
|
|
ovl_dir_reset(file);
|
|
|
|
if (od->is_real) {
|
|
/*
|
|
* If parent is merge, then need to adjust d_ino for '..', if
|
|
* dir is impure then need to adjust d_ino for copied up
|
|
* entries.
|
|
*/
|
|
if (ovl_same_sb(dentry->d_sb) &&
|
|
(ovl_test_flag(OVL_IMPURE, d_inode(dentry)) ||
|
|
OVL_TYPE_MERGE(ovl_path_type(dentry->d_parent)))) {
|
|
return ovl_iterate_real(file, ctx);
|
|
}
|
|
return iterate_dir(od->realfile, ctx);
|
|
}
|
|
|
|
if (!od->cache) {
|
|
struct ovl_dir_cache *cache;
|
|
|
|
cache = ovl_cache_get(dentry);
|
|
if (IS_ERR(cache))
|
|
return PTR_ERR(cache);
|
|
|
|
od->cache = cache;
|
|
ovl_seek_cursor(od, ctx->pos);
|
|
}
|
|
|
|
while (od->cursor != &od->cache->entries) {
|
|
p = list_entry(od->cursor, struct ovl_cache_entry, l_node);
|
|
if (!p->is_whiteout) {
|
|
if (!p->ino) {
|
|
err = ovl_cache_update_ino(&file->f_path, p);
|
|
if (err)
|
|
return err;
|
|
}
|
|
if (!dir_emit(ctx, p->name, p->len, p->ino, p->type))
|
|
break;
|
|
}
|
|
od->cursor = p->l_node.next;
|
|
ctx->pos++;
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
/*
 * ->llseek for overlay directories.
 *
 * Real directories forward the seek to the underlying file.  For cached
 * directories only SEEK_SET and SEEK_CUR with a non-negative resulting
 * offset are accepted; anything else yields -EINVAL.
 */
static loff_t ovl_dir_llseek(struct file *file, loff_t offset, int origin)
{
	struct ovl_dir_file *od = file->private_data;
	struct inode *inode = file_inode(file);
	loff_t res;

	inode_lock(inode);
	if (!file->f_pos)
		ovl_dir_reset(file);

	if (od->is_real) {
		res = vfs_llseek(od->realfile, offset, origin);
		file->f_pos = od->realfile->f_pos;
		goto out_unlock;
	}

	res = -EINVAL;
	if (origin == SEEK_CUR)
		offset += file->f_pos;
	else if (origin != SEEK_SET)
		goto out_unlock;

	if (offset < 0)
		goto out_unlock;

	if (offset != file->f_pos) {
		file->f_pos = offset;
		/* Without a cache the cursor is set on the next iterate */
		if (od->cache)
			ovl_seek_cursor(od, offset);
	}
	res = offset;

out_unlock:
	inode_unlock(inode);

	return res;
}
|
|
|
|
/*
 * ->fsync for overlay directories.
 *
 * A directory without an upper has nothing to sync and succeeds.  If
 * the file was opened on upper, that file is synced.  If it was opened
 * on lower and the directory got copied up since, the upper directory
 * is opened on demand, stored in od->upperfile and synced.
 */
static int ovl_dir_fsync(struct file *file, loff_t start, loff_t end,
			 int datasync)
{
	struct ovl_dir_file *od = file->private_data;
	struct dentry *dentry = file->f_path.dentry;
	struct inode *inode = file_inode(file);
	struct path upperpath;
	struct file *upper;

	/* Nothing to sync for lower */
	if (!OVL_TYPE_UPPER(ovl_path_type(dentry)))
		return 0;

	if (od->is_upper)
		return vfs_fsync_range(od->realfile, start, end, datasync);

	/* Started out as a lower dir, but got copied up */
	upper = READ_ONCE(od->upperfile);
	if (upper)
		return vfs_fsync_range(upper, start, end, datasync);

	ovl_path_upper(dentry, &upperpath);
	upper = ovl_path_open(&upperpath, O_RDONLY);

	inode_lock(inode);
	if (od->upperfile) {
		/* somebody has beaten us to it */
		if (!IS_ERR(upper))
			fput(upper);
		upper = od->upperfile;
	} else if (IS_ERR(upper)) {
		inode_unlock(inode);
		return PTR_ERR(upper);
	} else {
		smp_store_release(&od->upperfile, upper);
	}
	inode_unlock(inode);

	return vfs_fsync_range(upper, start, end, datasync);
}
|
|
|
|
static int ovl_dir_release(struct inode *inode, struct file *file)
|
|
{
|
|
struct ovl_dir_file *od = file->private_data;
|
|
|
|
if (od->cache) {
|
|
inode_lock(inode);
|
|
ovl_cache_put(od, file->f_path.dentry);
|
|
inode_unlock(inode);
|
|
}
|
|
fput(od->realfile);
|
|
if (od->upperfile)
|
|
fput(od->upperfile);
|
|
kfree(od);
|
|
|
|
return 0;
|
|
}
|
|
|
|
static int ovl_dir_open(struct inode *inode, struct file *file)
|
|
{
|
|
struct path realpath;
|
|
struct file *realfile;
|
|
struct ovl_dir_file *od;
|
|
enum ovl_path_type type;
|
|
|
|
od = kzalloc(sizeof(struct ovl_dir_file), GFP_KERNEL);
|
|
if (!od)
|
|
return -ENOMEM;
|
|
|
|
type = ovl_path_real(file->f_path.dentry, &realpath);
|
|
realfile = ovl_path_open(&realpath, file->f_flags);
|
|
if (IS_ERR(realfile)) {
|
|
kfree(od);
|
|
return PTR_ERR(realfile);
|
|
}
|
|
od->realfile = realfile;
|
|
od->is_real = ovl_dir_is_real(file->f_path.dentry);
|
|
od->is_upper = OVL_TYPE_UPPER(type);
|
|
file->private_data = od;
|
|
|
|
return 0;
|
|
}
|
|
|
|
const struct file_operations ovl_dir_operations = {
|
|
.read = generic_read_dir,
|
|
.open = ovl_dir_open,
|
|
.iterate = ovl_iterate,
|
|
.llseek = ovl_dir_llseek,
|
|
.fsync = ovl_dir_fsync,
|
|
.release = ovl_dir_release,
|
|
};
|
|
|
|
int ovl_check_empty_dir(struct dentry *dentry, struct list_head *list)
|
|
{
|
|
int err;
|
|
struct ovl_cache_entry *p, *n;
|
|
struct rb_root root = RB_ROOT;
|
|
|
|
err = ovl_dir_read_merged(dentry, list, &root);
|
|
if (err)
|
|
return err;
|
|
|
|
err = 0;
|
|
|
|
list_for_each_entry_safe(p, n, list, l_node) {
|
|
/*
|
|
* Select whiteouts in upperdir, they should
|
|
* be cleared when deleting this directory.
|
|
*/
|
|
if (p->is_whiteout) {
|
|
if (p->is_upper)
|
|
continue;
|
|
goto del_entry;
|
|
}
|
|
|
|
if (p->name[0] == '.') {
|
|
if (p->len == 1)
|
|
goto del_entry;
|
|
if (p->len == 2 && p->name[1] == '.')
|
|
goto del_entry;
|
|
}
|
|
err = -ENOTEMPTY;
|
|
break;
|
|
|
|
del_entry:
|
|
list_del(&p->l_node);
|
|
kfree(p);
|
|
}
|
|
|
|
return err;
|
|
}
|
|
|
|
void ovl_cleanup_whiteouts(struct dentry *upper, struct list_head *list)
|
|
{
|
|
struct ovl_cache_entry *p;
|
|
|
|
inode_lock_nested(upper->d_inode, I_MUTEX_CHILD);
|
|
list_for_each_entry(p, list, l_node) {
|
|
struct dentry *dentry;
|
|
|
|
if (WARN_ON(!p->is_whiteout || !p->is_upper))
|
|
continue;
|
|
|
|
dentry = lookup_one_len(p->name, upper, p->len);
|
|
if (IS_ERR(dentry)) {
|
|
pr_err("overlayfs: lookup '%s/%.*s' failed (%i)\n",
|
|
upper->d_name.name, p->len, p->name,
|
|
(int) PTR_ERR(dentry));
|
|
continue;
|
|
}
|
|
if (dentry->d_inode)
|
|
ovl_cleanup(upper->d_inode, dentry);
|
|
dput(dentry);
|
|
}
|
|
inode_unlock(upper->d_inode);
|
|
}
|
|
|
|
/*
 * dir_context actor that records whether any entry other than "." and
 * ".." carries a d_type different from DT_UNKNOWN.  Always returns 0.
 */
static int ovl_check_d_type(struct dir_context *ctx, const char *name,
			    int namelen, loff_t offset, u64 ino,
			    unsigned int d_type)
{
	struct ovl_readdir_data *rdd;
	bool dots;

	rdd = container_of(ctx, struct ovl_readdir_data, ctx);

	/* Even if d_type is not supported, DT_DIR is returned for . and .. */
	dots = !strncmp(name, ".", namelen) || !strncmp(name, "..", namelen);
	if (!dots && d_type != DT_UNKNOWN)
		rdd->d_type_supported = true;

	return 0;
}
|
|
|
|
/*
|
|
* Returns 1 if d_type is supported, 0 not supported/unknown. Negative values
|
|
* if error is encountered.
|
|
*/
|
|
int ovl_check_d_type_supported(struct path *realpath)
|
|
{
|
|
int err;
|
|
struct ovl_readdir_data rdd = {
|
|
.ctx.actor = ovl_check_d_type,
|
|
.d_type_supported = false,
|
|
};
|
|
|
|
err = ovl_dir_read(realpath, &rdd);
|
|
if (err)
|
|
return err;
|
|
|
|
return rdd.d_type_supported;
|
|
}
|
|
|
|
static void ovl_workdir_cleanup_recurse(struct path *path, int level)
|
|
{
|
|
int err;
|
|
struct inode *dir = path->dentry->d_inode;
|
|
LIST_HEAD(list);
|
|
struct rb_root root = RB_ROOT;
|
|
struct ovl_cache_entry *p;
|
|
struct ovl_readdir_data rdd = {
|
|
.ctx.actor = ovl_fill_merge,
|
|
.dentry = NULL,
|
|
.list = &list,
|
|
.root = &root,
|
|
.is_lowest = false,
|
|
};
|
|
|
|
err = ovl_dir_read(path, &rdd);
|
|
if (err)
|
|
goto out;
|
|
|
|
inode_lock_nested(dir, I_MUTEX_PARENT);
|
|
list_for_each_entry(p, &list, l_node) {
|
|
struct dentry *dentry;
|
|
|
|
if (p->name[0] == '.') {
|
|
if (p->len == 1)
|
|
continue;
|
|
if (p->len == 2 && p->name[1] == '.')
|
|
continue;
|
|
}
|
|
dentry = lookup_one_len(p->name, path->dentry, p->len);
|
|
if (IS_ERR(dentry))
|
|
continue;
|
|
if (dentry->d_inode)
|
|
ovl_workdir_cleanup(dir, path->mnt, dentry, level);
|
|
dput(dentry);
|
|
}
|
|
inode_unlock(dir);
|
|
out:
|
|
ovl_cache_free(&list);
|
|
}
|
|
|
|
void ovl_workdir_cleanup(struct inode *dir, struct vfsmount *mnt,
|
|
struct dentry *dentry, int level)
|
|
{
|
|
int err;
|
|
|
|
if (!d_is_dir(dentry) || level > 1) {
|
|
ovl_cleanup(dir, dentry);
|
|
return;
|
|
}
|
|
|
|
err = ovl_do_rmdir(dir, dentry);
|
|
if (err) {
|
|
struct path path = { .mnt = mnt, .dentry = dentry };
|
|
|
|
inode_unlock(dir);
|
|
ovl_workdir_cleanup_recurse(&path, level + 1);
|
|
inode_lock_nested(dir, I_MUTEX_PARENT);
|
|
ovl_cleanup(dir, dentry);
|
|
}
|
|
}
|
|
|
|
int ovl_indexdir_cleanup(struct dentry *dentry, struct vfsmount *mnt,
|
|
struct ovl_path *lower, unsigned int numlower)
|
|
{
|
|
int err;
|
|
struct dentry *index = NULL;
|
|
struct inode *dir = dentry->d_inode;
|
|
struct path path = { .mnt = mnt, .dentry = dentry };
|
|
LIST_HEAD(list);
|
|
struct rb_root root = RB_ROOT;
|
|
struct ovl_cache_entry *p;
|
|
struct ovl_readdir_data rdd = {
|
|
.ctx.actor = ovl_fill_merge,
|
|
.dentry = NULL,
|
|
.list = &list,
|
|
.root = &root,
|
|
.is_lowest = false,
|
|
};
|
|
|
|
err = ovl_dir_read(&path, &rdd);
|
|
if (err)
|
|
goto out;
|
|
|
|
inode_lock_nested(dir, I_MUTEX_PARENT);
|
|
list_for_each_entry(p, &list, l_node) {
|
|
if (p->name[0] == '.') {
|
|
if (p->len == 1)
|
|
continue;
|
|
if (p->len == 2 && p->name[1] == '.')
|
|
continue;
|
|
}
|
|
index = lookup_one_len(p->name, dentry, p->len);
|
|
if (IS_ERR(index)) {
|
|
err = PTR_ERR(index);
|
|
index = NULL;
|
|
break;
|
|
}
|
|
err = ovl_verify_index(index, lower, numlower);
|
|
/* Cleanup stale and orphan index entries */
|
|
if (err && (err == -ESTALE || err == -ENOENT))
|
|
err = ovl_cleanup(dir, index);
|
|
if (err)
|
|
break;
|
|
|
|
dput(index);
|
|
index = NULL;
|
|
}
|
|
dput(index);
|
|
inode_unlock(dir);
|
|
out:
|
|
ovl_cache_free(&list);
|
|
if (err)
|
|
pr_err("overlayfs: failed index dir cleanup (%i)\n", err);
|
|
return err;
|
|
}
|