-----BEGIN PGP SIGNATURE-----
 
 iQEzBAABCAAdFiEEq1nRK9aeMoq1VSgcnJ2qBz9kQNkFAl/bPRMACgkQnJ2qBz9k
 QNmktwf7BE+H0PEgm3VfEs8uKUnmgr/TTBd9rhuKVa8NeYrT1YlX2ocCykawaLSW
 ppyXkr2rWKwvRO5P9hZPUsMbjvp7ucz14imBHlhiQpPyfh8cqMazPJLySqbAI/M+
 Eo8WIl74EqQ4VIgCGgfIVD073yjA4FWvO+5/CITYR44Pc2WzyCdU/1oKGBrs4+Cg
 OZAsHvg+2uKiEVeaBwbII+X/jChCJwEfHEYry3A8oRL427HrDir7Jc9i3SNGTDnc
 SE6DPj9X5HWOfoXjVrMratnaz654isvdRdP6GRAFKX8rJlNPGLMZbQ3DTzLGTYKL
 7r9KylGD5nCkL1SXjUOLCqHgVRrgpg==
 =xcC/
 -----END PGP SIGNATURE-----

Merge tag 'fsnotify_for_v5.11-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/jack/linux-fs

Pull fsnotify updates from Jan Kara:
 "A few fsnotify fixes from Amir fixing fallout from big fsnotify
  overhaul a few months back and an improvement of defaults limiting
  maximum number of inotify watches from Waiman"

* tag 'fsnotify_for_v5.11-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/jack/linux-fs:
  fsnotify: fix events reported to watching parent and child
  inotify: convert to handle_inode_event() interface
  fsnotify: generalize handle_inode_event()
  inotify: Increase default inotify.max_user_watches limit to 1048576
This commit is contained in:
Linus Torvalds 2020-12-17 10:56:27 -08:00
commit 14bd41e418
11 changed files with 120 additions and 104 deletions

View File

@ -600,7 +600,7 @@ static struct notifier_block nfsd_file_lease_notifier = {
static int
nfsd_file_fsnotify_handle_event(struct fsnotify_mark *mark, u32 mask,
struct inode *inode, struct inode *dir,
const struct qstr *name)
const struct qstr *name, u32 cookie)
{
trace_nfsd_file_fsnotify_handle_event(inode, mask);

View File

@ -72,7 +72,7 @@ static void dnotify_recalc_inode_mask(struct fsnotify_mark *fsn_mark)
*/
static int dnotify_handle_event(struct fsnotify_mark *inode_mark, u32 mask,
struct inode *inode, struct inode *dir,
const struct qstr *name)
const struct qstr *name, u32 cookie)
{
struct dnotify_mark *dn_mark;
struct dnotify_struct *dn;

View File

@ -268,12 +268,11 @@ static u32 fanotify_group_event_mask(struct fsnotify_group *group,
continue;
/*
* If the event is for a child and this mark is on a parent not
* If the event is on a child and this mark is on a parent not
* watching children, don't send it!
*/
if (event_mask & FS_EVENT_ON_CHILD &&
type == FSNOTIFY_OBJ_TYPE_INODE &&
!(mark->mask & FS_EVENT_ON_CHILD))
if (type == FSNOTIFY_OBJ_TYPE_PARENT &&
!(mark->mask & FS_EVENT_ON_CHILD))
continue;
marks_mask |= mark->mask;

View File

@ -152,6 +152,13 @@ static bool fsnotify_event_needs_parent(struct inode *inode, struct mount *mnt,
if (mask & FS_ISDIR)
return false;
/*
* All events that are possible on child can also may be reported with
* parent/name info to inode/sb/mount. Otherwise, a watching parent
* could result in events reported with unexpected name info to sb/mount.
*/
BUILD_BUG_ON(FS_EVENTS_POSS_ON_CHILD & ~FS_EVENTS_POSS_TO_PARENT);
/* Did either inode/sb/mount subscribe for events with parent/name? */
marks_mask |= fsnotify_parent_needed_mask(inode->i_fsnotify_mask);
marks_mask |= fsnotify_parent_needed_mask(inode->i_sb->s_fsnotify_mask);
@ -232,47 +239,76 @@ notify:
}
EXPORT_SYMBOL_GPL(__fsnotify_parent);
static int fsnotify_handle_inode_event(struct fsnotify_group *group,
struct fsnotify_mark *inode_mark,
u32 mask, const void *data, int data_type,
struct inode *dir, const struct qstr *name,
u32 cookie)
{
const struct path *path = fsnotify_data_path(data, data_type);
struct inode *inode = fsnotify_data_inode(data, data_type);
const struct fsnotify_ops *ops = group->ops;
if (WARN_ON_ONCE(!ops->handle_inode_event))
return 0;
if ((inode_mark->mask & FS_EXCL_UNLINK) &&
path && d_unlinked(path->dentry))
return 0;
/* Check interest of this mark in case event was sent with two marks */
if (!(mask & inode_mark->mask & ALL_FSNOTIFY_EVENTS))
return 0;
return ops->handle_inode_event(inode_mark, mask, inode, dir, name, cookie);
}
static int fsnotify_handle_event(struct fsnotify_group *group, __u32 mask,
const void *data, int data_type,
struct inode *dir, const struct qstr *name,
u32 cookie, struct fsnotify_iter_info *iter_info)
{
struct fsnotify_mark *inode_mark = fsnotify_iter_inode_mark(iter_info);
struct fsnotify_mark *child_mark = fsnotify_iter_child_mark(iter_info);
struct inode *inode = fsnotify_data_inode(data, data_type);
const struct fsnotify_ops *ops = group->ops;
struct fsnotify_mark *parent_mark = fsnotify_iter_parent_mark(iter_info);
int ret;
if (WARN_ON_ONCE(!ops->handle_inode_event))
return 0;
if (WARN_ON_ONCE(fsnotify_iter_sb_mark(iter_info)) ||
WARN_ON_ONCE(fsnotify_iter_vfsmount_mark(iter_info)))
return 0;
/*
* An event can be sent on child mark iterator instead of inode mark
* iterator because of other groups that have interest of this inode
* and have marks on both parent and child. We can simplify this case.
*/
if (!inode_mark) {
inode_mark = child_mark;
child_mark = NULL;
if (parent_mark) {
/*
* parent_mark indicates that the parent inode is watching
* children and interested in this event, which is an event
* possible on child. But is *this mark* watching children and
* interested in this event?
*/
if (parent_mark->mask & FS_EVENT_ON_CHILD) {
ret = fsnotify_handle_inode_event(group, parent_mark, mask,
data, data_type, dir, name, 0);
if (ret)
return ret;
}
if (!inode_mark)
return 0;
}
if (mask & FS_EVENT_ON_CHILD) {
/*
* Some events can be sent on both parent dir and child marks
* (e.g. FS_ATTRIB). If both parent dir and child are
* watching, report the event once to parent dir with name (if
* interested) and once to child without name (if interested).
* The child watcher is expecting an event without a file name
* and without the FS_EVENT_ON_CHILD flag.
*/
mask &= ~FS_EVENT_ON_CHILD;
dir = NULL;
name = NULL;
}
ret = ops->handle_inode_event(inode_mark, mask, inode, dir, name);
if (ret || !child_mark)
return ret;
/*
* Some events can be sent on both parent dir and child marks
* (e.g. FS_ATTRIB). If both parent dir and child are watching,
* report the event once to parent dir with name and once to child
* without name.
*/
return ops->handle_inode_event(child_mark, mask, inode, NULL, NULL);
return fsnotify_handle_inode_event(group, inode_mark, mask, data, data_type,
dir, name, cookie);
}
static int send_to_group(__u32 mask, const void *data, int data_type,
@ -430,7 +466,7 @@ int fsnotify(__u32 mask, const void *data, int data_type, struct inode *dir,
struct fsnotify_iter_info iter_info = {};
struct super_block *sb;
struct mount *mnt = NULL;
struct inode *child = NULL;
struct inode *parent = NULL;
int ret = 0;
__u32 test_mask, marks_mask;
@ -442,11 +478,10 @@ int fsnotify(__u32 mask, const void *data, int data_type, struct inode *dir,
inode = dir;
} else if (mask & FS_EVENT_ON_CHILD) {
/*
* Event on child - report on TYPE_INODE to dir if it is
* watching children and on TYPE_CHILD to child.
* Event on child - report on TYPE_PARENT to dir if it is
* watching children and on TYPE_INODE to child.
*/
child = inode;
inode = dir;
parent = dir;
}
sb = inode->i_sb;
@ -460,7 +495,7 @@ int fsnotify(__u32 mask, const void *data, int data_type, struct inode *dir,
if (!sb->s_fsnotify_marks &&
(!mnt || !mnt->mnt_fsnotify_marks) &&
(!inode || !inode->i_fsnotify_marks) &&
(!child || !child->i_fsnotify_marks))
(!parent || !parent->i_fsnotify_marks))
return 0;
marks_mask = sb->s_fsnotify_mask;
@ -468,8 +503,8 @@ int fsnotify(__u32 mask, const void *data, int data_type, struct inode *dir,
marks_mask |= mnt->mnt_fsnotify_mask;
if (inode)
marks_mask |= inode->i_fsnotify_mask;
if (child)
marks_mask |= child->i_fsnotify_mask;
if (parent)
marks_mask |= parent->i_fsnotify_mask;
/*
@ -492,9 +527,9 @@ int fsnotify(__u32 mask, const void *data, int data_type, struct inode *dir,
iter_info.marks[FSNOTIFY_OBJ_TYPE_INODE] =
fsnotify_first_mark(&inode->i_fsnotify_marks);
}
if (child) {
iter_info.marks[FSNOTIFY_OBJ_TYPE_CHILD] =
fsnotify_first_mark(&child->i_fsnotify_marks);
if (parent) {
iter_info.marks[FSNOTIFY_OBJ_TYPE_PARENT] =
fsnotify_first_mark(&parent->i_fsnotify_marks);
}
/*

View File

@ -24,11 +24,10 @@ static inline struct inotify_event_info *INOTIFY_E(struct fsnotify_event *fse)
extern void inotify_ignored_and_remove_idr(struct fsnotify_mark *fsn_mark,
struct fsnotify_group *group);
extern int inotify_handle_event(struct fsnotify_group *group, u32 mask,
const void *data, int data_type,
struct inode *dir,
const struct qstr *file_name, u32 cookie,
struct fsnotify_iter_info *iter_info);
extern int inotify_handle_inode_event(struct fsnotify_mark *inode_mark,
u32 mask, struct inode *inode,
struct inode *dir,
const struct qstr *name, u32 cookie);
extern const struct fsnotify_ops inotify_fsnotify_ops;
extern struct kmem_cache *inotify_inode_mark_cachep;

View File

@ -55,25 +55,21 @@ static int inotify_merge(struct list_head *list,
return event_compare(last_event, event);
}
static int inotify_one_event(struct fsnotify_group *group, u32 mask,
struct fsnotify_mark *inode_mark,
const struct path *path,
const struct qstr *file_name, u32 cookie)
int inotify_handle_inode_event(struct fsnotify_mark *inode_mark, u32 mask,
struct inode *inode, struct inode *dir,
const struct qstr *name, u32 cookie)
{
struct inotify_inode_mark *i_mark;
struct inotify_event_info *event;
struct fsnotify_event *fsn_event;
struct fsnotify_group *group = inode_mark->group;
int ret;
int len = 0;
int alloc_len = sizeof(struct inotify_event_info);
struct mem_cgroup *old_memcg;
if ((inode_mark->mask & FS_EXCL_UNLINK) &&
path && d_unlinked(path->dentry))
return 0;
if (file_name) {
len = file_name->len;
if (name) {
len = name->len;
alloc_len += len + 1;
}
@ -117,7 +113,7 @@ static int inotify_one_event(struct fsnotify_group *group, u32 mask,
event->sync_cookie = cookie;
event->name_len = len;
if (len)
strcpy(event->name, file_name->name);
strcpy(event->name, name->name);
ret = fsnotify_add_event(group, fsn_event, inotify_merge);
if (ret) {
@ -131,37 +127,6 @@ static int inotify_one_event(struct fsnotify_group *group, u32 mask,
return 0;
}
int inotify_handle_event(struct fsnotify_group *group, u32 mask,
const void *data, int data_type, struct inode *dir,
const struct qstr *file_name, u32 cookie,
struct fsnotify_iter_info *iter_info)
{
const struct path *path = fsnotify_data_path(data, data_type);
struct fsnotify_mark *inode_mark = fsnotify_iter_inode_mark(iter_info);
struct fsnotify_mark *child_mark = fsnotify_iter_child_mark(iter_info);
int ret = 0;
if (WARN_ON(fsnotify_iter_vfsmount_mark(iter_info)))
return 0;
/*
* Some events cannot be sent on both parent and child marks
* (e.g. IN_CREATE). Those events are always sent on inode_mark.
* For events that are possible on both parent and child (e.g. IN_OPEN),
* event is sent on inode_mark with name if the parent is watching and
* is sent on child_mark without name if child is watching.
* If both parent and child are watching, report the event with child's
* name here and report another event without child's name below.
*/
if (inode_mark)
ret = inotify_one_event(group, mask, inode_mark, path,
file_name, cookie);
if (ret || !child_mark)
return ret;
return inotify_one_event(group, mask, child_mark, path, NULL, 0);
}
static void inotify_freeing_mark(struct fsnotify_mark *fsn_mark, struct fsnotify_group *group)
{
inotify_ignored_and_remove_idr(fsn_mark, group);
@ -227,7 +192,7 @@ static void inotify_free_mark(struct fsnotify_mark *fsn_mark)
}
const struct fsnotify_ops inotify_fsnotify_ops = {
.handle_event = inotify_handle_event,
.handle_inode_event = inotify_handle_inode_event,
.free_group_priv = inotify_free_group_priv,
.free_event = inotify_free_event,
.freeing_mark = inotify_freeing_mark,

View File

@ -37,6 +37,15 @@
#include <asm/ioctls.h>
/*
* An inotify watch requires allocating an inotify_inode_mark structure as
* well as pinning the watched inode. Doubling the size of a VFS inode
* should be more than enough to cover the additional filesystem inode
* size increase.
*/
#define INOTIFY_WATCH_COST (sizeof(struct inotify_inode_mark) + \
2 * sizeof(struct inode))
/* configurable via /proc/sys/fs/inotify/ */
static int inotify_max_queued_events __read_mostly;
@ -486,14 +495,10 @@ void inotify_ignored_and_remove_idr(struct fsnotify_mark *fsn_mark,
struct fsnotify_group *group)
{
struct inotify_inode_mark *i_mark;
struct fsnotify_iter_info iter_info = { };
fsnotify_iter_set_report_type_mark(&iter_info, FSNOTIFY_OBJ_TYPE_INODE,
fsn_mark);
/* Queue ignore event for the watch */
inotify_handle_event(group, FS_IN_IGNORED, NULL, FSNOTIFY_EVENT_NONE,
NULL, NULL, 0, &iter_info);
inotify_handle_inode_event(fsn_mark, FS_IN_IGNORED, NULL, NULL, NULL,
0);
i_mark = container_of(fsn_mark, struct inotify_inode_mark, fsn_mark);
/* remove this mark from the idr */
@ -801,6 +806,18 @@ out:
*/
static int __init inotify_user_setup(void)
{
unsigned long watches_max;
struct sysinfo si;
si_meminfo(&si);
/*
* Allow up to 1% of addressable memory to be allocated for inotify
* watches (per user) limited to the range [8192, 1048576].
*/
watches_max = (((si.totalram - si.totalhigh) / 100) << PAGE_SHIFT) /
INOTIFY_WATCH_COST;
watches_max = clamp(watches_max, 8192UL, 1048576UL);
BUILD_BUG_ON(IN_ACCESS != FS_ACCESS);
BUILD_BUG_ON(IN_MODIFY != FS_MODIFY);
BUILD_BUG_ON(IN_ATTRIB != FS_ATTRIB);
@ -827,7 +844,7 @@ static int __init inotify_user_setup(void)
inotify_max_queued_events = 16384;
init_user_ns.ucount_max[UCOUNT_INOTIFY_INSTANCES] = 128;
init_user_ns.ucount_max[UCOUNT_INOTIFY_WATCHES] = 8192;
init_user_ns.ucount_max[UCOUNT_INOTIFY_WATCHES] = watches_max;
return 0;
}

View File

@ -137,6 +137,7 @@ struct mem_cgroup;
* if @file_name is not NULL, this is the directory that
* @file_name is relative to.
* @file_name: optional file name associated with event
* @cookie: inotify rename cookie
*
* free_group_priv - called when a group refcnt hits 0 to clean up the private union
* freeing_mark - called when a mark is being destroyed for some reason. The group
@ -151,7 +152,7 @@ struct fsnotify_ops {
struct fsnotify_iter_info *iter_info);
int (*handle_inode_event)(struct fsnotify_mark *mark, u32 mask,
struct inode *inode, struct inode *dir,
const struct qstr *file_name);
const struct qstr *file_name, u32 cookie);
void (*free_group_priv)(struct fsnotify_group *group);
void (*freeing_mark)(struct fsnotify_mark *mark, struct fsnotify_group *group);
void (*free_event)(struct fsnotify_event *event);
@ -277,7 +278,7 @@ static inline const struct path *fsnotify_data_path(const void *data,
enum fsnotify_obj_type {
FSNOTIFY_OBJ_TYPE_INODE,
FSNOTIFY_OBJ_TYPE_CHILD,
FSNOTIFY_OBJ_TYPE_PARENT,
FSNOTIFY_OBJ_TYPE_VFSMOUNT,
FSNOTIFY_OBJ_TYPE_SB,
FSNOTIFY_OBJ_TYPE_COUNT,
@ -285,7 +286,7 @@ enum fsnotify_obj_type {
};
#define FSNOTIFY_OBJ_TYPE_INODE_FL (1U << FSNOTIFY_OBJ_TYPE_INODE)
#define FSNOTIFY_OBJ_TYPE_CHILD_FL (1U << FSNOTIFY_OBJ_TYPE_CHILD)
#define FSNOTIFY_OBJ_TYPE_PARENT_FL (1U << FSNOTIFY_OBJ_TYPE_PARENT)
#define FSNOTIFY_OBJ_TYPE_VFSMOUNT_FL (1U << FSNOTIFY_OBJ_TYPE_VFSMOUNT)
#define FSNOTIFY_OBJ_TYPE_SB_FL (1U << FSNOTIFY_OBJ_TYPE_SB)
#define FSNOTIFY_OBJ_ALL_TYPES_MASK ((1U << FSNOTIFY_OBJ_TYPE_COUNT) - 1)
@ -330,7 +331,7 @@ static inline struct fsnotify_mark *fsnotify_iter_##name##_mark( \
}
FSNOTIFY_ITER_FUNCS(inode, INODE)
FSNOTIFY_ITER_FUNCS(child, CHILD)
FSNOTIFY_ITER_FUNCS(parent, PARENT)
FSNOTIFY_ITER_FUNCS(vfsmount, VFSMOUNT)
FSNOTIFY_ITER_FUNCS(sb, SB)

View File

@ -154,7 +154,7 @@ static void audit_autoremove_mark_rule(struct audit_fsnotify_mark *audit_mark)
/* Update mark data in audit rules based on fsnotify events. */
static int audit_mark_handle_event(struct fsnotify_mark *inode_mark, u32 mask,
struct inode *inode, struct inode *dir,
const struct qstr *dname)
const struct qstr *dname, u32 cookie)
{
struct audit_fsnotify_mark *audit_mark;

View File

@ -1037,7 +1037,7 @@ static void evict_chunk(struct audit_chunk *chunk)
static int audit_tree_handle_event(struct fsnotify_mark *mark, u32 mask,
struct inode *inode, struct inode *dir,
const struct qstr *file_name)
const struct qstr *file_name, u32 cookie)
{
return 0;
}

View File

@ -466,7 +466,7 @@ void audit_remove_watch_rule(struct audit_krule *krule)
/* Update watch data in audit rules based on fsnotify events. */
static int audit_watch_handle_event(struct fsnotify_mark *inode_mark, u32 mask,
struct inode *inode, struct inode *dir,
const struct qstr *dname)
const struct qstr *dname, u32 cookie)
{
struct audit_parent *parent;