-----BEGIN PGP SIGNATURE-----
 
 iQEzBAABCAAdFiEEq1nRK9aeMoq1VSgcnJ2qBz9kQNkFAmZLJS0ACgkQnJ2qBz9k
 QNmFlggAlIg5oDZfOhJur6h3Icldrl2DsnKer0CAP7TFK+GfkFTEb25paoydBEu4
 Y0VzZ3n3EqhmsJ8P515k1UPPPXlqqZwSRWGAek0FDhQCXhqEYxiWwf9U343hJNBS
 rya4Rnwc1pxqmJU2hrY5R5kEbugUFAIL+qNXzhhLpWonYiy/ya7P5n/qz5F5HJH2
 FufRRaPHcHFfk1u0+PvFrk019AS9C6Y3bkcUGtbpdwmFsuN3D4HKuLEkr1+C9Apb
 NmkoAwCiSobQhAxGDr6Szqu6r1VCuM+n/O9fqLknnL9u0jm95AmGdIMOdQ/ofx6d
 xn3mfRp8gUbPD8PubHhQsMjCmSjGwg==
 =kwWW
 -----END PGP SIGNATURE-----

Merge tag 'fsnotify_for_v6.10-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/jack/linux-fs

Pull fsnotify updates from Jan Kara:

 - reduce overhead of fsnotify infrastructure when no permission events
   are in use

 - a few small cleanups

* tag 'fsnotify_for_v6.10-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/jack/linux-fs:
  fsnotify: fix UAF from FS_ERROR event on a shutting down filesystem
  fsnotify: optimize the case of no permission event watchers
  fsnotify: use an enum for group priority constants
  fsnotify: move s_fsnotify_connectors into fsnotify_sb_info
  fsnotify: lazy attach fsnotify_sb_info state to sb
  fsnotify: create helper fsnotify_update_sb_watchers()
  fsnotify: pass object pointer and type to fsnotify mark helpers
  fanotify: merge two checks regarding add of ignore mark
  fsnotify: create a wrapper fsnotify_find_inode_mark()
  fsnotify: create helpers to get sb and connp from object
  fsnotify: rename fsnotify_{get,put}_sb_connectors()
  fsnotify: Avoid -Wflex-array-member-not-at-end warning
  fanotify: remove unneeded sub-zero check for unsigned value
This commit is contained in:
Linus Torvalds 2024-05-20 12:31:43 -07:00
commit 5af9d1cf39
14 changed files with 338 additions and 220 deletions

View File

@ -159,8 +159,8 @@ nfsd_file_mark_find_or_create(struct nfsd_file *nf, struct inode *inode)
do {
fsnotify_group_lock(nfsd_file_fsnotify_group);
mark = fsnotify_find_mark(&inode->i_fsnotify_marks,
nfsd_file_fsnotify_group);
mark = fsnotify_find_inode_mark(inode,
nfsd_file_fsnotify_group);
if (mark) {
nfm = nfsd_file_mark_get(container_of(mark,
struct nfsd_file_mark,

View File

@ -162,7 +162,7 @@ void dnotify_flush(struct file *filp, fl_owner_t id)
if (!S_ISDIR(inode->i_mode))
return;
fsn_mark = fsnotify_find_mark(&inode->i_fsnotify_marks, dnotify_group);
fsn_mark = fsnotify_find_inode_mark(inode, dnotify_group);
if (!fsn_mark)
return;
dn_mark = container_of(fsn_mark, struct dnotify_mark, fsn_mark);
@ -326,7 +326,7 @@ int fcntl_dirnotify(int fd, struct file *filp, unsigned int arg)
fsnotify_group_lock(dnotify_group);
/* add the new_fsn_mark or find an old one. */
fsn_mark = fsnotify_find_mark(&inode->i_fsnotify_marks, dnotify_group);
fsn_mark = fsnotify_find_inode_mark(inode, dnotify_group);
if (fsn_mark) {
dn_mark = container_of(fsn_mark, struct dnotify_mark, fsn_mark);
spin_lock(&fsn_mark->lock);

View File

@ -502,7 +502,7 @@ static int copy_fid_info_to_user(__kernel_fsid_t *fsid, struct fanotify_fh *fh,
}
/* Pad with 0's */
WARN_ON_ONCE(len < 0 || len >= FANOTIFY_EVENT_ALIGN);
WARN_ON_ONCE(len >= FANOTIFY_EVENT_ALIGN);
if (len > 0 && clear_user(buf, len))
return -EFAULT;
@ -1076,7 +1076,7 @@ static __u32 fanotify_mark_remove_from_mask(struct fsnotify_mark *fsn_mark,
}
static int fanotify_remove_mark(struct fsnotify_group *group,
fsnotify_connp_t *connp, __u32 mask,
void *obj, unsigned int obj_type, __u32 mask,
unsigned int flags, __u32 umask)
{
struct fsnotify_mark *fsn_mark = NULL;
@ -1084,7 +1084,7 @@ static int fanotify_remove_mark(struct fsnotify_group *group,
int destroy_mark;
fsnotify_group_lock(group);
fsn_mark = fsnotify_find_mark(connp, group);
fsn_mark = fsnotify_find_mark(obj, obj_type, group);
if (!fsn_mark) {
fsnotify_group_unlock(group);
return -ENOENT;
@ -1105,30 +1105,6 @@ static int fanotify_remove_mark(struct fsnotify_group *group,
return 0;
}
static int fanotify_remove_vfsmount_mark(struct fsnotify_group *group,
struct vfsmount *mnt, __u32 mask,
unsigned int flags, __u32 umask)
{
return fanotify_remove_mark(group, &real_mount(mnt)->mnt_fsnotify_marks,
mask, flags, umask);
}
static int fanotify_remove_sb_mark(struct fsnotify_group *group,
struct super_block *sb, __u32 mask,
unsigned int flags, __u32 umask)
{
return fanotify_remove_mark(group, &sb->s_fsnotify_marks, mask,
flags, umask);
}
static int fanotify_remove_inode_mark(struct fsnotify_group *group,
struct inode *inode, __u32 mask,
unsigned int flags, __u32 umask)
{
return fanotify_remove_mark(group, &inode->i_fsnotify_marks, mask,
flags, umask);
}
static bool fanotify_mark_update_flags(struct fsnotify_mark *fsn_mark,
unsigned int fan_flags)
{
@ -1249,7 +1225,7 @@ static int fanotify_set_mark_fsid(struct fsnotify_group *group,
}
static struct fsnotify_mark *fanotify_add_new_mark(struct fsnotify_group *group,
fsnotify_connp_t *connp,
void *obj,
unsigned int obj_type,
unsigned int fan_flags,
struct fan_fsid *fsid)
@ -1288,7 +1264,7 @@ static struct fsnotify_mark *fanotify_add_new_mark(struct fsnotify_group *group,
fan_mark->fsid.val[0] = fan_mark->fsid.val[1] = 0;
}
ret = fsnotify_add_mark_locked(mark, connp, obj_type, 0);
ret = fsnotify_add_mark_locked(mark, obj, obj_type, 0);
if (ret)
goto out_put_mark;
@ -1344,7 +1320,7 @@ static int fanotify_may_update_existing_mark(struct fsnotify_mark *fsn_mark,
}
static int fanotify_add_mark(struct fsnotify_group *group,
fsnotify_connp_t *connp, unsigned int obj_type,
void *obj, unsigned int obj_type,
__u32 mask, unsigned int fan_flags,
struct fan_fsid *fsid)
{
@ -1353,9 +1329,9 @@ static int fanotify_add_mark(struct fsnotify_group *group,
int ret = 0;
fsnotify_group_lock(group);
fsn_mark = fsnotify_find_mark(connp, group);
fsn_mark = fsnotify_find_mark(obj, obj_type, group);
if (!fsn_mark) {
fsn_mark = fanotify_add_new_mark(group, connp, obj_type,
fsn_mark = fanotify_add_new_mark(group, obj, obj_type,
fan_flags, fsid);
if (IS_ERR(fsn_mark)) {
fsnotify_group_unlock(group);
@ -1392,42 +1368,6 @@ out:
return ret;
}
static int fanotify_add_vfsmount_mark(struct fsnotify_group *group,
struct vfsmount *mnt, __u32 mask,
unsigned int flags, struct fan_fsid *fsid)
{
return fanotify_add_mark(group, &real_mount(mnt)->mnt_fsnotify_marks,
FSNOTIFY_OBJ_TYPE_VFSMOUNT, mask, flags, fsid);
}
static int fanotify_add_sb_mark(struct fsnotify_group *group,
struct super_block *sb, __u32 mask,
unsigned int flags, struct fan_fsid *fsid)
{
return fanotify_add_mark(group, &sb->s_fsnotify_marks,
FSNOTIFY_OBJ_TYPE_SB, mask, flags, fsid);
}
static int fanotify_add_inode_mark(struct fsnotify_group *group,
struct inode *inode, __u32 mask,
unsigned int flags, struct fan_fsid *fsid)
{
pr_debug("%s: group=%p inode=%p\n", __func__, group, inode);
/*
* If some other task has this inode open for write we should not add
* an ignore mask, unless that ignore mask is supposed to survive
* modification changes anyway.
*/
if ((flags & FANOTIFY_MARK_IGNORE_BITS) &&
!(flags & FAN_MARK_IGNORED_SURV_MODIFY) &&
inode_is_open_for_write(inode))
return 0;
return fanotify_add_mark(group, &inode->i_fsnotify_marks,
FSNOTIFY_OBJ_TYPE_INODE, mask, flags, fsid);
}
static struct fsnotify_event *fanotify_alloc_overflow_event(void)
{
struct fanotify_event *oevent;
@ -1576,13 +1516,13 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags)
INIT_LIST_HEAD(&group->fanotify_data.access_list);
switch (class) {
case FAN_CLASS_NOTIF:
group->priority = FS_PRIO_0;
group->priority = FSNOTIFY_PRIO_NORMAL;
break;
case FAN_CLASS_CONTENT:
group->priority = FS_PRIO_1;
group->priority = FSNOTIFY_PRIO_CONTENT;
break;
case FAN_CLASS_PRE_CONTENT:
group->priority = FS_PRIO_2;
group->priority = FSNOTIFY_PRIO_PRE_CONTENT;
break;
default:
fd = -EINVAL;
@ -1750,6 +1690,7 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
unsigned int mark_cmd = flags & FANOTIFY_MARK_CMD_BITS;
unsigned int ignore = flags & FANOTIFY_MARK_IGNORE_BITS;
unsigned int obj_type, fid_mode;
void *obj;
u32 umask = 0;
int ret;
@ -1833,12 +1774,11 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
goto fput_and_out;
/*
* group->priority == FS_PRIO_0 == FAN_CLASS_NOTIF. These are not
* allowed to set permissions events.
* Permission events require minimum priority FAN_CLASS_CONTENT.
*/
ret = -EINVAL;
if (mask & FANOTIFY_PERM_EVENTS &&
group->priority == FS_PRIO_0)
group->priority < FSNOTIFY_PRIO_CONTENT)
goto fput_and_out;
if (mask & FAN_FS_ERROR &&
@ -1908,17 +1848,34 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
}
/* inode held in place by reference to path; group by fget on fd */
if (mark_type == FAN_MARK_INODE)
if (mark_type == FAN_MARK_INODE) {
inode = path.dentry->d_inode;
else
obj = inode;
} else {
mnt = path.mnt;
if (mark_type == FAN_MARK_MOUNT)
obj = mnt;
else
obj = mnt->mnt_sb;
}
ret = mnt ? -EINVAL : -EISDIR;
/* FAN_MARK_IGNORE requires SURV_MODIFY for sb/mount/dir marks */
if (mark_cmd == FAN_MARK_ADD && ignore == FAN_MARK_IGNORE &&
(mnt || S_ISDIR(inode->i_mode)) &&
!(flags & FAN_MARK_IGNORED_SURV_MODIFY))
goto path_put_and_out;
/*
* If some other task has this inode open for write we should not add
* an ignore mask, unless that ignore mask is supposed to survive
* modification changes anyway.
*/
if (mark_cmd == FAN_MARK_ADD && (flags & FANOTIFY_MARK_IGNORE_BITS) &&
!(flags & FAN_MARK_IGNORED_SURV_MODIFY)) {
ret = mnt ? -EINVAL : -EISDIR;
/* FAN_MARK_IGNORE requires SURV_MODIFY for sb/mount/dir marks */
if (ignore == FAN_MARK_IGNORE &&
(mnt || S_ISDIR(inode->i_mode)))
goto path_put_and_out;
ret = 0;
if (inode && inode_is_open_for_write(inode))
goto path_put_and_out;
}
/* Mask out FAN_EVENT_ON_CHILD flag for sb/mount/non-dir marks */
if (mnt || !S_ISDIR(inode->i_mode)) {
@ -1936,26 +1893,12 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
/* create/update an inode mark */
switch (mark_cmd) {
case FAN_MARK_ADD:
if (mark_type == FAN_MARK_MOUNT)
ret = fanotify_add_vfsmount_mark(group, mnt, mask,
flags, fsid);
else if (mark_type == FAN_MARK_FILESYSTEM)
ret = fanotify_add_sb_mark(group, mnt->mnt_sb, mask,
flags, fsid);
else
ret = fanotify_add_inode_mark(group, inode, mask,
flags, fsid);
ret = fanotify_add_mark(group, obj, obj_type, mask, flags,
fsid);
break;
case FAN_MARK_REMOVE:
if (mark_type == FAN_MARK_MOUNT)
ret = fanotify_remove_vfsmount_mark(group, mnt, mask,
flags, umask);
else if (mark_type == FAN_MARK_FILESYSTEM)
ret = fanotify_remove_sb_mark(group, mnt->mnt_sb, mask,
flags, umask);
else
ret = fanotify_remove_inode_mark(group, inode, mask,
flags, umask);
ret = fanotify_remove_mark(group, obj, obj_type, mask, flags,
umask);
break;
default:
ret = -EINVAL;

View File

@ -41,29 +41,25 @@ static void show_fdinfo(struct seq_file *m, struct file *f,
#if defined(CONFIG_EXPORTFS)
static void show_mark_fhandle(struct seq_file *m, struct inode *inode)
{
struct {
struct file_handle handle;
u8 pad[MAX_HANDLE_SZ];
} f;
DEFINE_FLEX(struct file_handle, f, f_handle, handle_bytes, MAX_HANDLE_SZ);
int size, ret, i;
f.handle.handle_bytes = sizeof(f.pad);
size = f.handle.handle_bytes >> 2;
size = f->handle_bytes >> 2;
ret = exportfs_encode_fid(inode, (struct fid *)f.handle.f_handle, &size);
ret = exportfs_encode_fid(inode, (struct fid *)f->f_handle, &size);
if ((ret == FILEID_INVALID) || (ret < 0)) {
WARN_ONCE(1, "Can't encode file handler for inotify: %d\n", ret);
return;
}
f.handle.handle_type = ret;
f.handle.handle_bytes = size * sizeof(u32);
f->handle_type = ret;
f->handle_bytes = size * sizeof(u32);
seq_printf(m, "fhandle-bytes:%x fhandle-type:%x f_handle:",
f.handle.handle_bytes, f.handle.handle_type);
f->handle_bytes, f->handle_type);
for (i = 0; i < f.handle.handle_bytes; i++)
seq_printf(m, "%02x", (int)f.handle.f_handle[i]);
for (i = 0; i < f->handle_bytes; i++)
seq_printf(m, "%02x", (int)f->f_handle[i]);
}
#else
static void show_mark_fhandle(struct seq_file *m, struct inode *inode)

View File

@ -89,11 +89,25 @@ static void fsnotify_unmount_inodes(struct super_block *sb)
void fsnotify_sb_delete(struct super_block *sb)
{
struct fsnotify_sb_info *sbinfo = fsnotify_sb_info(sb);
/* Were any marks ever added to any object on this sb? */
if (!sbinfo)
return;
fsnotify_unmount_inodes(sb);
fsnotify_clear_marks_by_sb(sb);
/* Wait for outstanding object references from connectors */
wait_var_event(&sb->s_fsnotify_connectors,
!atomic_long_read(&sb->s_fsnotify_connectors));
wait_var_event(fsnotify_sb_watched_objects(sb),
!atomic_long_read(fsnotify_sb_watched_objects(sb)));
WARN_ON(fsnotify_sb_has_priority_watchers(sb, FSNOTIFY_PRIO_CONTENT));
WARN_ON(fsnotify_sb_has_priority_watchers(sb,
FSNOTIFY_PRIO_PRE_CONTENT));
}
void fsnotify_sb_free(struct super_block *sb)
{
kfree(sb->s_fsnotify_info);
}
/*
@ -489,6 +503,7 @@ int fsnotify(__u32 mask, const void *data, int data_type, struct inode *dir,
{
const struct path *path = fsnotify_data_path(data, data_type);
struct super_block *sb = fsnotify_data_sb(data, data_type);
struct fsnotify_sb_info *sbinfo = fsnotify_sb_info(sb);
struct fsnotify_iter_info iter_info = {};
struct mount *mnt = NULL;
struct inode *inode2 = NULL;
@ -525,7 +540,7 @@ int fsnotify(__u32 mask, const void *data, int data_type, struct inode *dir,
* SRCU because we have no references to any objects and do not
* need SRCU to keep them "alive".
*/
if (!sb->s_fsnotify_marks &&
if ((!sbinfo || !sbinfo->sb_marks) &&
(!mnt || !mnt->mnt_fsnotify_marks) &&
(!inode || !inode->i_fsnotify_marks) &&
(!inode2 || !inode2->i_fsnotify_marks))
@ -552,8 +567,10 @@ int fsnotify(__u32 mask, const void *data, int data_type, struct inode *dir,
iter_info.srcu_idx = srcu_read_lock(&fsnotify_mark_srcu);
iter_info.marks[FSNOTIFY_ITER_TYPE_SB] =
fsnotify_first_mark(&sb->s_fsnotify_marks);
if (sbinfo) {
iter_info.marks[FSNOTIFY_ITER_TYPE_SB] =
fsnotify_first_mark(&sbinfo->sb_marks);
}
if (mnt) {
iter_info.marks[FSNOTIFY_ITER_TYPE_VFSMOUNT] =
fsnotify_first_mark(&mnt->mnt_fsnotify_marks);

View File

@ -9,37 +9,56 @@
#include "../mount.h"
/*
* fsnotify_connp_t is what we embed in objects which connector can be attached
* to.
*/
typedef struct fsnotify_mark_connector __rcu *fsnotify_connp_t;
static inline struct inode *fsnotify_conn_inode(
struct fsnotify_mark_connector *conn)
{
return container_of(conn->obj, struct inode, i_fsnotify_marks);
return conn->obj;
}
static inline struct mount *fsnotify_conn_mount(
struct fsnotify_mark_connector *conn)
{
return container_of(conn->obj, struct mount, mnt_fsnotify_marks);
return real_mount(conn->obj);
}
static inline struct super_block *fsnotify_conn_sb(
struct fsnotify_mark_connector *conn)
{
return container_of(conn->obj, struct super_block, s_fsnotify_marks);
return conn->obj;
}
static inline struct super_block *fsnotify_object_sb(void *obj,
enum fsnotify_obj_type obj_type)
{
switch (obj_type) {
case FSNOTIFY_OBJ_TYPE_INODE:
return ((struct inode *)obj)->i_sb;
case FSNOTIFY_OBJ_TYPE_VFSMOUNT:
return ((struct vfsmount *)obj)->mnt_sb;
case FSNOTIFY_OBJ_TYPE_SB:
return (struct super_block *)obj;
default:
return NULL;
}
}
static inline struct super_block *fsnotify_connector_sb(
struct fsnotify_mark_connector *conn)
{
switch (conn->type) {
case FSNOTIFY_OBJ_TYPE_INODE:
return fsnotify_conn_inode(conn)->i_sb;
case FSNOTIFY_OBJ_TYPE_VFSMOUNT:
return fsnotify_conn_mount(conn)->mnt.mnt_sb;
case FSNOTIFY_OBJ_TYPE_SB:
return fsnotify_conn_sb(conn);
default:
return NULL;
}
return fsnotify_object_sb(conn->obj, conn->type);
}
static inline fsnotify_connp_t *fsnotify_sb_marks(struct super_block *sb)
{
struct fsnotify_sb_info *sbinfo = fsnotify_sb_info(sb);
return sbinfo ? &sbinfo->sb_marks : NULL;
}
/* destroy all events sitting in this groups notification queue */
@ -67,7 +86,7 @@ static inline void fsnotify_clear_marks_by_mount(struct vfsmount *mnt)
/* run the list of all marks associated with sb and destroy them */
static inline void fsnotify_clear_marks_by_sb(struct super_block *sb)
{
fsnotify_destroy_marks(&sb->s_fsnotify_marks);
fsnotify_destroy_marks(fsnotify_sb_marks(sb));
}
/*

View File

@ -544,7 +544,7 @@ static int inotify_update_existing_watch(struct fsnotify_group *group,
int create = (arg & IN_MASK_CREATE);
int ret;
fsn_mark = fsnotify_find_mark(&inode->i_fsnotify_marks, group);
fsn_mark = fsnotify_find_inode_mark(inode, group);
if (!fsn_mark)
return -ENOENT;
else if (create) {

View File

@ -97,6 +97,21 @@ void fsnotify_get_mark(struct fsnotify_mark *mark)
refcount_inc(&mark->refcnt);
}
static fsnotify_connp_t *fsnotify_object_connp(void *obj,
enum fsnotify_obj_type obj_type)
{
switch (obj_type) {
case FSNOTIFY_OBJ_TYPE_INODE:
return &((struct inode *)obj)->i_fsnotify_marks;
case FSNOTIFY_OBJ_TYPE_VFSMOUNT:
return &real_mount(obj)->mnt_fsnotify_marks;
case FSNOTIFY_OBJ_TYPE_SB:
return fsnotify_sb_marks(obj);
default:
return NULL;
}
}
static __u32 *fsnotify_conn_mask_p(struct fsnotify_mark_connector *conn)
{
if (conn->type == FSNOTIFY_OBJ_TYPE_INODE)
@ -116,10 +131,69 @@ __u32 fsnotify_conn_mask(struct fsnotify_mark_connector *conn)
return *fsnotify_conn_mask_p(conn);
}
static void fsnotify_get_sb_watched_objects(struct super_block *sb)
{
atomic_long_inc(fsnotify_sb_watched_objects(sb));
}
static void fsnotify_put_sb_watched_objects(struct super_block *sb)
{
if (atomic_long_dec_and_test(fsnotify_sb_watched_objects(sb)))
wake_up_var(fsnotify_sb_watched_objects(sb));
}
static void fsnotify_get_inode_ref(struct inode *inode)
{
ihold(inode);
atomic_long_inc(&inode->i_sb->s_fsnotify_connectors);
fsnotify_get_sb_watched_objects(inode->i_sb);
}
static void fsnotify_put_inode_ref(struct inode *inode)
{
fsnotify_put_sb_watched_objects(inode->i_sb);
iput(inode);
}
/*
* Grab or drop watched objects reference depending on whether the connector
* is attached and has any marks attached.
*/
static void fsnotify_update_sb_watchers(struct super_block *sb,
struct fsnotify_mark_connector *conn)
{
struct fsnotify_sb_info *sbinfo = fsnotify_sb_info(sb);
bool is_watched = conn->flags & FSNOTIFY_CONN_FLAG_IS_WATCHED;
struct fsnotify_mark *first_mark = NULL;
unsigned int highest_prio = 0;
if (conn->obj)
first_mark = hlist_entry_safe(conn->list.first,
struct fsnotify_mark, obj_list);
if (first_mark)
highest_prio = first_mark->group->priority;
if (WARN_ON(highest_prio >= __FSNOTIFY_PRIO_NUM))
highest_prio = 0;
/*
* If the highest priority of group watching this object is prio,
* then watched object has a reference on counters [0..prio].
* Update priority >= 1 watched objects counters.
*/
for (unsigned int p = conn->prio + 1; p <= highest_prio; p++)
atomic_long_inc(&sbinfo->watched_objects[p]);
for (unsigned int p = conn->prio; p > highest_prio; p--)
atomic_long_dec(&sbinfo->watched_objects[p]);
conn->prio = highest_prio;
/* Update priority >= 0 (a.k.a total) watched objects counter */
BUILD_BUG_ON(FSNOTIFY_PRIO_NORMAL != 0);
if (first_mark && !is_watched) {
conn->flags |= FSNOTIFY_CONN_FLAG_IS_WATCHED;
fsnotify_get_sb_watched_objects(sb);
} else if (!first_mark && is_watched) {
conn->flags &= ~FSNOTIFY_CONN_FLAG_IS_WATCHED;
fsnotify_put_sb_watched_objects(sb);
}
}
/*
@ -213,35 +287,12 @@ static void fsnotify_connector_destroy_workfn(struct work_struct *work)
}
}
static void fsnotify_put_inode_ref(struct inode *inode)
{
struct super_block *sb = inode->i_sb;
iput(inode);
if (atomic_long_dec_and_test(&sb->s_fsnotify_connectors))
wake_up_var(&sb->s_fsnotify_connectors);
}
static void fsnotify_get_sb_connectors(struct fsnotify_mark_connector *conn)
{
struct super_block *sb = fsnotify_connector_sb(conn);
if (sb)
atomic_long_inc(&sb->s_fsnotify_connectors);
}
static void fsnotify_put_sb_connectors(struct fsnotify_mark_connector *conn)
{
struct super_block *sb = fsnotify_connector_sb(conn);
if (sb && atomic_long_dec_and_test(&sb->s_fsnotify_connectors))
wake_up_var(&sb->s_fsnotify_connectors);
}
static void *fsnotify_detach_connector_from_object(
struct fsnotify_mark_connector *conn,
unsigned int *type)
{
fsnotify_connp_t *connp = fsnotify_object_connp(conn->obj, conn->type);
struct super_block *sb = fsnotify_connector_sb(conn);
struct inode *inode = NULL;
*type = conn->type;
@ -261,10 +312,10 @@ static void *fsnotify_detach_connector_from_object(
fsnotify_conn_sb(conn)->s_fsnotify_mask = 0;
}
fsnotify_put_sb_connectors(conn);
rcu_assign_pointer(*(conn->obj), NULL);
rcu_assign_pointer(*connp, NULL);
conn->obj = NULL;
conn->type = FSNOTIFY_OBJ_TYPE_DETACHED;
fsnotify_update_sb_watchers(sb, conn);
return inode;
}
@ -316,6 +367,11 @@ void fsnotify_put_mark(struct fsnotify_mark *mark)
objp = fsnotify_detach_connector_from_object(conn, &type);
free_conn = true;
} else {
struct super_block *sb = fsnotify_connector_sb(conn);
/* Update watched objects after detaching mark */
if (sb)
fsnotify_update_sb_watchers(sb, conn);
objp = __fsnotify_recalc_mask(conn);
type = conn->type;
}
@ -536,8 +592,28 @@ int fsnotify_compare_groups(struct fsnotify_group *a, struct fsnotify_group *b)
return -1;
}
static int fsnotify_attach_info_to_sb(struct super_block *sb)
{
struct fsnotify_sb_info *sbinfo;
/* sb info is freed on fsnotify_sb_delete() */
sbinfo = kzalloc(sizeof(*sbinfo), GFP_KERNEL);
if (!sbinfo)
return -ENOMEM;
/*
* cmpxchg() provides the barrier so that callers of fsnotify_sb_info()
* will observe an initialized structure
*/
if (cmpxchg(&sb->s_fsnotify_info, NULL, sbinfo)) {
/* Someone else created sbinfo for us */
kfree(sbinfo);
}
return 0;
}
static int fsnotify_attach_connector_to_object(fsnotify_connp_t *connp,
unsigned int obj_type)
void *obj, unsigned int obj_type)
{
struct fsnotify_mark_connector *conn;
@ -547,10 +623,9 @@ static int fsnotify_attach_connector_to_object(fsnotify_connp_t *connp,
spin_lock_init(&conn->lock);
INIT_HLIST_HEAD(&conn->list);
conn->flags = 0;
conn->prio = 0;
conn->type = obj_type;
conn->obj = connp;
conn->flags = 0;
fsnotify_get_sb_connectors(conn);
conn->obj = obj;
/*
* cmpxchg() provides the barrier so that readers of *connp can see
@ -558,10 +633,8 @@ static int fsnotify_attach_connector_to_object(fsnotify_connp_t *connp,
*/
if (cmpxchg(connp, NULL, conn)) {
/* Someone else created list structure for us */
fsnotify_put_sb_connectors(conn);
kmem_cache_free(fsnotify_mark_connector_cachep, conn);
}
return 0;
}
@ -598,24 +671,36 @@ out:
* to which group and for which inodes. These marks are ordered according to
* priority, highest number first, and then by the group's location in memory.
*/
static int fsnotify_add_mark_list(struct fsnotify_mark *mark,
fsnotify_connp_t *connp,
static int fsnotify_add_mark_list(struct fsnotify_mark *mark, void *obj,
unsigned int obj_type, int add_flags)
{
struct super_block *sb = fsnotify_object_sb(obj, obj_type);
struct fsnotify_mark *lmark, *last = NULL;
struct fsnotify_mark_connector *conn;
fsnotify_connp_t *connp;
int cmp;
int err = 0;
if (WARN_ON(!fsnotify_valid_obj_type(obj_type)))
return -EINVAL;
/*
* Attach the sb info before attaching a connector to any object on sb.
* The sb info will remain attached as long as sb lives.
*/
if (!fsnotify_sb_info(sb)) {
err = fsnotify_attach_info_to_sb(sb);
if (err)
return err;
}
connp = fsnotify_object_connp(obj, obj_type);
restart:
spin_lock(&mark->lock);
conn = fsnotify_grab_connector(connp);
if (!conn) {
spin_unlock(&mark->lock);
err = fsnotify_attach_connector_to_object(connp, obj_type);
err = fsnotify_attach_connector_to_object(connp, obj, obj_type);
if (err)
return err;
goto restart;
@ -649,6 +734,7 @@ restart:
/* mark should be the last entry. last is the current last entry */
hlist_add_behind_rcu(&mark->obj_list, &last->obj_list);
added:
fsnotify_update_sb_watchers(sb, conn);
/*
* Since connector is attached to object using cmpxchg() we are
* guaranteed that connector initialization is fully visible by anyone
@ -667,7 +753,7 @@ out_err:
* event types should be delivered to which group.
*/
int fsnotify_add_mark_locked(struct fsnotify_mark *mark,
fsnotify_connp_t *connp, unsigned int obj_type,
void *obj, unsigned int obj_type,
int add_flags)
{
struct fsnotify_group *group = mark->group;
@ -688,7 +774,7 @@ int fsnotify_add_mark_locked(struct fsnotify_mark *mark,
fsnotify_get_mark(mark); /* for g_list */
spin_unlock(&mark->lock);
ret = fsnotify_add_mark_list(mark, connp, obj_type, add_flags);
ret = fsnotify_add_mark_list(mark, obj, obj_type, add_flags);
if (ret)
goto err;
@ -706,14 +792,14 @@ err:
return ret;
}
int fsnotify_add_mark(struct fsnotify_mark *mark, fsnotify_connp_t *connp,
int fsnotify_add_mark(struct fsnotify_mark *mark, void *obj,
unsigned int obj_type, int add_flags)
{
int ret;
struct fsnotify_group *group = mark->group;
fsnotify_group_lock(group);
ret = fsnotify_add_mark_locked(mark, connp, obj_type, add_flags);
ret = fsnotify_add_mark_locked(mark, obj, obj_type, add_flags);
fsnotify_group_unlock(group);
return ret;
}
@ -723,12 +809,16 @@ EXPORT_SYMBOL_GPL(fsnotify_add_mark);
* Given a list of marks, find the mark associated with given group. If found
* take a reference to that mark and return it, else return NULL.
*/
struct fsnotify_mark *fsnotify_find_mark(fsnotify_connp_t *connp,
struct fsnotify_mark *fsnotify_find_mark(void *obj, unsigned int obj_type,
struct fsnotify_group *group)
{
fsnotify_connp_t *connp = fsnotify_object_connp(obj, obj_type);
struct fsnotify_mark_connector *conn;
struct fsnotify_mark *mark;
if (!connp)
return NULL;
conn = fsnotify_grab_connector(connp);
if (!conn)
return NULL;

View File

@ -274,6 +274,7 @@ static void destroy_super_work(struct work_struct *work)
{
struct super_block *s = container_of(work, struct super_block,
destroy_work);
fsnotify_sb_free(s);
security_sb_free(s);
put_user_ns(s->s_user_ns);
kfree(s->s_subtype);

View File

@ -73,6 +73,8 @@ struct fscrypt_inode_info;
struct fscrypt_operations;
struct fsverity_info;
struct fsverity_operations;
struct fsnotify_mark_connector;
struct fsnotify_sb_info;
struct fs_context;
struct fs_parameter_spec;
struct fileattr;
@ -618,8 +620,6 @@ is_uncached_acl(struct posix_acl *acl)
#define IOP_XATTR 0x0008
#define IOP_DEFAULT_READLINK 0x0010
struct fsnotify_mark_connector;
/*
* Keep mostly read-only and often accessed (especially for
* the RCU path lookup and 'stat' data) fields at the beginning
@ -1248,7 +1248,7 @@ struct super_block {
/*
* Keep s_fs_info, s_time_gran, s_fsnotify_mask, and
* s_fsnotify_marks together for cache efficiency. They are frequently
* s_fsnotify_info together for cache efficiency. They are frequently
* accessed and rarely modified.
*/
void *s_fs_info; /* Filesystem private info */
@ -1260,7 +1260,7 @@ struct super_block {
time64_t s_time_max;
#ifdef CONFIG_FSNOTIFY
__u32 s_fsnotify_mask;
struct fsnotify_mark_connector __rcu *s_fsnotify_marks;
struct fsnotify_sb_info *s_fsnotify_info;
#endif
/*
@ -1301,12 +1301,6 @@ struct super_block {
/* Number of inodes with nlink == 0 but still referenced */
atomic_long_t s_remove_count;
/*
* Number of inode/mount/sb objects that are being watched, note that
* inodes objects are currently double-accounted.
*/
atomic_long_t s_fsnotify_connectors;
/* Read-only state of the superblock is being changed */
int s_readonly_remount;

View File

@ -17,10 +17,23 @@
#include <linux/slab.h>
#include <linux/bug.h>
/* Are there any inode/mount/sb objects watched with priority prio or above? */
static inline bool fsnotify_sb_has_priority_watchers(struct super_block *sb,
int prio)
{
struct fsnotify_sb_info *sbinfo = fsnotify_sb_info(sb);
/* Were any marks ever added to any object on this sb? */
if (!sbinfo)
return false;
return atomic_long_read(&sbinfo->watched_objects[prio]);
}
/* Are there any inode/mount/sb objects that are being watched at all? */
static inline bool fsnotify_sb_has_watchers(struct super_block *sb)
{
return atomic_long_read(&sb->s_fsnotify_connectors);
return fsnotify_sb_has_priority_watchers(sb, 0);
}
/*
@ -103,6 +116,12 @@ static inline int fsnotify_file(struct file *file, __u32 mask)
return 0;
path = &file->f_path;
/* Permission events require group prio >= FSNOTIFY_PRIO_CONTENT */
if (mask & ALL_FSNOTIFY_PERM_EVENTS &&
!fsnotify_sb_has_priority_watchers(path->dentry->d_sb,
FSNOTIFY_PRIO_CONTENT))
return 0;
return fsnotify_parent(path->dentry, mask, path, FSNOTIFY_EVENT_PATH);
}

View File

@ -176,6 +176,17 @@ struct fsnotify_event {
struct list_head list;
};
/*
* fsnotify group priorities.
* Events are sent in order from highest priority to lowest priority.
*/
enum fsnotify_group_prio {
FSNOTIFY_PRIO_NORMAL = 0, /* normal notifiers, no permissions */
FSNOTIFY_PRIO_CONTENT, /* fanotify permission events */
FSNOTIFY_PRIO_PRE_CONTENT, /* fanotify pre-content events */
__FSNOTIFY_PRIO_NUM
};
/*
* A group is a "thing" that wants to receive notification about filesystem
* events. The mask holds the subset of event types this group cares about.
@ -201,14 +212,7 @@ struct fsnotify_group {
wait_queue_head_t notification_waitq; /* read() on the notification file blocks on this waitq */
unsigned int q_len; /* events on the queue */
unsigned int max_events; /* maximum events allowed on the list */
/*
* Valid fsnotify group priorities. Events are send in order from highest
* priority to lowest priority. We default to the lowest priority.
*/
#define FS_PRIO_0 0 /* normal notifiers, no permissions */
#define FS_PRIO_1 1 /* fanotify content based access control */
#define FS_PRIO_2 2 /* fanotify pre-content access */
unsigned int priority;
enum fsnotify_group_prio priority; /* priority for sending events */
bool shutdown; /* group is being shut down, don't queue more events */
#define FSNOTIFY_GROUP_USER 0x01 /* user allocated group */
@ -456,13 +460,6 @@ FSNOTIFY_ITER_FUNCS(sb, SB)
type < FSNOTIFY_ITER_TYPE_COUNT; \
type++)
/*
* fsnotify_connp_t is what we embed in objects which connector can be attached
* to. fsnotify_connp_t * is how we refer from connector back to object.
*/
struct fsnotify_mark_connector;
typedef struct fsnotify_mark_connector __rcu *fsnotify_connp_t;
/*
* Inode/vfsmount/sb point to this structure which tracks all marks attached to
* the inode/vfsmount/sb. The reference to inode/vfsmount/sb is held by this
@ -471,18 +468,51 @@ typedef struct fsnotify_mark_connector __rcu *fsnotify_connp_t;
*/
struct fsnotify_mark_connector {
spinlock_t lock;
unsigned short type; /* Type of object [lock] */
unsigned char type; /* Type of object [lock] */
unsigned char prio; /* Highest priority group */
#define FSNOTIFY_CONN_FLAG_IS_WATCHED 0x01
#define FSNOTIFY_CONN_FLAG_HAS_IREF 0x02
unsigned short flags; /* flags [lock] */
union {
/* Object pointer [lock] */
fsnotify_connp_t *obj;
void *obj;
/* Used listing heads to free after srcu period expires */
struct fsnotify_mark_connector *destroy_next;
};
struct hlist_head list;
};
/*
* Container for per-sb fsnotify state (sb marks and more).
* Attached lazily on first marked object on the sb and freed when killing sb.
*/
struct fsnotify_sb_info {
struct fsnotify_mark_connector __rcu *sb_marks;
/*
* Number of inode/mount/sb objects that are being watched in this sb.
* Note that inodes objects are currently double-accounted.
*
* The value in watched_objects[prio] is the number of objects that are
* watched by groups of priority >= prio, so watched_objects[0] is the
* total number of watched objects in this sb.
*/
atomic_long_t watched_objects[__FSNOTIFY_PRIO_NUM];
};
static inline struct fsnotify_sb_info *fsnotify_sb_info(struct super_block *sb)
{
#ifdef CONFIG_FSNOTIFY
return READ_ONCE(sb->s_fsnotify_info);
#else
return NULL;
#endif
}
static inline atomic_long_t *fsnotify_sb_watched_objects(struct super_block *sb)
{
return &fsnotify_sb_info(sb)->watched_objects[0];
}
/*
* A mark is simply an object attached to an in core inode which allows an
* fsnotify listener to indicate they are either no longer interested in events
@ -546,6 +576,7 @@ extern int __fsnotify_parent(struct dentry *dentry, __u32 mask, const void *data
extern void __fsnotify_inode_delete(struct inode *inode);
extern void __fsnotify_vfsmount_delete(struct vfsmount *mnt);
extern void fsnotify_sb_delete(struct super_block *sb);
extern void fsnotify_sb_free(struct super_block *sb);
extern u32 fsnotify_get_cookie(void);
static inline __u32 fsnotify_parent_needed_mask(__u32 mask)
@ -758,30 +789,35 @@ extern void fsnotify_recalc_mask(struct fsnotify_mark_connector *conn);
extern void fsnotify_init_mark(struct fsnotify_mark *mark,
struct fsnotify_group *group);
/* Find mark belonging to given group in the list of marks */
extern struct fsnotify_mark *fsnotify_find_mark(fsnotify_connp_t *connp,
struct fsnotify_group *group);
struct fsnotify_mark *fsnotify_find_mark(void *obj, unsigned int obj_type,
struct fsnotify_group *group);
/* attach the mark to the object */
extern int fsnotify_add_mark(struct fsnotify_mark *mark,
fsnotify_connp_t *connp, unsigned int obj_type,
int add_flags);
extern int fsnotify_add_mark_locked(struct fsnotify_mark *mark,
fsnotify_connp_t *connp,
unsigned int obj_type, int add_flags);
int fsnotify_add_mark(struct fsnotify_mark *mark, void *obj,
unsigned int obj_type, int add_flags);
int fsnotify_add_mark_locked(struct fsnotify_mark *mark, void *obj,
unsigned int obj_type, int add_flags);
/* attach the mark to the inode */
static inline int fsnotify_add_inode_mark(struct fsnotify_mark *mark,
struct inode *inode,
int add_flags)
{
return fsnotify_add_mark(mark, &inode->i_fsnotify_marks,
FSNOTIFY_OBJ_TYPE_INODE, add_flags);
return fsnotify_add_mark(mark, inode, FSNOTIFY_OBJ_TYPE_INODE,
add_flags);
}
static inline int fsnotify_add_inode_mark_locked(struct fsnotify_mark *mark,
struct inode *inode,
int add_flags)
{
return fsnotify_add_mark_locked(mark, &inode->i_fsnotify_marks,
FSNOTIFY_OBJ_TYPE_INODE, add_flags);
return fsnotify_add_mark_locked(mark, inode, FSNOTIFY_OBJ_TYPE_INODE,
add_flags);
}
static inline struct fsnotify_mark *fsnotify_find_inode_mark(
struct inode *inode,
struct fsnotify_group *group)
{
return fsnotify_find_mark(inode, FSNOTIFY_OBJ_TYPE_INODE, group);
}
/* given a group and a mark, flag mark to be freed when all references are dropped */
@ -845,6 +881,9 @@ static inline void __fsnotify_vfsmount_delete(struct vfsmount *mnt)
static inline void fsnotify_sb_delete(struct super_block *sb)
{}
static inline void fsnotify_sb_free(struct super_block *sb)
{}
static inline void fsnotify_update_flags(struct dentry *dentry)
{}

View File

@ -463,7 +463,7 @@ static int tag_chunk(struct inode *inode, struct audit_tree *tree)
int n;
fsnotify_group_lock(audit_tree_group);
mark = fsnotify_find_mark(&inode->i_fsnotify_marks, audit_tree_group);
mark = fsnotify_find_inode_mark(inode, audit_tree_group);
if (!mark)
return create_chunk(inode, tree);

View File

@ -90,7 +90,7 @@ static inline struct audit_parent *audit_find_parent(struct inode *inode)
struct audit_parent *parent = NULL;
struct fsnotify_mark *entry;
entry = fsnotify_find_mark(&inode->i_fsnotify_marks, audit_watch_group);
entry = fsnotify_find_inode_mark(inode, audit_watch_group);
if (entry)
parent = container_of(entry, struct audit_parent, mark);