89cfa89960
fstrim will hold the AGF lock for as long as it takes to walk and discard all the free space in the AG that meets the userspace trim criteria. For AGs with lots of free space extents (e.g. millions) or when the underlying device is really slow at processing discard requests (e.g. Ceph RBD), this means the AGF hold time is often measured in minutes to hours, not a few milliseconds as we normally see with non-discard based operations. This can result in the entire filesystem hanging whilst the long-running fstrim is in progress. We can have transactions get stuck waiting for the AGF lock (data or metadata extent allocation and freeing), and then more transactions get stuck waiting on the locks those transactions hold. We can get to the point where fstrim blocks an extent allocation or free operation long enough that it ends up pinning the tail of the log and the log then runs out of space. At this point, every modification in the filesystem gets blocked. This includes read operations, if atime updates need to be made. To fix this problem, we need to be able to discard free space extents safely without holding the AGF lock. Fortunately, we already do this with online discard via busy extents. We can mark free space extents as "busy being discarded" under the AGF lock and then unlock the AGF, knowing that nobody will be able to allocate that free space extent until we remove it from the busy tree. Modify xfs_trim_extents to use the same asynchronous discard mechanism backed by busy extents as is used with online discard. This results in the AGF only needing to be held for short periods of time and it is never held while we issue discards. Hence if discard submission gets throttled because it is slow and/or there are lots of them, we aren't preventing other operations from being performed on the AGF while we wait for discards to complete... Signed-off-by: Dave Chinner <dchinner@redhat.com> Reviewed-by: Darrick J. Wong <djwong@kernel.org>
89 lines
2.4 KiB
C
89 lines
2.4 KiB
C
// SPDX-License-Identifier: GPL-2.0
|
|
/*
|
|
* Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
|
|
* Copyright (c) 2010 David Chinner.
|
|
* Copyright (c) 2011 Christoph Hellwig.
|
|
* All Rights Reserved.
|
|
*/
|
|
#ifndef __XFS_EXTENT_BUSY_H__
|
|
#define __XFS_EXTENT_BUSY_H__
|
|
|
|
/*
 * Forward declarations: this header only ever uses pointers to these types,
 * so their full definitions are not required here.
 */
struct xfs_mount;
struct xfs_perag;
struct xfs_trans;
struct xfs_alloc_arg;
|
|
|
|
/*
|
|
* Busy block/extent entry. Indexed by a rbtree in perag to mark blocks that
|
|
* have been freed but whose transactions aren't committed to disk yet.
|
|
*/
|
|
struct xfs_extent_busy {
|
|
struct rb_node rb_node; /* ag by-bno indexed search tree */
|
|
struct list_head list; /* transaction busy extent list */
|
|
xfs_agnumber_t agno;
|
|
xfs_agblock_t bno;
|
|
xfs_extlen_t length;
|
|
unsigned int flags;
|
|
#define XFS_EXTENT_BUSY_DISCARDED 0x01 /* undergoing a discard op. */
|
|
#define XFS_EXTENT_BUSY_SKIP_DISCARD 0x02 /* do not discard */
|
|
};
|
|
|
|
/*
|
|
* List used to track groups of related busy extents all the way through
|
|
* to discard completion.
|
|
*/
|
|
struct xfs_busy_extents {
|
|
struct xfs_mount *mount;
|
|
struct list_head extent_list;
|
|
struct work_struct endio_work;
|
|
|
|
/*
|
|
* Owner is the object containing the struct xfs_busy_extents to free
|
|
* once the busy extents have been processed. If only the
|
|
* xfs_busy_extents object needs freeing, then point this at itself.
|
|
*/
|
|
void *owner;
|
|
};
|
|
|
|
void
|
|
xfs_extent_busy_insert(struct xfs_trans *tp, struct xfs_perag *pag,
|
|
xfs_agblock_t bno, xfs_extlen_t len, unsigned int flags);
|
|
|
|
void
|
|
xfs_extent_busy_insert_discard(struct xfs_perag *pag, xfs_agblock_t bno,
|
|
xfs_extlen_t len, struct list_head *busy_list);
|
|
|
|
void
|
|
xfs_extent_busy_clear(struct xfs_mount *mp, struct list_head *list,
|
|
bool do_discard);
|
|
|
|
int
|
|
xfs_extent_busy_search(struct xfs_mount *mp, struct xfs_perag *pag,
|
|
xfs_agblock_t bno, xfs_extlen_t len);
|
|
|
|
void
|
|
xfs_extent_busy_reuse(struct xfs_mount *mp, struct xfs_perag *pag,
|
|
xfs_agblock_t fbno, xfs_extlen_t flen, bool userdata);
|
|
|
|
bool
|
|
xfs_extent_busy_trim(struct xfs_alloc_arg *args, xfs_agblock_t *bno,
|
|
xfs_extlen_t *len, unsigned *busy_gen);
|
|
|
|
int
|
|
xfs_extent_busy_flush(struct xfs_trans *tp, struct xfs_perag *pag,
|
|
unsigned busy_gen, uint32_t alloc_flags);
|
|
|
|
void
|
|
xfs_extent_busy_wait_all(struct xfs_mount *mp);
|
|
|
|
int
|
|
xfs_extent_busy_ag_cmp(void *priv, const struct list_head *a,
|
|
const struct list_head *b);
|
|
|
|
static inline void xfs_extent_busy_sort(struct list_head *list)
|
|
{
|
|
list_sort(NULL, list, xfs_extent_busy_ag_cmp);
|
|
}
|
|
|
|
#endif /* __XFS_EXTENT_BUSY_H__ */
|