ab23a77687
Move inode inactivation to background work contexts so that it no longer runs in the context that releases the final reference to an inode. This will allow process work that ends up blocking on inactivation to continue doing work while the filesystem processes the inactivation in the background. A typical demonstration of this is unlinking an inode with lots of extents. The extents are removed during inactivation, so this blocks the process that unlinked the inode from the directory structure. By moving the inactivation to the background process, the userspace application can keep working (e.g. unlinking the next inode in the directory) while the inactivation work on the previous inode is done by a different CPU. The implementation of the queue is relatively simple. We use a per-cpu lockless linked list (llist) to queue inodes for inactivation without requiring serialisation mechanisms, and a work item to allow the queue to be processed by a CPU bound worker thread. We also keep a count of the queue depth so that we can trigger work after a number of deferred inactivations have been queued. The use of a bound workqueue with a single work depth allows the workqueue to run one work item per CPU. We queue the work item on the CPU we are currently running on, and so this essentially gives us affine per-cpu worker threads for the per-cpu queues. This maintains the effective CPU affinity that occurs within XFS at the AG level due to all objects in a directory being local to an AG. Hence inactivation work tends to run on the same CPU that last accessed all the objects that inactivation accesses and this maintains hot CPU caches for unlink workloads. A depth of 32 inodes was chosen to match the number of inodes in an inode cluster buffer. This hopefully allows sequential allocation/unlink behaviours to defer inactivation of all the inodes in a single cluster buffer at a time, further helping maintain hot CPU and buffer cache accesses while running inactivations.
A hard per-cpu queue throttle of 256 inodes has been set to avoid runaway queuing when inodes that take a long time to inactivate are being processed, for example when unlinking inodes with large numbers of extents that can take a lot of processing to free. Signed-off-by: Dave Chinner <dchinner@redhat.com> [djwong: tweak comments and tracepoints, convert opflags to state bits] Reviewed-by: Darrick J. Wong <djwong@kernel.org> Signed-off-by: Darrick J. Wong <djwong@kernel.org>
84 lines
2.7 KiB
C
84 lines
2.7 KiB
C
// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (c) 2000-2006 Silicon Graphics, Inc.
 * All Rights Reserved.
 */
#ifndef XFS_SYNC_H
|
|
#define XFS_SYNC_H 1
|
|
|
|
/*
 * Forward declarations of types defined elsewhere; nothing visible in this
 * header needs their layout, only their names.
 */
struct xfs_mount;
struct xfs_perag;

struct xfs_icwalk {
|
|
__u32 icw_flags;
|
|
kuid_t icw_uid;
|
|
kgid_t icw_gid;
|
|
prid_t icw_prid;
|
|
__u64 icw_min_file_size;
|
|
long icw_scan_limit;
|
|
};
|
|
|
|
/* Flags that reflect xfs_fs_eofblocks functionality. */
#define XFS_ICWALK_FLAG_SYNC		(1U << 0) /* sync/wait mode scan */
#define XFS_ICWALK_FLAG_UID		(1U << 1) /* filter by uid */
#define XFS_ICWALK_FLAG_GID		(1U << 2) /* filter by gid */
#define XFS_ICWALK_FLAG_PRID		(1U << 3) /* filter by project id */
#define XFS_ICWALK_FLAG_MINFILESIZE	(1U << 4) /* filter by min file size */

/* Bitwise OR of every XFS_ICWALK_FLAG_* bit defined above. */
#define XFS_ICWALK_FLAGS_VALID	(XFS_ICWALK_FLAG_SYNC | \
				 XFS_ICWALK_FLAG_UID | \
				 XFS_ICWALK_FLAG_GID | \
				 XFS_ICWALK_FLAG_PRID | \
				 XFS_ICWALK_FLAG_MINFILESIZE)

/*
 * Flags for xfs_iget()
 *
 * Each value occupies its own bit, so the flags do not overlap.
 */
#define XFS_IGET_CREATE		0x1
#define XFS_IGET_UNTRUSTED	0x2
#define XFS_IGET_DONTCACHE	0x4
#define XFS_IGET_INCORE		0x8	/* don't read from disk or reinit */

/*
 * xfs_iget() - prototype only; the definition is not part of this header.
 *
 * @mp:		mount structure pointer
 * @tp:		transaction pointer
 * @ino:	inode number
 * @flags:	XFS_IGET_* flags
 * @lock_flags:	lock flags; accepted values are not visible here
 * @ipp:	pointer-to-pointer, presumably where the inode is returned
 *
 * Returns an int; presumably 0 on success or an error code -- TODO confirm
 * against the definition.
 */
int xfs_iget(struct xfs_mount *mp, struct xfs_trans *tp, xfs_ino_t ino,
	     uint flags, uint lock_flags, xfs_inode_t **ipp);

/* recovery needs direct inode allocation capability */
|
|
struct xfs_inode * xfs_inode_alloc(struct xfs_mount *mp, xfs_ino_t ino);
|
|
void xfs_inode_free(struct xfs_inode *ip);
|
|
|
|
/*
 * Inode reclaim entry points (prototypes only; definitions are elsewhere).
 *
 * xfs_reclaim_worker() has the work_struct callback signature, so it is
 * presumably run from a workqueue -- TODO confirm.
 */
void xfs_reclaim_worker(struct work_struct *work);

/*
 * NOTE(review): judging by the names, _count reports a number of inodes and
 * _nr takes a scan budget in @nr_to_scan; the meaning of the long return
 * values is not visible here.
 */
void xfs_reclaim_inodes(struct xfs_mount *mp);
long xfs_reclaim_inodes_count(struct xfs_mount *mp);
long xfs_reclaim_inodes_nr(struct xfs_mount *mp, unsigned long nr_to_scan);

void xfs_inode_mark_reclaimable(struct xfs_inode *ip);

/*
 * Block garbage collection ("blockgc") space-freeing entry points.
 * Prototypes only; definitions are elsewhere.
 *
 * xfs_blockgc_free_dquots() takes three dquot pointers named for user,
 * group and project (udqp/gdqp/pdqp); xfs_blockgc_free_quota() takes an
 * inode; xfs_blockgc_free_space() takes a struct xfs_icwalk.
 *
 * NOTE(review): @iwalk_flags is presumably a set of XFS_ICWALK_FLAG_* bits,
 * and the int return is presumably 0 or an error code -- TODO confirm.
 */
int xfs_blockgc_free_dquots(struct xfs_mount *mp, struct xfs_dquot *udqp,
			    struct xfs_dquot *gdqp, struct xfs_dquot *pdqp,
			    unsigned int iwalk_flags);
int xfs_blockgc_free_quota(struct xfs_inode *ip, unsigned int iwalk_flags);
int xfs_blockgc_free_space(struct xfs_mount *mp, struct xfs_icwalk *icm);

/*
 * Set/clear the per-inode "eofblocks" and "cowblocks" tags.
 * Prototypes only.  NOTE(review): where the tag is stored and who consumes
 * it is not visible in this header.
 */
void xfs_inode_set_eofblocks_tag(struct xfs_inode *ip);
void xfs_inode_clear_eofblocks_tag(struct xfs_inode *ip);

void xfs_inode_set_cowblocks_tag(struct xfs_inode *ip);
void xfs_inode_clear_cowblocks_tag(struct xfs_inode *ip);

/*
 * blockgc work handler.  It has the work_struct callback signature, so it
 * is presumably run from a workqueue -- TODO confirm.
 */
void xfs_blockgc_worker(struct work_struct *work);

int xfs_icache_inode_is_allocated(struct xfs_mount *mp, struct xfs_trans *tp,
|
|
xfs_ino_t ino, bool *inuse);
|
|
|
|
/*
 * Stop/start blockgc for a mount.  Prototypes only.
 * NOTE(review): whether stop waits for in-flight work is not visible here.
 */
void xfs_blockgc_stop(struct xfs_mount *mp);
void xfs_blockgc_start(struct xfs_mount *mp);

/*
 * Background inode inactivation ("inodegc").  Prototypes only.
 *
 * Per the accompanying change description, inodes awaiting inactivation are
 * queued on per-CPU lockless lists and processed by a CPU-bound work item;
 * xfs_inodegc_worker() has the work_struct callback signature for that.
 *
 * NOTE(review): the exact semantics of flush/stop/start, and what
 * xfs_inodegc_cpu_dead() does with the queue of the given @cpu, are not
 * visible in this header -- confirm against the definitions.
 */
void xfs_inodegc_worker(struct work_struct *work);
void xfs_inodegc_flush(struct xfs_mount *mp);
void xfs_inodegc_stop(struct xfs_mount *mp);
void xfs_inodegc_start(struct xfs_mount *mp);
void xfs_inodegc_cpu_dead(struct xfs_mount *mp, unsigned int cpu);

#endif
|