4ed6435cc3
In commit e1a4e37cc7b6, we clamped the length of bunmapi calls on the data forks of shared files to avoid two failure scenarios: one where the extent being unmapped is so sparsely shared that we exceed the transaction reservation with the sheer number of refcount btree updates and EFI intent items; and the other where we attach so many deferred updates to the transaction that we pin the log tail and later the log head meets the tail, causing the log to livelock. We avoid triggering the first problem by tracking the number of ops in the refcount btree cursor and forcing a requeue of the refcount intent item any time we think that we might be close to overflowing. This has been baked into XFS since before the original e1a4 patch. A recent patchset fixed the second problem by changing the deferred ops code to finish all the work items created by each round of trying to complete a refcount intent item, which eliminates the long chains of deferred items (27dad); and causing long-running transactions to relog their intent log items when space in the log gets low (74f4d). Because this clamp affects /any/ unmapping request regardless of the sharing factors of the component blocks, it degrades the performance of all large unmapping requests -- whereas with an unshared file we can unmap millions of blocks in one go, shared files are limited to unmapping a few thousand blocks at a time, which causes the upper level code to spin in a bunmapi loop even if it wasn't needed. This also eliminates one more place where log recovery behavior can differ from online behavior, because bunmapi operations no longer need to requeue. The fstest generic/447 was created to test the old fix, and it still passes with this applied. Partial-revert-of: e1a4e37cc7b6 ("xfs: try to avoid blowing out the transaction reservation when bunmaping a shared extent") Depends: 27dada070d59 ("xfs: change the order in which child and parent defer ops ar finished") Depends: 74f4d6a1e065 ("xfs: only relog deferred intent items if free space in the log gets low") Signed-off-by: Darrick J. Wong <djwong@kernel.org> Reviewed-by: Dave Chinner <dchinner@redhat.com> Reviewed-by: Christoph Hellwig <hch@lst.de>
95 lines
3.4 KiB
C
95 lines
3.4 KiB
C
// SPDX-License-Identifier: GPL-2.0+
|
|
/*
|
|
* Copyright (C) 2016 Oracle. All Rights Reserved.
|
|
* Author: Darrick J. Wong <darrick.wong@oracle.com>
|
|
*/
|
|
#ifndef __XFS_REFCOUNT_H__
|
|
#define __XFS_REFCOUNT_H__
|
|
|
|
struct xfs_trans;
|
|
struct xfs_mount;
|
|
struct xfs_perag;
|
|
struct xfs_btree_cur;
|
|
struct xfs_bmbt_irec;
|
|
struct xfs_refcount_irec;
|
|
|
|
extern int xfs_refcount_lookup_le(struct xfs_btree_cur *cur,
|
|
xfs_agblock_t bno, int *stat);
|
|
extern int xfs_refcount_lookup_ge(struct xfs_btree_cur *cur,
|
|
xfs_agblock_t bno, int *stat);
|
|
extern int xfs_refcount_lookup_eq(struct xfs_btree_cur *cur,
|
|
xfs_agblock_t bno, int *stat);
|
|
extern int xfs_refcount_get_rec(struct xfs_btree_cur *cur,
|
|
struct xfs_refcount_irec *irec, int *stat);
|
|
|
|
enum xfs_refcount_intent_type {
|
|
XFS_REFCOUNT_INCREASE = 1,
|
|
XFS_REFCOUNT_DECREASE,
|
|
XFS_REFCOUNT_ALLOC_COW,
|
|
XFS_REFCOUNT_FREE_COW,
|
|
};
|
|
|
|
struct xfs_refcount_intent {
|
|
struct list_head ri_list;
|
|
enum xfs_refcount_intent_type ri_type;
|
|
xfs_extlen_t ri_blockcount;
|
|
xfs_fsblock_t ri_startblock;
|
|
};
|
|
|
|
void xfs_refcount_increase_extent(struct xfs_trans *tp,
|
|
struct xfs_bmbt_irec *irec);
|
|
void xfs_refcount_decrease_extent(struct xfs_trans *tp,
|
|
struct xfs_bmbt_irec *irec);
|
|
|
|
extern void xfs_refcount_finish_one_cleanup(struct xfs_trans *tp,
|
|
struct xfs_btree_cur *rcur, int error);
|
|
extern int xfs_refcount_finish_one(struct xfs_trans *tp,
|
|
enum xfs_refcount_intent_type type, xfs_fsblock_t startblock,
|
|
xfs_extlen_t blockcount, xfs_fsblock_t *new_fsb,
|
|
xfs_extlen_t *new_len, struct xfs_btree_cur **pcur);
|
|
|
|
extern int xfs_refcount_find_shared(struct xfs_btree_cur *cur,
|
|
xfs_agblock_t agbno, xfs_extlen_t aglen, xfs_agblock_t *fbno,
|
|
xfs_extlen_t *flen, bool find_end_of_shared);
|
|
|
|
void xfs_refcount_alloc_cow_extent(struct xfs_trans *tp, xfs_fsblock_t fsb,
|
|
xfs_extlen_t len);
|
|
void xfs_refcount_free_cow_extent(struct xfs_trans *tp, xfs_fsblock_t fsb,
|
|
xfs_extlen_t len);
|
|
extern int xfs_refcount_recover_cow_leftovers(struct xfs_mount *mp,
|
|
struct xfs_perag *pag);
|
|
|
|
/*
|
|
* While we're adjusting the refcounts records of an extent, we have
|
|
* to keep an eye on the number of extents we're dirtying -- run too
|
|
* many in a single transaction and we'll exceed the transaction's
|
|
* reservation and crash the fs. Each record adds 12 bytes to the
|
|
* log (plus any key updates) so we'll conservatively assume 32 bytes
|
|
* per record. We must also leave space for btree splits on both ends
|
|
* of the range and space for the CUD and a new CUI.
|
|
*
|
|
* Each EFI that we attach to the transaction is assumed to consume ~32 bytes.
|
|
* This is a low estimate for an EFI tracking a single extent (16 bytes for the
|
|
* EFI header, 16 for the extent, and 12 for the xlog op header), but the
|
|
* estimate is acceptable if there's more than one extent being freed.
|
|
* In the worst case of freeing every other block during a refcount decrease
|
|
* operation, we amortize the space used for one EFI log item across 16
|
|
* extents.
|
|
*/
|
|
#define XFS_REFCOUNT_ITEM_OVERHEAD 32
|
|
|
|
extern int xfs_refcount_has_record(struct xfs_btree_cur *cur,
|
|
xfs_agblock_t bno, xfs_extlen_t len, bool *exists);
|
|
union xfs_btree_rec;
|
|
extern void xfs_refcount_btrec_to_irec(const union xfs_btree_rec *rec,
|
|
struct xfs_refcount_irec *irec);
|
|
extern int xfs_refcount_insert(struct xfs_btree_cur *cur,
|
|
struct xfs_refcount_irec *irec, int *stat);
|
|
|
|
extern struct kmem_cache *xfs_refcount_intent_cache;
|
|
|
|
int __init xfs_refcount_intent_init_cache(void);
|
|
void xfs_refcount_intent_destroy_cache(void);
|
|
|
|
#endif /* __XFS_REFCOUNT_H__ */
|