5a605fd6cb
I've been observing periodic corruption reports from xfs_scrub involving the free rt extent counter (frextents) while running xfs/141. That test uses an error injection knob to induce a torn write to the log, and after an arbitrary number of recovery mounts, frextents will count fewer free rt extents than can be found in the rtbitmap.

The root cause of the problem is the misuse of sb_frextents in the incore mount to reflect both incore reservations made by running transactions and the actual count of free rt extents on disk. The following sequence can reproduce the undercount:

Thread 1                        Thread 2
xfs_trans_alloc(rtextents=3)
xfs_mod_frextents(-3)
<blocks>
                                xfs_attr_set()
                                xfs_bmap_attr_addfork()
                                xfs_add_attr2()
                                xfs_log_sb()
                                xfs_sb_to_disk()
                                xfs_trans_commit()
<log flushed to disk>
<log goes down>

Note that thread 1 subtracts 3 from sb_frextents even though it never commits to using that space. Thread 2 writes the undercounted value to the ondisk superblock and logs it in the xattr transaction, which is then flushed to disk. At next mount, log recovery will find the logged superblock and write that back into the filesystem. At the end of log recovery, we reread the superblock and install the recovered undercounted frextents value into the incore superblock. From that point on, we've effectively leaked thread 1's transaction reservation.

The correct fix for this is to separate the incore reservation from the ondisk usage, but that's a matter for the next patch. Because the kernel has been logging superblocks with undercounted frextents for a very long time and we don't demand that sysadmins run xfs_repair after a crash, fix the undercount by recomputing frextents after log recovery.

Gating this on log recovery is a reasonable balance (I think) between correcting the problem and slowing down every mount attempt. Note that xfs_repair will fix undercounted frextents.

Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Dave Chinner <david@fromorbit.com>
165 lines
5.6 KiB
C
165 lines
5.6 KiB
C
// SPDX-License-Identifier: GPL-2.0
|
|
/*
|
|
* Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
|
|
* All Rights Reserved.
|
|
*/
|
|
#ifndef __XFS_RTALLOC_H__
|
|
#define __XFS_RTALLOC_H__
|
|
|
|
/* kernel only definitions and functions */
|
|
|
|
struct xfs_mount;
|
|
struct xfs_trans;
|
|
|
|
/*
|
|
* XXX: Most of the realtime allocation functions deal in units of realtime
|
|
* extents, not realtime blocks. This looks funny when paired with the type
|
|
* name and screams for a larger cleanup.
|
|
*/
|
|
struct xfs_rtalloc_rec {
|
|
xfs_rtblock_t ar_startext;
|
|
xfs_rtblock_t ar_extcount;
|
|
};
|
|
|
|
/*
 * Callback type taken by xfs_rtalloc_query_range() and
 * xfs_rtalloc_query_all().  It is handed the mount, a transaction pointer,
 * one read-only record and the caller's opaque priv pointer, and returns an
 * int (presumably zero or an error code -- confirm against the
 * implementation).
 */
typedef int (*xfs_rtalloc_query_range_fn)(
	struct xfs_mount		*mp,
	struct xfs_trans		*tp,
	const struct xfs_rtalloc_rec	*rec,
	void				*priv);
|
|
|
|
#ifdef CONFIG_XFS_RT
|
|
/*
|
|
* Function prototypes for exported functions.
|
|
*/
|
|
|
|
/*
|
|
* Allocate an extent in the realtime subvolume, with the usual allocation
|
|
* parameters. The length units are all in realtime extents, as is the
|
|
* result block number.
|
|
*/
|
|
int /* error */
|
|
xfs_rtallocate_extent(
|
|
struct xfs_trans *tp, /* transaction pointer */
|
|
xfs_rtblock_t bno, /* starting block number to allocate */
|
|
xfs_extlen_t minlen, /* minimum length to allocate */
|
|
xfs_extlen_t maxlen, /* maximum length to allocate */
|
|
xfs_extlen_t *len, /* out: actual length allocated */
|
|
int wasdel, /* was a delayed allocation extent */
|
|
xfs_extlen_t prod, /* extent product factor */
|
|
xfs_rtblock_t *rtblock); /* out: start block allocated */
|
|
|
|
/*
|
|
* Free an extent in the realtime subvolume. Length is expressed in
|
|
* realtime extents, as is the block number.
|
|
*/
|
|
int /* error */
|
|
xfs_rtfree_extent(
|
|
struct xfs_trans *tp, /* transaction pointer */
|
|
xfs_rtblock_t bno, /* starting block number to free */
|
|
xfs_extlen_t len); /* length of extent freed */
|
|
|
|
/*
 * Initialize realtime fields in the mount structure.
 */
int					/* error */
xfs_rtmount_init(
	struct xfs_mount	*mp);	/* file system mount structure */
void
xfs_rtunmount_inodes(
	struct xfs_mount	*mp);
|
|
|
|
/*
 * Get the bitmap and summary inodes into the mount structure
 * at mount time.
 */
int					/* error */
xfs_rtmount_inodes(
	struct xfs_mount	*mp);	/* file system mount structure */
|
|
|
|
/*
|
|
* Pick an extent for allocation at the start of a new realtime file.
|
|
* Use the sequence number stored in the atime field of the bitmap inode.
|
|
* Translate this to a fraction of the rtextents, and return the product
|
|
* of rtextents and the fraction.
|
|
* The fraction sequence is 0, 1/2, 1/4, 3/4, 1/8, ..., 7/8, 1/16, ...
|
|
*/
|
|
int /* error */
|
|
xfs_rtpick_extent(
|
|
struct xfs_mount *mp, /* file system mount point */
|
|
struct xfs_trans *tp, /* transaction pointer */
|
|
xfs_extlen_t len, /* allocation length (rtextents) */
|
|
xfs_rtblock_t *pick); /* result rt extent */
|
|
|
|
/*
|
|
* Grow the realtime area of the filesystem.
|
|
*/
|
|
int
|
|
xfs_growfs_rt(
|
|
struct xfs_mount *mp, /* file system mount structure */
|
|
xfs_growfs_rt_t *in); /* user supplied growfs struct */
|
|
|
|
/*
|
|
* From xfs_rtbitmap.c
|
|
*/
|
|
int xfs_rtbuf_get(struct xfs_mount *mp, struct xfs_trans *tp,
|
|
xfs_rtblock_t block, int issum, struct xfs_buf **bpp);
|
|
int xfs_rtcheck_range(struct xfs_mount *mp, struct xfs_trans *tp,
|
|
xfs_rtblock_t start, xfs_extlen_t len, int val,
|
|
xfs_rtblock_t *new, int *stat);
|
|
int xfs_rtfind_back(struct xfs_mount *mp, struct xfs_trans *tp,
|
|
xfs_rtblock_t start, xfs_rtblock_t limit,
|
|
xfs_rtblock_t *rtblock);
|
|
int xfs_rtfind_forw(struct xfs_mount *mp, struct xfs_trans *tp,
|
|
xfs_rtblock_t start, xfs_rtblock_t limit,
|
|
xfs_rtblock_t *rtblock);
|
|
int xfs_rtmodify_range(struct xfs_mount *mp, struct xfs_trans *tp,
|
|
xfs_rtblock_t start, xfs_extlen_t len, int val);
|
|
int xfs_rtmodify_summary_int(struct xfs_mount *mp, struct xfs_trans *tp,
|
|
int log, xfs_rtblock_t bbno, int delta,
|
|
struct xfs_buf **rbpp, xfs_fsblock_t *rsb,
|
|
xfs_suminfo_t *sum);
|
|
int xfs_rtmodify_summary(struct xfs_mount *mp, struct xfs_trans *tp, int log,
|
|
xfs_rtblock_t bbno, int delta, struct xfs_buf **rbpp,
|
|
xfs_fsblock_t *rsb);
|
|
int xfs_rtfree_range(struct xfs_mount *mp, struct xfs_trans *tp,
|
|
xfs_rtblock_t start, xfs_extlen_t len,
|
|
struct xfs_buf **rbpp, xfs_fsblock_t *rsb);
|
|
int xfs_rtalloc_query_range(struct xfs_mount *mp, struct xfs_trans *tp,
|
|
const struct xfs_rtalloc_rec *low_rec,
|
|
const struct xfs_rtalloc_rec *high_rec,
|
|
xfs_rtalloc_query_range_fn fn, void *priv);
|
|
int xfs_rtalloc_query_all(struct xfs_mount *mp, struct xfs_trans *tp,
|
|
xfs_rtalloc_query_range_fn fn,
|
|
void *priv);
|
|
bool xfs_verify_rtbno(struct xfs_mount *mp, xfs_rtblock_t rtbno);
|
|
int xfs_rtalloc_extent_is_free(struct xfs_mount *mp, struct xfs_trans *tp,
|
|
xfs_rtblock_t start, xfs_extlen_t len,
|
|
bool *is_free);
|
|
int xfs_rtalloc_reinit_frextents(struct xfs_mount *mp);
|
|
#else
|
|
/*
 * Stubs for kernels built without CONFIG_XFS_RT.  None of them evaluates its
 * arguments.  Each stub takes the same number of arguments as the real
 * function it replaces, so a call site compiles identically in both
 * configurations.  Failing stubs yield -ENOSYS, the same negative errno that
 * the xfs_rtmount_init() stub in this branch returns.
 */
# define xfs_rtallocate_extent(t,b,min,max,l,f,p,rb)	(-ENOSYS)
# define xfs_rtfree_extent(t,b,l)			(-ENOSYS)
# define xfs_rtpick_extent(m,t,l,rb)			(-ENOSYS)
# define xfs_growfs_rt(mp,in)				(-ENOSYS)
# define xfs_rtalloc_query_range(m,t,l,h,f,p)		(-ENOSYS)
# define xfs_rtalloc_query_all(m,t,f,p)			(-ENOSYS)
# define xfs_rtbuf_get(m,t,b,i,p)			(-ENOSYS)
# define xfs_verify_rtbno(m, r)				(false)
# define xfs_rtalloc_extent_is_free(m,t,s,l,i)		(-ENOSYS)
# define xfs_rtalloc_reinit_frextents(m)		(0)
|
|
static inline int /* error */
|
|
xfs_rtmount_init(
|
|
xfs_mount_t *mp) /* file system mount structure */
|
|
{
|
|
if (mp->m_sb.sb_rblocks == 0)
|
|
return 0;
|
|
|
|
xfs_warn(mp, "Not built with CONFIG_XFS_RT");
|
|
return -ENOSYS;
|
|
}
|
|
/*
 * !CONFIG_XFS_RT stubs.  xfs_rtmount_inodes() yields 0 when the mount passed
 * as its argument has sb_rblocks == 0, and -ENOSYS otherwise.  It reads the
 * mount through its own parameter, so it does not depend on the caller
 * having a variable named "mp" in scope.  xfs_rtunmount_inodes() expands to
 * nothing.
 */
# define xfs_rtmount_inodes(m)  (((m)->m_sb.sb_rblocks == 0) ? 0 : (-ENOSYS))
# define xfs_rtunmount_inodes(m)
|
|
#endif /* CONFIG_XFS_RT */
|
|
|
|
#endif /* __XFS_RTALLOC_H__ */
|