Merge branch 'xfs-4.9-log-recovery-fixes' into for-next
This commit is contained in:
commit
155cd433b5
@ -258,7 +258,7 @@ xfs_alloc_compute_diff(
|
||||
xfs_agblock_t wantbno, /* target starting block */
|
||||
xfs_extlen_t wantlen, /* target length */
|
||||
xfs_extlen_t alignment, /* target alignment */
|
||||
char userdata, /* are we allocating data? */
|
||||
int datatype, /* are we allocating data? */
|
||||
xfs_agblock_t freebno, /* freespace's starting block */
|
||||
xfs_extlen_t freelen, /* freespace's length */
|
||||
xfs_agblock_t *newbnop) /* result: best start block from free */
|
||||
@ -269,6 +269,7 @@ xfs_alloc_compute_diff(
|
||||
xfs_extlen_t newlen1=0; /* length with newbno1 */
|
||||
xfs_extlen_t newlen2=0; /* length with newbno2 */
|
||||
xfs_agblock_t wantend; /* end of target extent */
|
||||
bool userdata = xfs_alloc_is_userdata(datatype);
|
||||
|
||||
ASSERT(freelen >= wantlen);
|
||||
freeend = freebno + freelen;
|
||||
@ -924,7 +925,7 @@ xfs_alloc_find_best_extent(
|
||||
|
||||
sdiff = xfs_alloc_compute_diff(args->agbno, args->len,
|
||||
args->alignment,
|
||||
args->userdata, *sbnoa,
|
||||
args->datatype, *sbnoa,
|
||||
*slena, &new);
|
||||
|
||||
/*
|
||||
@ -1108,7 +1109,7 @@ restart:
|
||||
if (args->len < blen)
|
||||
continue;
|
||||
ltdiff = xfs_alloc_compute_diff(args->agbno, args->len,
|
||||
args->alignment, args->userdata, ltbnoa,
|
||||
args->alignment, args->datatype, ltbnoa,
|
||||
ltlena, &ltnew);
|
||||
if (ltnew != NULLAGBLOCK &&
|
||||
(args->len > blen || ltdiff < bdiff)) {
|
||||
@ -1261,7 +1262,7 @@ restart:
|
||||
args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen);
|
||||
xfs_alloc_fix_len(args);
|
||||
ltdiff = xfs_alloc_compute_diff(args->agbno, args->len,
|
||||
args->alignment, args->userdata, ltbnoa,
|
||||
args->alignment, args->datatype, ltbnoa,
|
||||
ltlena, &ltnew);
|
||||
|
||||
error = xfs_alloc_find_best_extent(args,
|
||||
@ -1278,7 +1279,7 @@ restart:
|
||||
args->len = XFS_EXTLEN_MIN(gtlena, args->maxlen);
|
||||
xfs_alloc_fix_len(args);
|
||||
gtdiff = xfs_alloc_compute_diff(args->agbno, args->len,
|
||||
args->alignment, args->userdata, gtbnoa,
|
||||
args->alignment, args->datatype, gtbnoa,
|
||||
gtlena, &gtnew);
|
||||
|
||||
error = xfs_alloc_find_best_extent(args,
|
||||
@ -1338,7 +1339,7 @@ restart:
|
||||
}
|
||||
rlen = args->len;
|
||||
(void)xfs_alloc_compute_diff(args->agbno, rlen, args->alignment,
|
||||
args->userdata, ltbnoa, ltlena, &ltnew);
|
||||
args->datatype, ltbnoa, ltlena, &ltnew);
|
||||
ASSERT(ltnew >= ltbno);
|
||||
ASSERT(ltnew + rlen <= ltbnoa + ltlena);
|
||||
ASSERT(ltnew + rlen <= be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length));
|
||||
@ -1617,9 +1618,9 @@ xfs_alloc_ag_vextent_small(
|
||||
goto error0;
|
||||
if (fbno != NULLAGBLOCK) {
|
||||
xfs_extent_busy_reuse(args->mp, args->agno, fbno, 1,
|
||||
args->userdata);
|
||||
xfs_alloc_allow_busy_reuse(args->datatype));
|
||||
|
||||
if (args->userdata) {
|
||||
if (xfs_alloc_is_userdata(args->datatype)) {
|
||||
xfs_buf_t *bp;
|
||||
|
||||
bp = xfs_btree_get_bufs(args->mp, args->tp,
|
||||
@ -2099,7 +2100,7 @@ xfs_alloc_fix_freelist(
|
||||
* somewhere else if we are not being asked to try harder at this
|
||||
* point
|
||||
*/
|
||||
if (pag->pagf_metadata && args->userdata &&
|
||||
if (pag->pagf_metadata && xfs_alloc_is_userdata(args->datatype) &&
|
||||
(flags & XFS_ALLOC_FLAG_TRYLOCK)) {
|
||||
ASSERT(!(flags & XFS_ALLOC_FLAG_FREEING));
|
||||
goto out_agbp_relse;
|
||||
@ -2675,7 +2676,7 @@ xfs_alloc_vextent(
|
||||
* Try near allocation first, then anywhere-in-ag after
|
||||
* the first a.g. fails.
|
||||
*/
|
||||
if ((args->userdata & XFS_ALLOC_INITIAL_USER_DATA) &&
|
||||
if ((args->datatype & XFS_ALLOC_INITIAL_USER_DATA) &&
|
||||
(mp->m_flags & XFS_MOUNT_32BITINODES)) {
|
||||
args->fsbno = XFS_AGB_TO_FSB(mp,
|
||||
((mp->m_agfrotor / rotorstep) %
|
||||
@ -2808,7 +2809,7 @@ xfs_alloc_vextent(
|
||||
#endif
|
||||
|
||||
/* Zero the extent if we were asked to do so */
|
||||
if (args->userdata & XFS_ALLOC_USERDATA_ZERO) {
|
||||
if (args->datatype & XFS_ALLOC_USERDATA_ZERO) {
|
||||
error = xfs_zero_extent(args->ip, args->fsbno, args->len);
|
||||
if (error)
|
||||
goto error0;
|
||||
|
@ -85,20 +85,33 @@ typedef struct xfs_alloc_arg {
|
||||
xfs_extlen_t len; /* output: actual size of extent */
|
||||
xfs_alloctype_t type; /* allocation type XFS_ALLOCTYPE_... */
|
||||
xfs_alloctype_t otype; /* original allocation type */
|
||||
int datatype; /* mask defining data type treatment */
|
||||
char wasdel; /* set if allocation was prev delayed */
|
||||
char wasfromfl; /* set if allocation is from freelist */
|
||||
char userdata; /* mask defining userdata treatment */
|
||||
xfs_fsblock_t firstblock; /* io first block allocated */
|
||||
struct xfs_owner_info oinfo; /* owner of blocks being allocated */
|
||||
enum xfs_ag_resv_type resv; /* block reservation to use */
|
||||
} xfs_alloc_arg_t;
|
||||
|
||||
/*
|
||||
* Defines for userdata
|
||||
* Defines for datatype
|
||||
*/
|
||||
#define XFS_ALLOC_USERDATA (1 << 0)/* allocation is for user data*/
|
||||
#define XFS_ALLOC_INITIAL_USER_DATA (1 << 1)/* special case start of file */
|
||||
#define XFS_ALLOC_USERDATA_ZERO (1 << 2)/* zero extent on allocation */
|
||||
#define XFS_ALLOC_NOBUSY (1 << 3)/* Busy extents not allowed */
|
||||
|
||||
static inline bool
|
||||
xfs_alloc_is_userdata(int datatype)
|
||||
{
|
||||
return (datatype & ~XFS_ALLOC_NOBUSY) != 0;
|
||||
}
|
||||
|
||||
static inline bool
|
||||
xfs_alloc_allow_busy_reuse(int datatype)
|
||||
{
|
||||
return (datatype & XFS_ALLOC_NOBUSY) == 0;
|
||||
}
|
||||
|
||||
/* freespace limit calculations */
|
||||
#define XFS_ALLOC_AGFL_RESERVE 4
|
||||
|
@ -3348,7 +3348,8 @@ xfs_bmap_adjacent(
|
||||
|
||||
mp = ap->ip->i_mount;
|
||||
nullfb = *ap->firstblock == NULLFSBLOCK;
|
||||
rt = XFS_IS_REALTIME_INODE(ap->ip) && ap->userdata;
|
||||
rt = XFS_IS_REALTIME_INODE(ap->ip) &&
|
||||
xfs_alloc_is_userdata(ap->datatype);
|
||||
fb_agno = nullfb ? NULLAGNUMBER : XFS_FSB_TO_AGNO(mp, *ap->firstblock);
|
||||
/*
|
||||
* If allocating at eof, and there's a previous real block,
|
||||
@ -3624,7 +3625,7 @@ xfs_bmap_btalloc(
|
||||
{
|
||||
xfs_mount_t *mp; /* mount point structure */
|
||||
xfs_alloctype_t atype = 0; /* type for allocation routines */
|
||||
xfs_extlen_t align; /* minimum allocation alignment */
|
||||
xfs_extlen_t align = 0; /* minimum allocation alignment */
|
||||
xfs_agnumber_t fb_agno; /* ag number of ap->firstblock */
|
||||
xfs_agnumber_t ag;
|
||||
xfs_alloc_arg_t args;
|
||||
@ -3647,7 +3648,8 @@ xfs_bmap_btalloc(
|
||||
else if (mp->m_dalign)
|
||||
stripe_align = mp->m_dalign;
|
||||
|
||||
align = ap->userdata ? xfs_get_extsz_hint(ap->ip) : 0;
|
||||
if (xfs_alloc_is_userdata(ap->datatype))
|
||||
align = xfs_get_extsz_hint(ap->ip);
|
||||
if (unlikely(align)) {
|
||||
error = xfs_bmap_extsize_align(mp, &ap->got, &ap->prev,
|
||||
align, 0, ap->eof, 0, ap->conv,
|
||||
@ -3660,7 +3662,8 @@ xfs_bmap_btalloc(
|
||||
nullfb = *ap->firstblock == NULLFSBLOCK;
|
||||
fb_agno = nullfb ? NULLAGNUMBER : XFS_FSB_TO_AGNO(mp, *ap->firstblock);
|
||||
if (nullfb) {
|
||||
if (ap->userdata && xfs_inode_is_filestream(ap->ip)) {
|
||||
if (xfs_alloc_is_userdata(ap->datatype) &&
|
||||
xfs_inode_is_filestream(ap->ip)) {
|
||||
ag = xfs_filestream_lookup_ag(ap->ip);
|
||||
ag = (ag != NULLAGNUMBER) ? ag : 0;
|
||||
ap->blkno = XFS_AGB_TO_FSB(mp, ag, 0);
|
||||
@ -3700,7 +3703,8 @@ xfs_bmap_btalloc(
|
||||
* enough for the request. If one isn't found, then adjust
|
||||
* the minimum allocation size to the largest space found.
|
||||
*/
|
||||
if (ap->userdata && xfs_inode_is_filestream(ap->ip))
|
||||
if (xfs_alloc_is_userdata(ap->datatype) &&
|
||||
xfs_inode_is_filestream(ap->ip))
|
||||
error = xfs_bmap_btalloc_filestreams(ap, &args, &blen);
|
||||
else
|
||||
error = xfs_bmap_btalloc_nullfb(ap, &args, &blen);
|
||||
@ -3784,8 +3788,8 @@ xfs_bmap_btalloc(
|
||||
args.minleft = ap->minleft;
|
||||
args.wasdel = ap->wasdel;
|
||||
args.resv = XFS_AG_RESV_NONE;
|
||||
args.userdata = ap->userdata;
|
||||
if (ap->userdata & XFS_ALLOC_USERDATA_ZERO)
|
||||
args.datatype = ap->datatype;
|
||||
if (ap->datatype & XFS_ALLOC_USERDATA_ZERO)
|
||||
args.ip = ap->ip;
|
||||
|
||||
error = xfs_alloc_vextent(&args);
|
||||
@ -3879,7 +3883,8 @@ STATIC int
|
||||
xfs_bmap_alloc(
|
||||
struct xfs_bmalloca *ap) /* bmap alloc argument struct */
|
||||
{
|
||||
if (XFS_IS_REALTIME_INODE(ap->ip) && ap->userdata)
|
||||
if (XFS_IS_REALTIME_INODE(ap->ip) &&
|
||||
xfs_alloc_is_userdata(ap->datatype))
|
||||
return xfs_bmap_rtalloc(ap);
|
||||
return xfs_bmap_btalloc(ap);
|
||||
}
|
||||
@ -4204,15 +4209,21 @@ xfs_bmapi_allocate(
|
||||
}
|
||||
|
||||
/*
|
||||
* Indicate if this is the first user data in the file, or just any
|
||||
* user data. And if it is userdata, indicate whether it needs to
|
||||
* be initialised to zero during allocation.
|
||||
* Set the data type being allocated. For the data fork, the first data
|
||||
* in the file is treated differently to all other allocations. For the
|
||||
* attribute fork, we only need to ensure the allocated range is not on
|
||||
* the busy list.
|
||||
*/
|
||||
if (!(bma->flags & XFS_BMAPI_METADATA)) {
|
||||
bma->userdata = (bma->offset == 0) ?
|
||||
XFS_ALLOC_INITIAL_USER_DATA : XFS_ALLOC_USERDATA;
|
||||
bma->datatype = XFS_ALLOC_NOBUSY;
|
||||
if (whichfork == XFS_DATA_FORK) {
|
||||
if (bma->offset == 0)
|
||||
bma->datatype |= XFS_ALLOC_INITIAL_USER_DATA;
|
||||
else
|
||||
bma->datatype |= XFS_ALLOC_USERDATA;
|
||||
}
|
||||
if (bma->flags & XFS_BMAPI_ZERO)
|
||||
bma->userdata |= XFS_ALLOC_USERDATA_ZERO;
|
||||
bma->datatype |= XFS_ALLOC_USERDATA_ZERO;
|
||||
}
|
||||
|
||||
bma->minlen = (bma->flags & XFS_BMAPI_CONTIG) ? bma->length : 1;
|
||||
@ -4482,7 +4493,7 @@ xfs_bmapi_write(
|
||||
bma.tp = tp;
|
||||
bma.ip = ip;
|
||||
bma.total = total;
|
||||
bma.userdata = 0;
|
||||
bma.datatype = 0;
|
||||
bma.dfops = dfops;
|
||||
bma.firstblock = firstblock;
|
||||
|
||||
|
@ -54,7 +54,7 @@ struct xfs_bmalloca {
|
||||
bool wasdel; /* replacing a delayed allocation */
|
||||
bool aeof; /* allocated space at eof */
|
||||
bool conv; /* overwriting unwritten extents */
|
||||
char userdata;/* userdata mask */
|
||||
int datatype;/* data type being allocated */
|
||||
int flags;
|
||||
};
|
||||
|
||||
|
@ -182,7 +182,7 @@ xfs_bmap_rtalloc(
|
||||
XFS_TRANS_DQ_RTBCOUNT, (long) ralen);
|
||||
|
||||
/* Zero the extent if we were asked to do so */
|
||||
if (ap->userdata & XFS_ALLOC_USERDATA_ZERO) {
|
||||
if (ap->datatype & XFS_ALLOC_USERDATA_ZERO) {
|
||||
error = xfs_zero_extent(ap->ip, ap->blkno, ap->length);
|
||||
if (error)
|
||||
return error;
|
||||
|
@ -384,7 +384,7 @@ restart:
|
||||
* If this is a metadata allocation, try to reuse the busy
|
||||
* extent instead of trimming the allocation.
|
||||
*/
|
||||
if (!args->userdata &&
|
||||
if (!xfs_alloc_is_userdata(args->datatype) &&
|
||||
!(busyp->flags & XFS_EXTENT_BUSY_DISCARDED)) {
|
||||
if (!xfs_extent_busy_update_extent(args->mp, args->pag,
|
||||
busyp, fbno, flen,
|
||||
|
@ -371,7 +371,8 @@ xfs_filestream_new_ag(
|
||||
struct xfs_mount *mp = ip->i_mount;
|
||||
xfs_extlen_t minlen = ap->length;
|
||||
xfs_agnumber_t startag = 0;
|
||||
int flags, err = 0;
|
||||
int flags = 0;
|
||||
int err = 0;
|
||||
struct xfs_mru_cache_elem *mru;
|
||||
|
||||
*agp = NULLAGNUMBER;
|
||||
@ -387,8 +388,10 @@ xfs_filestream_new_ag(
|
||||
startag = (item->ag + 1) % mp->m_sb.sb_agcount;
|
||||
}
|
||||
|
||||
flags = (ap->userdata ? XFS_PICK_USERDATA : 0) |
|
||||
(ap->dfops->dop_low ? XFS_PICK_LOWSPACE : 0);
|
||||
if (xfs_alloc_is_userdata(ap->datatype))
|
||||
flags |= XFS_PICK_USERDATA;
|
||||
if (ap->dfops->dop_low)
|
||||
flags |= XFS_PICK_LOWSPACE;
|
||||
|
||||
err = xfs_filestream_pick_ag(pip, startag, agp, flags, minlen);
|
||||
|
||||
|
@ -413,7 +413,8 @@ struct xlog {
|
||||
/* log record crc error injection factor */
|
||||
uint32_t l_badcrc_factor;
|
||||
#endif
|
||||
|
||||
/* log recovery lsn tracking (for buffer submission) */
|
||||
xfs_lsn_t l_recovery_lsn;
|
||||
};
|
||||
|
||||
#define XLOG_BUF_CANCEL_BUCKET(log, blkno) \
|
||||
|
@ -44,6 +44,7 @@
|
||||
#include "xfs_error.h"
|
||||
#include "xfs_dir2.h"
|
||||
#include "xfs_rmap_item.h"
|
||||
#include "xfs_buf_item.h"
|
||||
|
||||
#define BLK_AVG(blk1, blk2) ((blk1+blk2) >> 1)
|
||||
|
||||
@ -381,6 +382,15 @@ xlog_recover_iodone(
|
||||
SHUTDOWN_META_IO_ERROR);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* On v5 supers, a bli could be attached to update the metadata LSN.
|
||||
* Clean it up.
|
||||
*/
|
||||
if (bp->b_fspriv)
|
||||
xfs_buf_item_relse(bp);
|
||||
ASSERT(bp->b_fspriv == NULL);
|
||||
|
||||
bp->b_iodone = NULL;
|
||||
xfs_buf_ioend(bp);
|
||||
}
|
||||
@ -2360,12 +2370,14 @@ static void
|
||||
xlog_recover_validate_buf_type(
|
||||
struct xfs_mount *mp,
|
||||
struct xfs_buf *bp,
|
||||
xfs_buf_log_format_t *buf_f)
|
||||
xfs_buf_log_format_t *buf_f,
|
||||
xfs_lsn_t current_lsn)
|
||||
{
|
||||
struct xfs_da_blkinfo *info = bp->b_addr;
|
||||
__uint32_t magic32;
|
||||
__uint16_t magic16;
|
||||
__uint16_t magicda;
|
||||
char *warnmsg = NULL;
|
||||
|
||||
/*
|
||||
* We can only do post recovery validation on items on CRC enabled
|
||||
@ -2404,31 +2416,27 @@ xlog_recover_validate_buf_type(
|
||||
bp->b_ops = &xfs_rmapbt_buf_ops;
|
||||
break;
|
||||
default:
|
||||
xfs_warn(mp, "Bad btree block magic!");
|
||||
ASSERT(0);
|
||||
warnmsg = "Bad btree block magic!";
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case XFS_BLFT_AGF_BUF:
|
||||
if (magic32 != XFS_AGF_MAGIC) {
|
||||
xfs_warn(mp, "Bad AGF block magic!");
|
||||
ASSERT(0);
|
||||
warnmsg = "Bad AGF block magic!";
|
||||
break;
|
||||
}
|
||||
bp->b_ops = &xfs_agf_buf_ops;
|
||||
break;
|
||||
case XFS_BLFT_AGFL_BUF:
|
||||
if (magic32 != XFS_AGFL_MAGIC) {
|
||||
xfs_warn(mp, "Bad AGFL block magic!");
|
||||
ASSERT(0);
|
||||
warnmsg = "Bad AGFL block magic!";
|
||||
break;
|
||||
}
|
||||
bp->b_ops = &xfs_agfl_buf_ops;
|
||||
break;
|
||||
case XFS_BLFT_AGI_BUF:
|
||||
if (magic32 != XFS_AGI_MAGIC) {
|
||||
xfs_warn(mp, "Bad AGI block magic!");
|
||||
ASSERT(0);
|
||||
warnmsg = "Bad AGI block magic!";
|
||||
break;
|
||||
}
|
||||
bp->b_ops = &xfs_agi_buf_ops;
|
||||
@ -2438,8 +2446,7 @@ xlog_recover_validate_buf_type(
|
||||
case XFS_BLFT_GDQUOT_BUF:
|
||||
#ifdef CONFIG_XFS_QUOTA
|
||||
if (magic16 != XFS_DQUOT_MAGIC) {
|
||||
xfs_warn(mp, "Bad DQUOT block magic!");
|
||||
ASSERT(0);
|
||||
warnmsg = "Bad DQUOT block magic!";
|
||||
break;
|
||||
}
|
||||
bp->b_ops = &xfs_dquot_buf_ops;
|
||||
@ -2451,16 +2458,14 @@ xlog_recover_validate_buf_type(
|
||||
break;
|
||||
case XFS_BLFT_DINO_BUF:
|
||||
if (magic16 != XFS_DINODE_MAGIC) {
|
||||
xfs_warn(mp, "Bad INODE block magic!");
|
||||
ASSERT(0);
|
||||
warnmsg = "Bad INODE block magic!";
|
||||
break;
|
||||
}
|
||||
bp->b_ops = &xfs_inode_buf_ops;
|
||||
break;
|
||||
case XFS_BLFT_SYMLINK_BUF:
|
||||
if (magic32 != XFS_SYMLINK_MAGIC) {
|
||||
xfs_warn(mp, "Bad symlink block magic!");
|
||||
ASSERT(0);
|
||||
warnmsg = "Bad symlink block magic!";
|
||||
break;
|
||||
}
|
||||
bp->b_ops = &xfs_symlink_buf_ops;
|
||||
@ -2468,8 +2473,7 @@ xlog_recover_validate_buf_type(
|
||||
case XFS_BLFT_DIR_BLOCK_BUF:
|
||||
if (magic32 != XFS_DIR2_BLOCK_MAGIC &&
|
||||
magic32 != XFS_DIR3_BLOCK_MAGIC) {
|
||||
xfs_warn(mp, "Bad dir block magic!");
|
||||
ASSERT(0);
|
||||
warnmsg = "Bad dir block magic!";
|
||||
break;
|
||||
}
|
||||
bp->b_ops = &xfs_dir3_block_buf_ops;
|
||||
@ -2477,8 +2481,7 @@ xlog_recover_validate_buf_type(
|
||||
case XFS_BLFT_DIR_DATA_BUF:
|
||||
if (magic32 != XFS_DIR2_DATA_MAGIC &&
|
||||
magic32 != XFS_DIR3_DATA_MAGIC) {
|
||||
xfs_warn(mp, "Bad dir data magic!");
|
||||
ASSERT(0);
|
||||
warnmsg = "Bad dir data magic!";
|
||||
break;
|
||||
}
|
||||
bp->b_ops = &xfs_dir3_data_buf_ops;
|
||||
@ -2486,8 +2489,7 @@ xlog_recover_validate_buf_type(
|
||||
case XFS_BLFT_DIR_FREE_BUF:
|
||||
if (magic32 != XFS_DIR2_FREE_MAGIC &&
|
||||
magic32 != XFS_DIR3_FREE_MAGIC) {
|
||||
xfs_warn(mp, "Bad dir3 free magic!");
|
||||
ASSERT(0);
|
||||
warnmsg = "Bad dir3 free magic!";
|
||||
break;
|
||||
}
|
||||
bp->b_ops = &xfs_dir3_free_buf_ops;
|
||||
@ -2495,8 +2497,7 @@ xlog_recover_validate_buf_type(
|
||||
case XFS_BLFT_DIR_LEAF1_BUF:
|
||||
if (magicda != XFS_DIR2_LEAF1_MAGIC &&
|
||||
magicda != XFS_DIR3_LEAF1_MAGIC) {
|
||||
xfs_warn(mp, "Bad dir leaf1 magic!");
|
||||
ASSERT(0);
|
||||
warnmsg = "Bad dir leaf1 magic!";
|
||||
break;
|
||||
}
|
||||
bp->b_ops = &xfs_dir3_leaf1_buf_ops;
|
||||
@ -2504,8 +2505,7 @@ xlog_recover_validate_buf_type(
|
||||
case XFS_BLFT_DIR_LEAFN_BUF:
|
||||
if (magicda != XFS_DIR2_LEAFN_MAGIC &&
|
||||
magicda != XFS_DIR3_LEAFN_MAGIC) {
|
||||
xfs_warn(mp, "Bad dir leafn magic!");
|
||||
ASSERT(0);
|
||||
warnmsg = "Bad dir leafn magic!";
|
||||
break;
|
||||
}
|
||||
bp->b_ops = &xfs_dir3_leafn_buf_ops;
|
||||
@ -2513,8 +2513,7 @@ xlog_recover_validate_buf_type(
|
||||
case XFS_BLFT_DA_NODE_BUF:
|
||||
if (magicda != XFS_DA_NODE_MAGIC &&
|
||||
magicda != XFS_DA3_NODE_MAGIC) {
|
||||
xfs_warn(mp, "Bad da node magic!");
|
||||
ASSERT(0);
|
||||
warnmsg = "Bad da node magic!";
|
||||
break;
|
||||
}
|
||||
bp->b_ops = &xfs_da3_node_buf_ops;
|
||||
@ -2522,24 +2521,21 @@ xlog_recover_validate_buf_type(
|
||||
case XFS_BLFT_ATTR_LEAF_BUF:
|
||||
if (magicda != XFS_ATTR_LEAF_MAGIC &&
|
||||
magicda != XFS_ATTR3_LEAF_MAGIC) {
|
||||
xfs_warn(mp, "Bad attr leaf magic!");
|
||||
ASSERT(0);
|
||||
warnmsg = "Bad attr leaf magic!";
|
||||
break;
|
||||
}
|
||||
bp->b_ops = &xfs_attr3_leaf_buf_ops;
|
||||
break;
|
||||
case XFS_BLFT_ATTR_RMT_BUF:
|
||||
if (magic32 != XFS_ATTR3_RMT_MAGIC) {
|
||||
xfs_warn(mp, "Bad attr remote magic!");
|
||||
ASSERT(0);
|
||||
warnmsg = "Bad attr remote magic!";
|
||||
break;
|
||||
}
|
||||
bp->b_ops = &xfs_attr3_rmt_buf_ops;
|
||||
break;
|
||||
case XFS_BLFT_SB_BUF:
|
||||
if (magic32 != XFS_SB_MAGIC) {
|
||||
xfs_warn(mp, "Bad SB block magic!");
|
||||
ASSERT(0);
|
||||
warnmsg = "Bad SB block magic!";
|
||||
break;
|
||||
}
|
||||
bp->b_ops = &xfs_sb_buf_ops;
|
||||
@ -2556,6 +2552,40 @@ xlog_recover_validate_buf_type(
|
||||
xfs_blft_from_flags(buf_f));
|
||||
break;
|
||||
}
|
||||
|
||||
/*
|
||||
* Nothing else to do in the case of a NULL current LSN as this means
|
||||
* the buffer is more recent than the change in the log and will be
|
||||
* skipped.
|
||||
*/
|
||||
if (current_lsn == NULLCOMMITLSN)
|
||||
return;
|
||||
|
||||
if (warnmsg) {
|
||||
xfs_warn(mp, warnmsg);
|
||||
ASSERT(0);
|
||||
}
|
||||
|
||||
/*
|
||||
* We must update the metadata LSN of the buffer as it is written out to
|
||||
* ensure that older transactions never replay over this one and corrupt
|
||||
* the buffer. This can occur if log recovery is interrupted at some
|
||||
* point after the current transaction completes, at which point a
|
||||
* subsequent mount starts recovery from the beginning.
|
||||
*
|
||||
* Write verifiers update the metadata LSN from log items attached to
|
||||
* the buffer. Therefore, initialize a bli purely to carry the LSN to
|
||||
* the verifier. We'll clean it up in our ->iodone() callback.
|
||||
*/
|
||||
if (bp->b_ops) {
|
||||
struct xfs_buf_log_item *bip;
|
||||
|
||||
ASSERT(!bp->b_iodone || bp->b_iodone == xlog_recover_iodone);
|
||||
bp->b_iodone = xlog_recover_iodone;
|
||||
xfs_buf_item_init(bp, mp);
|
||||
bip = bp->b_fspriv;
|
||||
bip->bli_item.li_lsn = current_lsn;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
@ -2569,7 +2599,8 @@ xlog_recover_do_reg_buffer(
|
||||
struct xfs_mount *mp,
|
||||
xlog_recover_item_t *item,
|
||||
struct xfs_buf *bp,
|
||||
xfs_buf_log_format_t *buf_f)
|
||||
xfs_buf_log_format_t *buf_f,
|
||||
xfs_lsn_t current_lsn)
|
||||
{
|
||||
int i;
|
||||
int bit;
|
||||
@ -2642,7 +2673,7 @@ xlog_recover_do_reg_buffer(
|
||||
/* Shouldn't be any more regions */
|
||||
ASSERT(i == item->ri_total);
|
||||
|
||||
xlog_recover_validate_buf_type(mp, bp, buf_f);
|
||||
xlog_recover_validate_buf_type(mp, bp, buf_f, current_lsn);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -2685,7 +2716,7 @@ xlog_recover_do_dquot_buffer(
|
||||
if (log->l_quotaoffs_flag & type)
|
||||
return false;
|
||||
|
||||
xlog_recover_do_reg_buffer(mp, item, bp, buf_f);
|
||||
xlog_recover_do_reg_buffer(mp, item, bp, buf_f, NULLCOMMITLSN);
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -2773,7 +2804,8 @@ xlog_recover_buffer_pass2(
|
||||
*/
|
||||
lsn = xlog_recover_get_buf_lsn(mp, bp);
|
||||
if (lsn && lsn != -1 && XFS_LSN_CMP(lsn, current_lsn) >= 0) {
|
||||
xlog_recover_validate_buf_type(mp, bp, buf_f);
|
||||
trace_xfs_log_recover_buf_skip(log, buf_f);
|
||||
xlog_recover_validate_buf_type(mp, bp, buf_f, NULLCOMMITLSN);
|
||||
goto out_release;
|
||||
}
|
||||
|
||||
@ -2789,7 +2821,7 @@ xlog_recover_buffer_pass2(
|
||||
if (!dirty)
|
||||
goto out_release;
|
||||
} else {
|
||||
xlog_recover_do_reg_buffer(mp, item, bp, buf_f);
|
||||
xlog_recover_do_reg_buffer(mp, item, bp, buf_f, current_lsn);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -3846,14 +3878,13 @@ STATIC int
|
||||
xlog_recover_commit_trans(
|
||||
struct xlog *log,
|
||||
struct xlog_recover *trans,
|
||||
int pass)
|
||||
int pass,
|
||||
struct list_head *buffer_list)
|
||||
{
|
||||
int error = 0;
|
||||
int error2;
|
||||
int items_queued = 0;
|
||||
struct xlog_recover_item *item;
|
||||
struct xlog_recover_item *next;
|
||||
LIST_HEAD (buffer_list);
|
||||
LIST_HEAD (ra_list);
|
||||
LIST_HEAD (done_list);
|
||||
|
||||
@ -3876,7 +3907,7 @@ xlog_recover_commit_trans(
|
||||
items_queued++;
|
||||
if (items_queued >= XLOG_RECOVER_COMMIT_QUEUE_MAX) {
|
||||
error = xlog_recover_items_pass2(log, trans,
|
||||
&buffer_list, &ra_list);
|
||||
buffer_list, &ra_list);
|
||||
list_splice_tail_init(&ra_list, &done_list);
|
||||
items_queued = 0;
|
||||
}
|
||||
@ -3894,15 +3925,14 @@ out:
|
||||
if (!list_empty(&ra_list)) {
|
||||
if (!error)
|
||||
error = xlog_recover_items_pass2(log, trans,
|
||||
&buffer_list, &ra_list);
|
||||
buffer_list, &ra_list);
|
||||
list_splice_tail_init(&ra_list, &done_list);
|
||||
}
|
||||
|
||||
if (!list_empty(&done_list))
|
||||
list_splice_init(&done_list, &trans->r_itemq);
|
||||
|
||||
error2 = xfs_buf_delwri_submit(&buffer_list);
|
||||
return error ? error : error2;
|
||||
return error;
|
||||
}
|
||||
|
||||
STATIC void
|
||||
@ -4085,7 +4115,8 @@ xlog_recovery_process_trans(
|
||||
char *dp,
|
||||
unsigned int len,
|
||||
unsigned int flags,
|
||||
int pass)
|
||||
int pass,
|
||||
struct list_head *buffer_list)
|
||||
{
|
||||
int error = 0;
|
||||
bool freeit = false;
|
||||
@ -4109,7 +4140,8 @@ xlog_recovery_process_trans(
|
||||
error = xlog_recover_add_to_cont_trans(log, trans, dp, len);
|
||||
break;
|
||||
case XLOG_COMMIT_TRANS:
|
||||
error = xlog_recover_commit_trans(log, trans, pass);
|
||||
error = xlog_recover_commit_trans(log, trans, pass,
|
||||
buffer_list);
|
||||
/* success or fail, we are now done with this transaction. */
|
||||
freeit = true;
|
||||
break;
|
||||
@ -4191,10 +4223,12 @@ xlog_recover_process_ophdr(
|
||||
struct xlog_op_header *ohead,
|
||||
char *dp,
|
||||
char *end,
|
||||
int pass)
|
||||
int pass,
|
||||
struct list_head *buffer_list)
|
||||
{
|
||||
struct xlog_recover *trans;
|
||||
unsigned int len;
|
||||
int error;
|
||||
|
||||
/* Do we understand who wrote this op? */
|
||||
if (ohead->oh_clientid != XFS_TRANSACTION &&
|
||||
@ -4221,8 +4255,39 @@ xlog_recover_process_ophdr(
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* The recovered buffer queue is drained only once we know that all
|
||||
* recovery items for the current LSN have been processed. This is
|
||||
* required because:
|
||||
*
|
||||
* - Buffer write submission updates the metadata LSN of the buffer.
|
||||
* - Log recovery skips items with a metadata LSN >= the current LSN of
|
||||
* the recovery item.
|
||||
* - Separate recovery items against the same metadata buffer can share
|
||||
* a current LSN. I.e., consider that the LSN of a recovery item is
|
||||
* defined as the starting LSN of the first record in which its
|
||||
* transaction appears, that a record can hold multiple transactions,
|
||||
* and/or that a transaction can span multiple records.
|
||||
*
|
||||
* In other words, we are allowed to submit a buffer from log recovery
|
||||
* once per current LSN. Otherwise, we may incorrectly skip recovery
|
||||
* items and cause corruption.
|
||||
*
|
||||
* We don't know up front whether buffers are updated multiple times per
|
||||
* LSN. Therefore, track the current LSN of each commit log record as it
|
||||
* is processed and drain the queue when it changes. Use commit records
|
||||
* because they are ordered correctly by the logging code.
|
||||
*/
|
||||
if (log->l_recovery_lsn != trans->r_lsn &&
|
||||
ohead->oh_flags & XLOG_COMMIT_TRANS) {
|
||||
error = xfs_buf_delwri_submit(buffer_list);
|
||||
if (error)
|
||||
return error;
|
||||
log->l_recovery_lsn = trans->r_lsn;
|
||||
}
|
||||
|
||||
return xlog_recovery_process_trans(log, trans, dp, len,
|
||||
ohead->oh_flags, pass);
|
||||
ohead->oh_flags, pass, buffer_list);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -4240,7 +4305,8 @@ xlog_recover_process_data(
|
||||
struct hlist_head rhash[],
|
||||
struct xlog_rec_header *rhead,
|
||||
char *dp,
|
||||
int pass)
|
||||
int pass,
|
||||
struct list_head *buffer_list)
|
||||
{
|
||||
struct xlog_op_header *ohead;
|
||||
char *end;
|
||||
@ -4254,6 +4320,7 @@ xlog_recover_process_data(
|
||||
if (xlog_header_check_recover(log->l_mp, rhead))
|
||||
return -EIO;
|
||||
|
||||
trace_xfs_log_recover_record(log, rhead, pass);
|
||||
while ((dp < end) && num_logops) {
|
||||
|
||||
ohead = (struct xlog_op_header *)dp;
|
||||
@ -4262,7 +4329,7 @@ xlog_recover_process_data(
|
||||
|
||||
/* errors will abort recovery */
|
||||
error = xlog_recover_process_ophdr(log, rhash, rhead, ohead,
|
||||
dp, end, pass);
|
||||
dp, end, pass, buffer_list);
|
||||
if (error)
|
||||
return error;
|
||||
|
||||
@ -4685,7 +4752,8 @@ xlog_recover_process(
|
||||
struct hlist_head rhash[],
|
||||
struct xlog_rec_header *rhead,
|
||||
char *dp,
|
||||
int pass)
|
||||
int pass,
|
||||
struct list_head *buffer_list)
|
||||
{
|
||||
int error;
|
||||
__le32 crc;
|
||||
@ -4732,7 +4800,8 @@ xlog_recover_process(
|
||||
if (error)
|
||||
return error;
|
||||
|
||||
return xlog_recover_process_data(log, rhash, rhead, dp, pass);
|
||||
return xlog_recover_process_data(log, rhash, rhead, dp, pass,
|
||||
buffer_list);
|
||||
}
|
||||
|
||||
STATIC int
|
||||
@ -4793,9 +4862,11 @@ xlog_do_recovery_pass(
|
||||
char *offset;
|
||||
xfs_buf_t *hbp, *dbp;
|
||||
int error = 0, h_size, h_len;
|
||||
int error2 = 0;
|
||||
int bblks, split_bblks;
|
||||
int hblks, split_hblks, wrapped_hblks;
|
||||
struct hlist_head rhash[XLOG_RHASH_SIZE];
|
||||
LIST_HEAD (buffer_list);
|
||||
|
||||
ASSERT(head_blk != tail_blk);
|
||||
rhead_blk = 0;
|
||||
@ -4981,7 +5052,7 @@ xlog_do_recovery_pass(
|
||||
}
|
||||
|
||||
error = xlog_recover_process(log, rhash, rhead, offset,
|
||||
pass);
|
||||
pass, &buffer_list);
|
||||
if (error)
|
||||
goto bread_err2;
|
||||
|
||||
@ -5012,7 +5083,8 @@ xlog_do_recovery_pass(
|
||||
if (error)
|
||||
goto bread_err2;
|
||||
|
||||
error = xlog_recover_process(log, rhash, rhead, offset, pass);
|
||||
error = xlog_recover_process(log, rhash, rhead, offset, pass,
|
||||
&buffer_list);
|
||||
if (error)
|
||||
goto bread_err2;
|
||||
|
||||
@ -5025,10 +5097,17 @@ xlog_do_recovery_pass(
|
||||
bread_err1:
|
||||
xlog_put_bp(hbp);
|
||||
|
||||
/*
|
||||
* Submit buffers that have been added from the last record processed,
|
||||
* regardless of error status.
|
||||
*/
|
||||
if (!list_empty(&buffer_list))
|
||||
error2 = xfs_buf_delwri_submit(&buffer_list);
|
||||
|
||||
if (error && first_bad)
|
||||
*first_bad = rhead_blk;
|
||||
|
||||
return error;
|
||||
return error ? error : error2;
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -933,6 +933,20 @@ xfs_mountfs(
|
||||
goto out_rtunmount;
|
||||
}
|
||||
|
||||
/*
|
||||
* Now the log is fully replayed, we can transition to full read-only
|
||||
* mode for read-only mounts. This will sync all the metadata and clean
|
||||
* the log so that the recovery we just performed does not have to be
|
||||
* replayed again on the next mount.
|
||||
*
|
||||
* We use the same quiesce mechanism as the rw->ro remount, as they are
|
||||
* semantically identical operations.
|
||||
*/
|
||||
if ((mp->m_flags & (XFS_MOUNT_RDONLY|XFS_MOUNT_NORECOVERY)) ==
|
||||
XFS_MOUNT_RDONLY) {
|
||||
xfs_quiesce_attr(mp);
|
||||
}
|
||||
|
||||
/*
|
||||
* Complete the quota initialisation, post-log-replay component.
|
||||
*/
|
||||
|
@ -1137,7 +1137,7 @@ xfs_restore_resvblks(struct xfs_mount *mp)
|
||||
* Note: xfs_log_quiesce() stops background log work - the callers must ensure
|
||||
* it is started again when appropriate.
|
||||
*/
|
||||
static void
|
||||
void
|
||||
xfs_quiesce_attr(
|
||||
struct xfs_mount *mp)
|
||||
{
|
||||
|
@ -61,6 +61,7 @@ struct xfs_mount;
|
||||
struct xfs_buftarg;
|
||||
struct block_device;
|
||||
|
||||
extern void xfs_quiesce_attr(struct xfs_mount *mp);
|
||||
extern void xfs_flush_inodes(struct xfs_mount *mp);
|
||||
extern void xfs_blkdev_issue_flush(struct xfs_buftarg *);
|
||||
extern xfs_agnumber_t xfs_set_inode_alloc(struct xfs_mount *,
|
||||
|
@ -1624,7 +1624,7 @@ DECLARE_EVENT_CLASS(xfs_alloc_class,
|
||||
__field(char, wasdel)
|
||||
__field(char, wasfromfl)
|
||||
__field(int, resv)
|
||||
__field(char, userdata)
|
||||
__field(int, datatype)
|
||||
__field(xfs_fsblock_t, firstblock)
|
||||
),
|
||||
TP_fast_assign(
|
||||
@ -1645,13 +1645,13 @@ DECLARE_EVENT_CLASS(xfs_alloc_class,
|
||||
__entry->wasdel = args->wasdel;
|
||||
__entry->wasfromfl = args->wasfromfl;
|
||||
__entry->resv = args->resv;
|
||||
__entry->userdata = args->userdata;
|
||||
__entry->datatype = args->datatype;
|
||||
__entry->firstblock = args->firstblock;
|
||||
),
|
||||
TP_printk("dev %d:%d agno %u agbno %u minlen %u maxlen %u mod %u "
|
||||
"prod %u minleft %u total %u alignment %u minalignslop %u "
|
||||
"len %u type %s otype %s wasdel %d wasfromfl %d resv %d "
|
||||
"userdata %d firstblock 0x%llx",
|
||||
"datatype 0x%x firstblock 0x%llx",
|
||||
MAJOR(__entry->dev), MINOR(__entry->dev),
|
||||
__entry->agno,
|
||||
__entry->agbno,
|
||||
@ -1669,7 +1669,7 @@ DECLARE_EVENT_CLASS(xfs_alloc_class,
|
||||
__entry->wasdel,
|
||||
__entry->wasfromfl,
|
||||
__entry->resv,
|
||||
__entry->userdata,
|
||||
__entry->datatype,
|
||||
(unsigned long long)__entry->firstblock)
|
||||
)
|
||||
|
||||
@ -1985,6 +1985,29 @@ DEFINE_EVENT(xfs_swap_extent_class, name, \
|
||||
DEFINE_SWAPEXT_EVENT(xfs_swap_extent_before);
|
||||
DEFINE_SWAPEXT_EVENT(xfs_swap_extent_after);
|
||||
|
||||
TRACE_EVENT(xfs_log_recover_record,
|
||||
TP_PROTO(struct xlog *log, struct xlog_rec_header *rhead, int pass),
|
||||
TP_ARGS(log, rhead, pass),
|
||||
TP_STRUCT__entry(
|
||||
__field(dev_t, dev)
|
||||
__field(xfs_lsn_t, lsn)
|
||||
__field(int, len)
|
||||
__field(int, num_logops)
|
||||
__field(int, pass)
|
||||
),
|
||||
TP_fast_assign(
|
||||
__entry->dev = log->l_mp->m_super->s_dev;
|
||||
__entry->lsn = be64_to_cpu(rhead->h_lsn);
|
||||
__entry->len = be32_to_cpu(rhead->h_len);
|
||||
__entry->num_logops = be32_to_cpu(rhead->h_num_logops);
|
||||
__entry->pass = pass;
|
||||
),
|
||||
TP_printk("dev %d:%d lsn 0x%llx len 0x%x num_logops 0x%x pass %d",
|
||||
MAJOR(__entry->dev), MINOR(__entry->dev),
|
||||
__entry->lsn, __entry->len, __entry->num_logops,
|
||||
__entry->pass)
|
||||
)
|
||||
|
||||
DECLARE_EVENT_CLASS(xfs_log_recover_item_class,
|
||||
TP_PROTO(struct xlog *log, struct xlog_recover *trans,
|
||||
struct xlog_recover_item *item, int pass),
|
||||
@ -1993,6 +2016,7 @@ DECLARE_EVENT_CLASS(xfs_log_recover_item_class,
|
||||
__field(dev_t, dev)
|
||||
__field(unsigned long, item)
|
||||
__field(xlog_tid_t, tid)
|
||||
__field(xfs_lsn_t, lsn)
|
||||
__field(int, type)
|
||||
__field(int, pass)
|
||||
__field(int, count)
|
||||
@ -2002,15 +2026,17 @@ DECLARE_EVENT_CLASS(xfs_log_recover_item_class,
|
||||
__entry->dev = log->l_mp->m_super->s_dev;
|
||||
__entry->item = (unsigned long)item;
|
||||
__entry->tid = trans->r_log_tid;
|
||||
__entry->lsn = trans->r_lsn;
|
||||
__entry->type = ITEM_TYPE(item);
|
||||
__entry->pass = pass;
|
||||
__entry->count = item->ri_cnt;
|
||||
__entry->total = item->ri_total;
|
||||
),
|
||||
TP_printk("dev %d:%d trans 0x%x, pass %d, item 0x%p, item type %s "
|
||||
"item region count/total %d/%d",
|
||||
TP_printk("dev %d:%d tid 0x%x lsn 0x%llx, pass %d, item 0x%p, "
|
||||
"item type %s item region count/total %d/%d",
|
||||
MAJOR(__entry->dev), MINOR(__entry->dev),
|
||||
__entry->tid,
|
||||
__entry->lsn,
|
||||
__entry->pass,
|
||||
(void *)__entry->item,
|
||||
__print_symbolic(__entry->type, XFS_LI_TYPE_DESC),
|
||||
@ -2069,6 +2095,7 @@ DEFINE_LOG_RECOVER_BUF_ITEM(xfs_log_recover_buf_cancel);
|
||||
DEFINE_LOG_RECOVER_BUF_ITEM(xfs_log_recover_buf_cancel_add);
|
||||
DEFINE_LOG_RECOVER_BUF_ITEM(xfs_log_recover_buf_cancel_ref_inc);
|
||||
DEFINE_LOG_RECOVER_BUF_ITEM(xfs_log_recover_buf_recover);
|
||||
DEFINE_LOG_RECOVER_BUF_ITEM(xfs_log_recover_buf_skip);
|
||||
DEFINE_LOG_RECOVER_BUF_ITEM(xfs_log_recover_buf_inode_buf);
|
||||
DEFINE_LOG_RECOVER_BUF_ITEM(xfs_log_recover_buf_reg_buf);
|
||||
DEFINE_LOG_RECOVER_BUF_ITEM(xfs_log_recover_buf_dquot_buf);
|
||||
|
Loading…
Reference in New Issue
Block a user