xfs: update #2 for v3.12-rc1
Here we have defrag support for v5 superblock, a number of bugfixes and a cleanup or two. - defrag support for CRC filesystems - fix endian worning in xlog_recover_get_buf_lsn - fixes for sparse warnings - fix for assert in xfs_dir3_leaf_hdr_from_disk - fix for log recovery of remote symlinks - fix for log recovery of btree root splits - fixes formemory allocation failures with ACLs - fix for assert in xfs_buf_item_relse - fix for assert in xfs_inode_buf_verify - fix an assignment in an assert that should be a test in xfs_bmbt_change_owner - remove dead code in xlog_recover_inode_pass2 -----BEGIN PGP SIGNATURE----- Version: GnuPG v1.4.10 (GNU/Linux) iQIcBAABAgAGBQJSMjQUAAoJENaLyazVq6ZOu2IP/1OHZYy+Bkmj0tO9pdsdEa4s w4FEBPsQePMJPjwdN693rKpW1exZue5sUmPMErH3ENzc2DPAwpUAlc9XAIohtdFx rTqrz2q+qTfZTq8oYBIA/RCOifJ2cHWN8tDYZPJpp5wceV7CRGYQeR1foiudE3ZH QDIPXioy8P9IkfGaXCtr/iWf9kycMO2lgNTNfdL6qtwX99HCqHZanTlsWx1BIYGQ Fa5TaOsXis6idPMCFMuEC15iEwA+YXc0HmXuHkMFLj+9mwFc4h/Aq65bwUkYZLmy +T1Wo/uQ/21rl6im/rWqgCh6fFS8NJQp8NIJeCIyihUEHbarfPyJIJRJjoP457YO cv8OkixCkt4zX6CkTxaL5ZFEBW9FYbRb13Gg96J6hb4WfdAFMtQg7FAjThSU/+Qr HwjaAso3GXimEaZD1C3c0TtZEQ0x9E6pENVI7/ewB1I0p92p7GJBMq4C7CTAYThV 5zhdcOnViSrJTJvVQxm+gfOYzubkWWiVmbVku3RCO6//kvPBOvJ9juSYsl0mKeRu v2DZZB3AYJE/qnbYfZBlktX9obE6k+keKF6w8Eiufr2IqwJaqfaM4h9eogzAwTJA vyXKeLxUEmgHuqivFSZjw3sEK6sY654GCMMTP+2IpD19vlAIioYXdgp0ZbkkdiE3 6twrzdFZAr1zy80xlM8W =2Uq6 -----END PGP SIGNATURE----- Merge tag 'xfs-for-linus-v3.12-rc1-2' of git://oss.sgi.com/xfs/xfs Pull xfs update #2 from Ben Myers: "Here we have defrag support for v5 superblock, a number of bugfixes and a cleanup or two. 
- defrag support for CRC filesystems - fix endian warning in xlog_recover_get_buf_lsn - fixes for sparse warnings - fix for assert in xfs_dir3_leaf_hdr_from_disk - fix for log recovery of remote symlinks - fix for log recovery of btree root splits - fixes for memory allocation failures with ACLs - fix for assert in xfs_buf_item_relse - fix for assert in xfs_inode_buf_verify - fix an assignment in an assert that should be a test in xfs_bmbt_change_owner - remove dead code in xlog_recover_inode_pass2" * tag 'xfs-for-linus-v3.12-rc1-2' of git://oss.sgi.com/xfs/xfs: xfs: remove dead code from xlog_recover_inode_pass2 xfs: = vs == typo in ASSERT() xfs: don't assert fail on bad inode numbers xfs: aborted buf items can be in the AIL. xfs: factor all the kmalloc-or-vmalloc fallback allocations xfs: fix memory allocation failures with ACLs xfs: ensure we copy buffer type in da btree root splits xfs: set remote symlink buffer type for recovery xfs: recovery of swap extents operations for CRC filesystems xfs: swap extents operations for CRC filesystems xfs: check magic numbers in dir3 leaf verifier first xfs: fix some minor sparse warnings xfs: fix endian warning in xlog_recover_get_buf_lsn()
This commit is contained in:
commit
e0ea4045bc
@ -27,8 +27,6 @@
|
||||
|
||||
/*
|
||||
* Greedy allocation. May fail and may return vmalloced memory.
|
||||
*
|
||||
* Must be freed using kmem_free_large.
|
||||
*/
|
||||
void *
|
||||
kmem_zalloc_greedy(size_t *size, size_t minsize, size_t maxsize)
|
||||
@ -36,7 +34,7 @@ kmem_zalloc_greedy(size_t *size, size_t minsize, size_t maxsize)
|
||||
void *ptr;
|
||||
size_t kmsize = maxsize;
|
||||
|
||||
while (!(ptr = kmem_zalloc_large(kmsize))) {
|
||||
while (!(ptr = vzalloc(kmsize))) {
|
||||
if ((kmsize >>= 1) <= minsize)
|
||||
kmsize = minsize;
|
||||
}
|
||||
@ -75,6 +73,17 @@ kmem_zalloc(size_t size, xfs_km_flags_t flags)
|
||||
return ptr;
|
||||
}
|
||||
|
||||
void *
|
||||
kmem_zalloc_large(size_t size, xfs_km_flags_t flags)
|
||||
{
|
||||
void *ptr;
|
||||
|
||||
ptr = kmem_zalloc(size, flags | KM_MAYFAIL);
|
||||
if (ptr)
|
||||
return ptr;
|
||||
return vzalloc(size);
|
||||
}
|
||||
|
||||
void
|
||||
kmem_free(const void *ptr)
|
||||
{
|
||||
|
@ -57,17 +57,10 @@ kmem_flags_convert(xfs_km_flags_t flags)
|
||||
|
||||
extern void *kmem_alloc(size_t, xfs_km_flags_t);
|
||||
extern void *kmem_zalloc(size_t, xfs_km_flags_t);
|
||||
extern void *kmem_zalloc_large(size_t size, xfs_km_flags_t);
|
||||
extern void *kmem_realloc(const void *, size_t, size_t, xfs_km_flags_t);
|
||||
extern void kmem_free(const void *);
|
||||
|
||||
static inline void *kmem_zalloc_large(size_t size)
|
||||
{
|
||||
return vzalloc(size);
|
||||
}
|
||||
static inline void kmem_free_large(void *ptr)
|
||||
{
|
||||
vfree(ptr);
|
||||
}
|
||||
|
||||
extern void *kmem_zalloc_greedy(size_t *, size_t, size_t);
|
||||
|
||||
|
@ -152,7 +152,7 @@ xfs_get_acl(struct inode *inode, int type)
|
||||
* go out to the disk.
|
||||
*/
|
||||
len = XFS_ACL_MAX_SIZE(ip->i_mount);
|
||||
xfs_acl = kzalloc(len, GFP_KERNEL);
|
||||
xfs_acl = kmem_zalloc_large(len, KM_SLEEP);
|
||||
if (!xfs_acl)
|
||||
return ERR_PTR(-ENOMEM);
|
||||
|
||||
@ -175,10 +175,10 @@ xfs_get_acl(struct inode *inode, int type)
|
||||
if (IS_ERR(acl))
|
||||
goto out;
|
||||
|
||||
out_update_cache:
|
||||
out_update_cache:
|
||||
set_cached_acl(inode, type, acl);
|
||||
out:
|
||||
kfree(xfs_acl);
|
||||
out:
|
||||
kmem_free(xfs_acl);
|
||||
return acl;
|
||||
}
|
||||
|
||||
@ -209,7 +209,7 @@ xfs_set_acl(struct inode *inode, int type, struct posix_acl *acl)
|
||||
struct xfs_acl *xfs_acl;
|
||||
int len = XFS_ACL_MAX_SIZE(ip->i_mount);
|
||||
|
||||
xfs_acl = kzalloc(len, GFP_KERNEL);
|
||||
xfs_acl = kmem_zalloc_large(len, KM_SLEEP);
|
||||
if (!xfs_acl)
|
||||
return -ENOMEM;
|
||||
|
||||
@ -222,7 +222,7 @@ xfs_set_acl(struct inode *inode, int type, struct posix_acl *acl)
|
||||
error = -xfs_attr_set(ip, ea_name, (unsigned char *)xfs_acl,
|
||||
len, ATTR_ROOT);
|
||||
|
||||
kfree(xfs_acl);
|
||||
kmem_free(xfs_acl);
|
||||
} else {
|
||||
/*
|
||||
* A NULL ACL argument means we want to remove the ACL.
|
||||
|
@ -4450,7 +4450,7 @@ xfs_bmapi_write(
|
||||
{
|
||||
struct xfs_mount *mp = ip->i_mount;
|
||||
struct xfs_ifork *ifp;
|
||||
struct xfs_bmalloca bma = { 0 }; /* args for xfs_bmap_alloc */
|
||||
struct xfs_bmalloca bma = { NULL }; /* args for xfs_bmap_alloc */
|
||||
xfs_fileoff_t end; /* end of mapped file region */
|
||||
int eof; /* after the end of extents */
|
||||
int error; /* error return */
|
||||
|
@ -925,3 +925,47 @@ xfs_bmdr_maxrecs(
|
||||
return blocklen / sizeof(xfs_bmdr_rec_t);
|
||||
return blocklen / (sizeof(xfs_bmdr_key_t) + sizeof(xfs_bmdr_ptr_t));
|
||||
}
|
||||
|
||||
/*
|
||||
* Change the owner of a btree format fork of the inode passed in. Change it to
|
||||
* the owner that is passed in so that we can change owners before or after
|
||||
* we switch forks between inodes. The operation that the caller is doing will
|
||||
* determine whether it needs to change owner before or after the switch.
|
||||
*
|
||||
* For demand paged transactional modification, the fork switch should be done
|
||||
* after reading in all the blocks, modifying them and pinning them in the
|
||||
* transaction. For modification when the buffers are already pinned in memory,
|
||||
* the fork switch can be done before changing the owner as we won't need to
|
||||
* validate the owner until the btree buffers are unpinned and writes can occur
|
||||
* again.
|
||||
*
|
||||
* For recovery based ownership change, there is no transactional context and
|
||||
* so a buffer list must be supplied so that we can record the buffers that we
|
||||
* modified for the caller to issue IO on.
|
||||
*/
|
||||
int
|
||||
xfs_bmbt_change_owner(
|
||||
struct xfs_trans *tp,
|
||||
struct xfs_inode *ip,
|
||||
int whichfork,
|
||||
xfs_ino_t new_owner,
|
||||
struct list_head *buffer_list)
|
||||
{
|
||||
struct xfs_btree_cur *cur;
|
||||
int error;
|
||||
|
||||
ASSERT(tp || buffer_list);
|
||||
ASSERT(!(tp && buffer_list));
|
||||
if (whichfork == XFS_DATA_FORK)
|
||||
ASSERT(ip->i_d.di_format == XFS_DINODE_FMT_BTREE);
|
||||
else
|
||||
ASSERT(ip->i_d.di_aformat == XFS_DINODE_FMT_BTREE);
|
||||
|
||||
cur = xfs_bmbt_init_cursor(ip->i_mount, tp, ip, whichfork);
|
||||
if (!cur)
|
||||
return ENOMEM;
|
||||
|
||||
error = xfs_btree_change_owner(cur, new_owner, buffer_list);
|
||||
xfs_btree_del_cursor(cur, error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR);
|
||||
return error;
|
||||
}
|
||||
|
@ -236,6 +236,10 @@ extern int xfs_bmbt_get_maxrecs(struct xfs_btree_cur *, int level);
|
||||
extern int xfs_bmdr_maxrecs(struct xfs_mount *, int blocklen, int leaf);
|
||||
extern int xfs_bmbt_maxrecs(struct xfs_mount *, int blocklen, int leaf);
|
||||
|
||||
extern int xfs_bmbt_change_owner(struct xfs_trans *tp, struct xfs_inode *ip,
|
||||
int whichfork, xfs_ino_t new_owner,
|
||||
struct list_head *buffer_list);
|
||||
|
||||
extern struct xfs_btree_cur *xfs_bmbt_init_cursor(struct xfs_mount *,
|
||||
struct xfs_trans *, struct xfs_inode *, int);
|
||||
|
||||
|
@ -612,13 +612,9 @@ xfs_getbmap(
|
||||
|
||||
if (bmv->bmv_count > ULONG_MAX / sizeof(struct getbmapx))
|
||||
return XFS_ERROR(ENOMEM);
|
||||
out = kmem_zalloc(bmv->bmv_count * sizeof(struct getbmapx), KM_MAYFAIL);
|
||||
if (!out) {
|
||||
out = kmem_zalloc_large(bmv->bmv_count *
|
||||
sizeof(struct getbmapx));
|
||||
if (!out)
|
||||
return XFS_ERROR(ENOMEM);
|
||||
}
|
||||
out = kmem_zalloc_large(bmv->bmv_count * sizeof(struct getbmapx), 0);
|
||||
if (!out)
|
||||
return XFS_ERROR(ENOMEM);
|
||||
|
||||
xfs_ilock(ip, XFS_IOLOCK_SHARED);
|
||||
if (whichfork == XFS_DATA_FORK && !(iflags & BMV_IF_DELALLOC)) {
|
||||
@ -754,10 +750,7 @@ xfs_getbmap(
|
||||
break;
|
||||
}
|
||||
|
||||
if (is_vmalloc_addr(out))
|
||||
kmem_free_large(out);
|
||||
else
|
||||
kmem_free(out);
|
||||
kmem_free(out);
|
||||
return error;
|
||||
}
|
||||
|
||||
@ -1789,14 +1782,6 @@ xfs_swap_extents(
|
||||
int taforkblks = 0;
|
||||
__uint64_t tmp;
|
||||
|
||||
/*
|
||||
* We have no way of updating owner information in the BMBT blocks for
|
||||
* each inode on CRC enabled filesystems, so to avoid corrupting the
|
||||
* metadata we simply don't allow extent swaps to occur.
|
||||
*/
|
||||
if (xfs_sb_version_hascrc(&mp->m_sb))
|
||||
return XFS_ERROR(EINVAL);
|
||||
|
||||
tempifp = kmem_alloc(sizeof(xfs_ifork_t), KM_MAYFAIL);
|
||||
if (!tempifp) {
|
||||
error = XFS_ERROR(ENOMEM);
|
||||
@ -1920,6 +1905,42 @@ xfs_swap_extents(
|
||||
goto out_trans_cancel;
|
||||
}
|
||||
|
||||
xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
|
||||
xfs_trans_ijoin(tp, tip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
|
||||
|
||||
/*
|
||||
* Before we've swapped the forks, let's set the owners of the forks
|
||||
* appropriately. We have to do this as we are demand paging the btree
|
||||
* buffers, and so the validation done on read will expect the owner
|
||||
* field to be correctly set. Once we change the owners, we can swap the
|
||||
* inode forks.
|
||||
*
|
||||
* Note the trickiness in setting the log flags - we set the owner log
|
||||
* flag on the opposite inode (i.e. the inode we are setting the new
|
||||
* owner to be) because once we swap the forks and log that, log
|
||||
* recovery is going to see the fork as owned by the swapped inode,
|
||||
* not the pre-swapped inodes.
|
||||
*/
|
||||
src_log_flags = XFS_ILOG_CORE;
|
||||
target_log_flags = XFS_ILOG_CORE;
|
||||
if (ip->i_d.di_version == 3 &&
|
||||
ip->i_d.di_format == XFS_DINODE_FMT_BTREE) {
|
||||
target_log_flags |= XFS_ILOG_DOWNER;
|
||||
error = xfs_bmbt_change_owner(tp, ip, XFS_DATA_FORK,
|
||||
tip->i_ino, NULL);
|
||||
if (error)
|
||||
goto out_trans_cancel;
|
||||
}
|
||||
|
||||
if (tip->i_d.di_version == 3 &&
|
||||
tip->i_d.di_format == XFS_DINODE_FMT_BTREE) {
|
||||
src_log_flags |= XFS_ILOG_DOWNER;
|
||||
error = xfs_bmbt_change_owner(tp, tip, XFS_DATA_FORK,
|
||||
ip->i_ino, NULL);
|
||||
if (error)
|
||||
goto out_trans_cancel;
|
||||
}
|
||||
|
||||
/*
|
||||
* Swap the data forks of the inodes
|
||||
*/
|
||||
@ -1957,7 +1978,6 @@ xfs_swap_extents(
|
||||
tip->i_delayed_blks = ip->i_delayed_blks;
|
||||
ip->i_delayed_blks = 0;
|
||||
|
||||
src_log_flags = XFS_ILOG_CORE;
|
||||
switch (ip->i_d.di_format) {
|
||||
case XFS_DINODE_FMT_EXTENTS:
|
||||
/* If the extents fit in the inode, fix the
|
||||
@ -1971,11 +1991,12 @@ xfs_swap_extents(
|
||||
src_log_flags |= XFS_ILOG_DEXT;
|
||||
break;
|
||||
case XFS_DINODE_FMT_BTREE:
|
||||
ASSERT(ip->i_d.di_version < 3 ||
|
||||
(src_log_flags & XFS_ILOG_DOWNER));
|
||||
src_log_flags |= XFS_ILOG_DBROOT;
|
||||
break;
|
||||
}
|
||||
|
||||
target_log_flags = XFS_ILOG_CORE;
|
||||
switch (tip->i_d.di_format) {
|
||||
case XFS_DINODE_FMT_EXTENTS:
|
||||
/* If the extents fit in the inode, fix the
|
||||
@ -1990,13 +2011,11 @@ xfs_swap_extents(
|
||||
break;
|
||||
case XFS_DINODE_FMT_BTREE:
|
||||
target_log_flags |= XFS_ILOG_DBROOT;
|
||||
ASSERT(tip->i_d.di_version < 3 ||
|
||||
(target_log_flags & XFS_ILOG_DOWNER));
|
||||
break;
|
||||
}
|
||||
|
||||
|
||||
xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
|
||||
xfs_trans_ijoin(tp, tip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
|
||||
|
||||
xfs_trans_log_inode(tp, ip, src_log_flags);
|
||||
xfs_trans_log_inode(tp, tip, target_log_flags);
|
||||
|
||||
|
@ -855,6 +855,41 @@ xfs_btree_readahead(
|
||||
return xfs_btree_readahead_sblock(cur, lr, block);
|
||||
}
|
||||
|
||||
STATIC xfs_daddr_t
|
||||
xfs_btree_ptr_to_daddr(
|
||||
struct xfs_btree_cur *cur,
|
||||
union xfs_btree_ptr *ptr)
|
||||
{
|
||||
if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
|
||||
ASSERT(ptr->l != cpu_to_be64(NULLDFSBNO));
|
||||
|
||||
return XFS_FSB_TO_DADDR(cur->bc_mp, be64_to_cpu(ptr->l));
|
||||
} else {
|
||||
ASSERT(cur->bc_private.a.agno != NULLAGNUMBER);
|
||||
ASSERT(ptr->s != cpu_to_be32(NULLAGBLOCK));
|
||||
|
||||
return XFS_AGB_TO_DADDR(cur->bc_mp, cur->bc_private.a.agno,
|
||||
be32_to_cpu(ptr->s));
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Readahead @count btree blocks at the given @ptr location.
|
||||
*
|
||||
* We don't need to care about long or short form btrees here as we have a
|
||||
* method of converting the ptr directly to a daddr available to us.
|
||||
*/
|
||||
STATIC void
|
||||
xfs_btree_readahead_ptr(
|
||||
struct xfs_btree_cur *cur,
|
||||
union xfs_btree_ptr *ptr,
|
||||
xfs_extlen_t count)
|
||||
{
|
||||
xfs_buf_readahead(cur->bc_mp->m_ddev_targp,
|
||||
xfs_btree_ptr_to_daddr(cur, ptr),
|
||||
cur->bc_mp->m_bsize * count, cur->bc_ops->buf_ops);
|
||||
}
|
||||
|
||||
/*
|
||||
* Set the buffer for level "lev" in the cursor to bp, releasing
|
||||
* any previous buffer.
|
||||
@ -1073,24 +1108,6 @@ xfs_btree_buf_to_ptr(
|
||||
}
|
||||
}
|
||||
|
||||
STATIC xfs_daddr_t
|
||||
xfs_btree_ptr_to_daddr(
|
||||
struct xfs_btree_cur *cur,
|
||||
union xfs_btree_ptr *ptr)
|
||||
{
|
||||
if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
|
||||
ASSERT(ptr->l != cpu_to_be64(NULLDFSBNO));
|
||||
|
||||
return XFS_FSB_TO_DADDR(cur->bc_mp, be64_to_cpu(ptr->l));
|
||||
} else {
|
||||
ASSERT(cur->bc_private.a.agno != NULLAGNUMBER);
|
||||
ASSERT(ptr->s != cpu_to_be32(NULLAGBLOCK));
|
||||
|
||||
return XFS_AGB_TO_DADDR(cur->bc_mp, cur->bc_private.a.agno,
|
||||
be32_to_cpu(ptr->s));
|
||||
}
|
||||
}
|
||||
|
||||
STATIC void
|
||||
xfs_btree_set_refs(
|
||||
struct xfs_btree_cur *cur,
|
||||
@ -3869,3 +3886,120 @@ xfs_btree_get_rec(
|
||||
*stat = 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Change the owner of a btree.
|
||||
*
|
||||
* The mechanism we use here is ordered buffer logging. Because we don't know
|
||||
* how many buffers we are going to need to modify, we don't really want to
|
||||
* have to make transaction reservations for the worst case of every buffer in a
|
||||
* full size btree as that may be more space than we can fit in the log....
|
||||
*
|
||||
* We do the btree walk in the most optimal manner possible - we have sibling
|
||||
* pointers so we can just walk all the blocks on each level from left to right
|
||||
* in a single pass, and then move to the next level and do the same. We can
|
||||
* also do readahead on the sibling pointers to get IO moving more quickly,
|
||||
* though for slow disks this is unlikely to make much difference to performance
|
||||
* as the amount of CPU work we have to do before moving to the next block is
|
||||
* relatively small.
|
||||
*
|
||||
* For each btree block that we load, modify the owner appropriately, set the
|
||||
* buffer as an ordered buffer and log it appropriately. We need to ensure that
|
||||
* we mark the region we change dirty so that if the buffer is relogged in
|
||||
* a subsequent transaction the changes we make here as an ordered buffer are
|
||||
* correctly relogged in that transaction. If we are in recovery context, then
|
||||
* just queue the modified buffer as a delayed write buffer so the transaction
|
||||
* recovery completion writes the changes to disk.
|
||||
*/
|
||||
static int
|
||||
xfs_btree_block_change_owner(
|
||||
struct xfs_btree_cur *cur,
|
||||
int level,
|
||||
__uint64_t new_owner,
|
||||
struct list_head *buffer_list)
|
||||
{
|
||||
struct xfs_btree_block *block;
|
||||
struct xfs_buf *bp;
|
||||
union xfs_btree_ptr rptr;
|
||||
|
||||
/* do right sibling readahead */
|
||||
xfs_btree_readahead(cur, level, XFS_BTCUR_RIGHTRA);
|
||||
|
||||
/* modify the owner */
|
||||
block = xfs_btree_get_block(cur, level, &bp);
|
||||
if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
|
||||
block->bb_u.l.bb_owner = cpu_to_be64(new_owner);
|
||||
else
|
||||
block->bb_u.s.bb_owner = cpu_to_be32(new_owner);
|
||||
|
||||
/*
|
||||
* If the block is a root block hosted in an inode, we might not have a
|
||||
* buffer pointer here and we shouldn't attempt to log the change as the
|
||||
* information is already held in the inode and discarded when the root
|
||||
* block is formatted into the on-disk inode fork. We still change it,
|
||||
* though, so everything is consistent in memory.
|
||||
*/
|
||||
if (bp) {
|
||||
if (cur->bc_tp) {
|
||||
xfs_trans_ordered_buf(cur->bc_tp, bp);
|
||||
xfs_btree_log_block(cur, bp, XFS_BB_OWNER);
|
||||
} else {
|
||||
xfs_buf_delwri_queue(bp, buffer_list);
|
||||
}
|
||||
} else {
|
||||
ASSERT(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE);
|
||||
ASSERT(level == cur->bc_nlevels - 1);
|
||||
}
|
||||
|
||||
/* now read rh sibling block for next iteration */
|
||||
xfs_btree_get_sibling(cur, block, &rptr, XFS_BB_RIGHTSIB);
|
||||
if (xfs_btree_ptr_is_null(cur, &rptr))
|
||||
return ENOENT;
|
||||
|
||||
return xfs_btree_lookup_get_block(cur, level, &rptr, &block);
|
||||
}
|
||||
|
||||
int
|
||||
xfs_btree_change_owner(
|
||||
struct xfs_btree_cur *cur,
|
||||
__uint64_t new_owner,
|
||||
struct list_head *buffer_list)
|
||||
{
|
||||
union xfs_btree_ptr lptr;
|
||||
int level;
|
||||
struct xfs_btree_block *block = NULL;
|
||||
int error = 0;
|
||||
|
||||
cur->bc_ops->init_ptr_from_cur(cur, &lptr);
|
||||
|
||||
/* for each level */
|
||||
for (level = cur->bc_nlevels - 1; level >= 0; level--) {
|
||||
/* grab the left hand block */
|
||||
error = xfs_btree_lookup_get_block(cur, level, &lptr, &block);
|
||||
if (error)
|
||||
return error;
|
||||
|
||||
/* readahead the left most block for the next level down */
|
||||
if (level > 0) {
|
||||
union xfs_btree_ptr *ptr;
|
||||
|
||||
ptr = xfs_btree_ptr_addr(cur, 1, block);
|
||||
xfs_btree_readahead_ptr(cur, ptr, 1);
|
||||
|
||||
/* save for the next iteration of the loop */
|
||||
lptr = *ptr;
|
||||
}
|
||||
|
||||
/* for each buffer in the level */
|
||||
do {
|
||||
error = xfs_btree_block_change_owner(cur, level,
|
||||
new_owner,
|
||||
buffer_list);
|
||||
} while (!error);
|
||||
|
||||
if (error != ENOENT)
|
||||
return error;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
@ -121,15 +121,18 @@ union xfs_btree_rec {
|
||||
/*
|
||||
* For logging record fields.
|
||||
*/
|
||||
#define XFS_BB_MAGIC 0x01
|
||||
#define XFS_BB_LEVEL 0x02
|
||||
#define XFS_BB_NUMRECS 0x04
|
||||
#define XFS_BB_LEFTSIB 0x08
|
||||
#define XFS_BB_RIGHTSIB 0x10
|
||||
#define XFS_BB_BLKNO 0x20
|
||||
#define XFS_BB_MAGIC (1 << 0)
|
||||
#define XFS_BB_LEVEL (1 << 1)
|
||||
#define XFS_BB_NUMRECS (1 << 2)
|
||||
#define XFS_BB_LEFTSIB (1 << 3)
|
||||
#define XFS_BB_RIGHTSIB (1 << 4)
|
||||
#define XFS_BB_BLKNO (1 << 5)
|
||||
#define XFS_BB_LSN (1 << 6)
|
||||
#define XFS_BB_UUID (1 << 7)
|
||||
#define XFS_BB_OWNER (1 << 8)
|
||||
#define XFS_BB_NUM_BITS 5
|
||||
#define XFS_BB_ALL_BITS ((1 << XFS_BB_NUM_BITS) - 1)
|
||||
#define XFS_BB_NUM_BITS_CRC 8
|
||||
#define XFS_BB_NUM_BITS_CRC 9
|
||||
#define XFS_BB_ALL_BITS_CRC ((1 << XFS_BB_NUM_BITS_CRC) - 1)
|
||||
|
||||
/*
|
||||
@ -442,6 +445,8 @@ int xfs_btree_new_iroot(struct xfs_btree_cur *, int *, int *);
|
||||
int xfs_btree_insert(struct xfs_btree_cur *, int *);
|
||||
int xfs_btree_delete(struct xfs_btree_cur *, int *);
|
||||
int xfs_btree_get_rec(struct xfs_btree_cur *, union xfs_btree_rec **, int *);
|
||||
int xfs_btree_change_owner(struct xfs_btree_cur *cur, __uint64_t new_owner,
|
||||
struct list_head *buffer_list);
|
||||
|
||||
/*
|
||||
* btree block CRC helpers
|
||||
|
@ -613,13 +613,27 @@ xfs_buf_item_unlock(
|
||||
}
|
||||
}
|
||||
}
|
||||
if (clean || aborted) {
|
||||
if (atomic_dec_and_test(&bip->bli_refcount)) {
|
||||
ASSERT(!aborted || XFS_FORCED_SHUTDOWN(lip->li_mountp));
|
||||
|
||||
/*
|
||||
* Clean buffers, by definition, cannot be in the AIL. However, aborted
|
||||
* buffers may be dirty and hence in the AIL. Therefore if we are
|
||||
* aborting a buffer and we've just taken the last reference away, we
|
||||
* have to check if it is in the AIL before freeing it. We need to free
|
||||
* it in this case, because an aborted transaction has already shut the
|
||||
* filesystem down and this is the last chance we will have to do so.
|
||||
*/
|
||||
if (atomic_dec_and_test(&bip->bli_refcount)) {
|
||||
if (clean)
|
||||
xfs_buf_item_relse(bp);
|
||||
else if (aborted) {
|
||||
ASSERT(XFS_FORCED_SHUTDOWN(lip->li_mountp));
|
||||
if (lip->li_flags & XFS_LI_IN_AIL) {
|
||||
xfs_trans_ail_delete(lip->li_ailp, lip,
|
||||
SHUTDOWN_LOG_IO_ERROR);
|
||||
}
|
||||
xfs_buf_item_relse(bp);
|
||||
}
|
||||
} else
|
||||
atomic_dec(&bip->bli_refcount);
|
||||
}
|
||||
|
||||
if (!(flags & XFS_BLI_HOLD))
|
||||
xfs_buf_relse(bp);
|
||||
|
@ -635,6 +635,7 @@ xfs_da3_root_split(
|
||||
xfs_trans_log_buf(tp, bp, 0, size - 1);
|
||||
|
||||
bp->b_ops = blk1->bp->b_ops;
|
||||
xfs_trans_buf_copy_type(bp, blk1->bp);
|
||||
blk1->bp = bp;
|
||||
blk1->blkno = blkno;
|
||||
|
||||
|
@ -180,6 +180,11 @@ xfs_dir3_leaf_check_int(
|
||||
return true;
|
||||
}
|
||||
|
||||
/*
|
||||
* We verify the magic numbers before decoding the leaf header so that on debug
|
||||
* kernels we don't get assertion failures in xfs_dir3_leaf_hdr_from_disk() due
|
||||
* to incorrect magic numbers.
|
||||
*/
|
||||
static bool
|
||||
xfs_dir3_leaf_verify(
|
||||
struct xfs_buf *bp,
|
||||
@ -191,24 +196,25 @@ xfs_dir3_leaf_verify(
|
||||
|
||||
ASSERT(magic == XFS_DIR2_LEAF1_MAGIC || magic == XFS_DIR2_LEAFN_MAGIC);
|
||||
|
||||
xfs_dir3_leaf_hdr_from_disk(&leafhdr, leaf);
|
||||
if (xfs_sb_version_hascrc(&mp->m_sb)) {
|
||||
struct xfs_dir3_leaf_hdr *leaf3 = bp->b_addr;
|
||||
__uint16_t magic3;
|
||||
|
||||
if ((magic == XFS_DIR2_LEAF1_MAGIC &&
|
||||
leafhdr.magic != XFS_DIR3_LEAF1_MAGIC) ||
|
||||
(magic == XFS_DIR2_LEAFN_MAGIC &&
|
||||
leafhdr.magic != XFS_DIR3_LEAFN_MAGIC))
|
||||
magic3 = (magic == XFS_DIR2_LEAF1_MAGIC) ? XFS_DIR3_LEAF1_MAGIC
|
||||
: XFS_DIR3_LEAFN_MAGIC;
|
||||
|
||||
if (leaf3->info.hdr.magic != cpu_to_be16(magic3))
|
||||
return false;
|
||||
|
||||
if (!uuid_equal(&leaf3->info.uuid, &mp->m_sb.sb_uuid))
|
||||
return false;
|
||||
if (be64_to_cpu(leaf3->info.blkno) != bp->b_bn)
|
||||
return false;
|
||||
} else {
|
||||
if (leafhdr.magic != magic)
|
||||
if (leaf->hdr.info.magic != cpu_to_be16(magic))
|
||||
return false;
|
||||
}
|
||||
|
||||
xfs_dir3_leaf_hdr_from_disk(&leafhdr, leaf);
|
||||
return xfs_dir3_leaf_check_int(mp, &leafhdr, leaf);
|
||||
}
|
||||
|
||||
|
@ -142,7 +142,8 @@ xfs_qm_dqunpin_wait(
|
||||
STATIC uint
|
||||
xfs_qm_dquot_logitem_push(
|
||||
struct xfs_log_item *lip,
|
||||
struct list_head *buffer_list)
|
||||
struct list_head *buffer_list) __releases(&lip->li_ailp->xa_lock)
|
||||
__acquires(&lip->li_ailp->xa_lock)
|
||||
{
|
||||
struct xfs_dquot *dqp = DQUOT_ITEM(lip)->qli_dquot;
|
||||
struct xfs_buf *bp = NULL;
|
||||
|
@ -160,7 +160,8 @@ xfs_extent_busy_update_extent(
|
||||
struct xfs_extent_busy *busyp,
|
||||
xfs_agblock_t fbno,
|
||||
xfs_extlen_t flen,
|
||||
bool userdata)
|
||||
bool userdata) __releases(&pag->pagb_lock)
|
||||
__acquires(&pag->pagb_lock)
|
||||
{
|
||||
xfs_agblock_t fend = fbno + flen;
|
||||
xfs_agblock_t bbno = busyp->bno;
|
||||
|
@ -48,7 +48,7 @@ STATIC void __xfs_inode_clear_reclaim_tag(struct xfs_mount *mp,
|
||||
/*
|
||||
* Allocate and initialise an xfs_inode.
|
||||
*/
|
||||
STATIC struct xfs_inode *
|
||||
struct xfs_inode *
|
||||
xfs_inode_alloc(
|
||||
struct xfs_mount *mp,
|
||||
xfs_ino_t ino)
|
||||
@ -98,7 +98,7 @@ xfs_inode_free_callback(
|
||||
kmem_zone_free(xfs_inode_zone, ip);
|
||||
}
|
||||
|
||||
STATIC void
|
||||
void
|
||||
xfs_inode_free(
|
||||
struct xfs_inode *ip)
|
||||
{
|
||||
|
@ -42,6 +42,10 @@ struct xfs_eofblocks {
|
||||
int xfs_iget(struct xfs_mount *mp, struct xfs_trans *tp, xfs_ino_t ino,
|
||||
uint flags, uint lock_flags, xfs_inode_t **ipp);
|
||||
|
||||
/* recovery needs direct inode allocation capability */
|
||||
struct xfs_inode * xfs_inode_alloc(struct xfs_mount *mp, xfs_ino_t ino);
|
||||
void xfs_inode_free(struct xfs_inode *ip);
|
||||
|
||||
void xfs_reclaim_worker(struct work_struct *work);
|
||||
|
||||
int xfs_reclaim_inodes(struct xfs_mount *mp, int mode);
|
||||
|
@ -53,9 +53,8 @@ xfs_inobp_check(
|
||||
i * mp->m_sb.sb_inodesize);
|
||||
if (!dip->di_next_unlinked) {
|
||||
xfs_alert(mp,
|
||||
"Detected bogus zero next_unlinked field in incore inode buffer 0x%p.",
|
||||
bp);
|
||||
ASSERT(dip->di_next_unlinked);
|
||||
"Detected bogus zero next_unlinked field in inode %d buffer 0x%llx.",
|
||||
i, (long long)bp->b_bn);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -106,11 +105,10 @@ xfs_inode_buf_verify(
|
||||
XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_HIGH,
|
||||
mp, dip);
|
||||
#ifdef DEBUG
|
||||
xfs_emerg(mp,
|
||||
xfs_alert(mp,
|
||||
"bad inode magic/vsn daddr %lld #%d (magic=%x)",
|
||||
(unsigned long long)bp->b_bn, i,
|
||||
be16_to_cpu(dip->di_magic));
|
||||
ASSERT(0);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
@ -196,7 +194,7 @@ xfs_imap_to_bp(
|
||||
return 0;
|
||||
}
|
||||
|
||||
STATIC void
|
||||
void
|
||||
xfs_dinode_from_disk(
|
||||
xfs_icdinode_t *to,
|
||||
xfs_dinode_t *from)
|
||||
|
@ -32,17 +32,17 @@ struct xfs_imap {
|
||||
ushort im_boffset; /* inode offset in block in bytes */
|
||||
};
|
||||
|
||||
int xfs_imap_to_bp(struct xfs_mount *, struct xfs_trans *,
|
||||
struct xfs_imap *, struct xfs_dinode **,
|
||||
struct xfs_buf **, uint, uint);
|
||||
int xfs_iread(struct xfs_mount *, struct xfs_trans *,
|
||||
struct xfs_inode *, uint);
|
||||
void xfs_dinode_calc_crc(struct xfs_mount *, struct xfs_dinode *);
|
||||
void xfs_dinode_to_disk(struct xfs_dinode *,
|
||||
struct xfs_icdinode *);
|
||||
int xfs_imap_to_bp(struct xfs_mount *, struct xfs_trans *,
|
||||
struct xfs_imap *, struct xfs_dinode **,
|
||||
struct xfs_buf **, uint, uint);
|
||||
int xfs_iread(struct xfs_mount *, struct xfs_trans *,
|
||||
struct xfs_inode *, uint);
|
||||
void xfs_dinode_calc_crc(struct xfs_mount *, struct xfs_dinode *);
|
||||
void xfs_dinode_to_disk(struct xfs_dinode *to, struct xfs_icdinode *from);
|
||||
void xfs_dinode_from_disk(struct xfs_icdinode *to, struct xfs_dinode *from);
|
||||
|
||||
#if defined(DEBUG)
|
||||
void xfs_inobp_check(struct xfs_mount *, struct xfs_buf *);
|
||||
void xfs_inobp_check(struct xfs_mount *, struct xfs_buf *);
|
||||
#else
|
||||
#define xfs_inobp_check(mp, bp)
|
||||
#endif /* DEBUG */
|
||||
|
@ -71,7 +71,7 @@ xfs_find_handle(
|
||||
int hsize;
|
||||
xfs_handle_t handle;
|
||||
struct inode *inode;
|
||||
struct fd f = {0};
|
||||
struct fd f = {NULL};
|
||||
struct path path;
|
||||
int error;
|
||||
struct xfs_inode *ip;
|
||||
@ -456,12 +456,9 @@ xfs_attrlist_by_handle(
|
||||
if (IS_ERR(dentry))
|
||||
return PTR_ERR(dentry);
|
||||
|
||||
kbuf = kmem_zalloc(al_hreq.buflen, KM_SLEEP | KM_MAYFAIL);
|
||||
if (!kbuf) {
|
||||
kbuf = kmem_zalloc_large(al_hreq.buflen);
|
||||
if (!kbuf)
|
||||
goto out_dput;
|
||||
}
|
||||
kbuf = kmem_zalloc_large(al_hreq.buflen, KM_SLEEP);
|
||||
if (!kbuf)
|
||||
goto out_dput;
|
||||
|
||||
cursor = (attrlist_cursor_kern_t *)&al_hreq.pos;
|
||||
error = -xfs_attr_list(XFS_I(dentry->d_inode), kbuf, al_hreq.buflen,
|
||||
@ -472,12 +469,9 @@ xfs_attrlist_by_handle(
|
||||
if (copy_to_user(al_hreq.buffer, kbuf, al_hreq.buflen))
|
||||
error = -EFAULT;
|
||||
|
||||
out_kfree:
|
||||
if (is_vmalloc_addr(kbuf))
|
||||
kmem_free_large(kbuf);
|
||||
else
|
||||
kmem_free(kbuf);
|
||||
out_dput:
|
||||
out_kfree:
|
||||
kmem_free(kbuf);
|
||||
out_dput:
|
||||
dput(dentry);
|
||||
return error;
|
||||
}
|
||||
@ -495,12 +489,9 @@ xfs_attrmulti_attr_get(
|
||||
|
||||
if (*len > XATTR_SIZE_MAX)
|
||||
return EINVAL;
|
||||
kbuf = kmem_zalloc(*len, KM_SLEEP | KM_MAYFAIL);
|
||||
if (!kbuf) {
|
||||
kbuf = kmem_zalloc_large(*len);
|
||||
if (!kbuf)
|
||||
return ENOMEM;
|
||||
}
|
||||
kbuf = kmem_zalloc_large(*len, KM_SLEEP);
|
||||
if (!kbuf)
|
||||
return ENOMEM;
|
||||
|
||||
error = xfs_attr_get(XFS_I(inode), name, kbuf, (int *)len, flags);
|
||||
if (error)
|
||||
@ -509,11 +500,8 @@ xfs_attrmulti_attr_get(
|
||||
if (copy_to_user(ubuf, kbuf, *len))
|
||||
error = EFAULT;
|
||||
|
||||
out_kfree:
|
||||
if (is_vmalloc_addr(kbuf))
|
||||
kmem_free_large(kbuf);
|
||||
else
|
||||
kmem_free(kbuf);
|
||||
out_kfree:
|
||||
kmem_free(kbuf);
|
||||
return error;
|
||||
}
|
||||
|
||||
|
@ -371,12 +371,9 @@ xfs_compat_attrlist_by_handle(
|
||||
return PTR_ERR(dentry);
|
||||
|
||||
error = -ENOMEM;
|
||||
kbuf = kmem_zalloc(al_hreq.buflen, KM_SLEEP | KM_MAYFAIL);
|
||||
if (!kbuf) {
|
||||
kbuf = kmem_zalloc_large(al_hreq.buflen);
|
||||
if (!kbuf)
|
||||
goto out_dput;
|
||||
}
|
||||
kbuf = kmem_zalloc_large(al_hreq.buflen, KM_SLEEP);
|
||||
if (!kbuf)
|
||||
goto out_dput;
|
||||
|
||||
cursor = (attrlist_cursor_kern_t *)&al_hreq.pos;
|
||||
error = -xfs_attr_list(XFS_I(dentry->d_inode), kbuf, al_hreq.buflen,
|
||||
@ -387,12 +384,9 @@ xfs_compat_attrlist_by_handle(
|
||||
if (copy_to_user(compat_ptr(al_hreq.buffer), kbuf, al_hreq.buflen))
|
||||
error = -EFAULT;
|
||||
|
||||
out_kfree:
|
||||
if (is_vmalloc_addr(kbuf))
|
||||
kmem_free_large(kbuf);
|
||||
else
|
||||
kmem_free(kbuf);
|
||||
out_dput:
|
||||
out_kfree:
|
||||
kmem_free(kbuf);
|
||||
out_dput:
|
||||
dput(dentry);
|
||||
return error;
|
||||
}
|
||||
|
@ -495,7 +495,7 @@ xfs_bulkstat(
|
||||
/*
|
||||
* Done, we're either out of filesystem or space to put the data.
|
||||
*/
|
||||
kmem_free_large(irbuf);
|
||||
kmem_free(irbuf);
|
||||
*ubcountp = ubelem;
|
||||
/*
|
||||
* Found some inodes, return them now and return the error next time.
|
||||
@ -541,8 +541,9 @@ xfs_bulkstat_single(
|
||||
* at the expense of the error case.
|
||||
*/
|
||||
|
||||
ino = (xfs_ino_t)*lastinop;
|
||||
error = xfs_bulkstat_one(mp, ino, buffer, sizeof(xfs_bstat_t), 0, &res);
|
||||
ino = *lastinop;
|
||||
error = xfs_bulkstat_one(mp, ino, buffer, sizeof(xfs_bstat_t),
|
||||
NULL, &res);
|
||||
if (error) {
|
||||
/*
|
||||
* Special case way failed, do it the "long" way
|
||||
|
@ -257,7 +257,8 @@ xlog_grant_head_wait(
|
||||
struct xlog *log,
|
||||
struct xlog_grant_head *head,
|
||||
struct xlog_ticket *tic,
|
||||
int need_bytes)
|
||||
int need_bytes) __releases(&head->lock)
|
||||
__acquires(&head->lock)
|
||||
{
|
||||
list_add_tail(&tic->t_queue, &head->waiters);
|
||||
|
||||
|
@ -474,6 +474,8 @@ typedef struct xfs_inode_log_format_64 {
|
||||
#define XFS_ILOG_ADATA 0x040 /* log i_af.if_data */
|
||||
#define XFS_ILOG_AEXT 0x080 /* log i_af.if_extents */
|
||||
#define XFS_ILOG_ABROOT 0x100 /* log i_af.i_broot */
|
||||
#define XFS_ILOG_DOWNER 0x200 /* change the data fork owner on replay */
|
||||
#define XFS_ILOG_AOWNER 0x400 /* change the attr fork owner on replay */
|
||||
|
||||
|
||||
/*
|
||||
@ -487,7 +489,8 @@ typedef struct xfs_inode_log_format_64 {
|
||||
#define XFS_ILOG_NONCORE (XFS_ILOG_DDATA | XFS_ILOG_DEXT | \
|
||||
XFS_ILOG_DBROOT | XFS_ILOG_DEV | \
|
||||
XFS_ILOG_UUID | XFS_ILOG_ADATA | \
|
||||
XFS_ILOG_AEXT | XFS_ILOG_ABROOT)
|
||||
XFS_ILOG_AEXT | XFS_ILOG_ABROOT | \
|
||||
XFS_ILOG_DOWNER | XFS_ILOG_AOWNER)
|
||||
|
||||
#define XFS_ILOG_DFORK (XFS_ILOG_DDATA | XFS_ILOG_DEXT | \
|
||||
XFS_ILOG_DBROOT)
|
||||
@ -499,7 +502,8 @@ typedef struct xfs_inode_log_format_64 {
|
||||
XFS_ILOG_DEXT | XFS_ILOG_DBROOT | \
|
||||
XFS_ILOG_DEV | XFS_ILOG_UUID | \
|
||||
XFS_ILOG_ADATA | XFS_ILOG_AEXT | \
|
||||
XFS_ILOG_ABROOT | XFS_ILOG_TIMESTAMP)
|
||||
XFS_ILOG_ABROOT | XFS_ILOG_TIMESTAMP | \
|
||||
XFS_ILOG_DOWNER | XFS_ILOG_AOWNER)
|
||||
|
||||
static inline int xfs_ilog_fbroot(int w)
|
||||
{
|
||||
|
@ -2014,7 +2014,7 @@ xlog_recover_get_buf_lsn(
|
||||
case XFS_ATTR3_RMT_MAGIC:
|
||||
return be64_to_cpu(((struct xfs_attr3_rmt_hdr *)blk)->rm_lsn);
|
||||
case XFS_SB_MAGIC:
|
||||
return be64_to_cpu(((struct xfs_sb *)blk)->sb_lsn);
|
||||
return be64_to_cpu(((struct xfs_dsb *)blk)->sb_lsn);
|
||||
default:
|
||||
break;
|
||||
}
|
||||
@ -2629,6 +2629,82 @@ out_release:
|
||||
return error;
|
||||
}
|
||||
|
||||
/*
|
||||
* Inode fork owner changes
|
||||
*
|
||||
* If we have been told that we have to reparent the inode fork, it's because an
|
||||
* extent swap operation on a CRC enabled filesystem has been done and we are
|
||||
* replaying it. We need to walk the BMBT of the appropriate fork and change the
|
||||
* owners of it.
|
||||
*
|
||||
* The complexity here is that we don't have an inode context to work with, so
|
||||
* after we've replayed the inode we need to instantiate one. This is where the
|
||||
* fun begins.
|
||||
*
|
||||
* We are in the middle of log recovery, so we can't run transactions. That
|
||||
* means we cannot use cache coherent inode instantiation via xfs_iget(), as
|
||||
* that will result in the corresponding iput() running the inode through
|
||||
* xfs_inactive(). If we've just replayed an inode core that changes the link
|
||||
* count to zero (i.e. it's been unlinked), then xfs_inactive() will run
|
||||
* transactions (bad!).
|
||||
*
|
||||
* So, to avoid this, we instantiate an inode directly from the inode core we've
|
||||
* just recovered. We have the buffer still locked, and all we really need to
|
||||
* instantiate is the inode core and the forks being modified. We can do this
|
||||
* manually, then run the inode btree owner change, and then tear down the
|
||||
* xfs_inode without having to run any transactions at all.
|
||||
*
|
||||
* Also, because we don't have a transaction context available here but still
|
||||
* need to gather all the buffers we modify for writeback, we pass the
|
||||
* buffer_list to the owner change operation for it to use instead.
|
||||
*/
|
||||
|
||||
STATIC int
|
||||
xfs_recover_inode_owner_change(
|
||||
struct xfs_mount *mp,
|
||||
struct xfs_dinode *dip,
|
||||
struct xfs_inode_log_format *in_f,
|
||||
struct list_head *buffer_list)
|
||||
{
|
||||
struct xfs_inode *ip;
|
||||
int error;
|
||||
|
||||
ASSERT(in_f->ilf_fields & (XFS_ILOG_DOWNER|XFS_ILOG_AOWNER));
|
||||
|
||||
ip = xfs_inode_alloc(mp, in_f->ilf_ino);
|
||||
if (!ip)
|
||||
return ENOMEM;
|
||||
|
||||
/* instantiate the inode */
|
||||
xfs_dinode_from_disk(&ip->i_d, dip);
|
||||
ASSERT(ip->i_d.di_version >= 3);
|
||||
|
||||
error = xfs_iformat_fork(ip, dip);
|
||||
if (error)
|
||||
goto out_free_ip;
|
||||
|
||||
|
||||
if (in_f->ilf_fields & XFS_ILOG_DOWNER) {
|
||||
ASSERT(in_f->ilf_fields & XFS_ILOG_DBROOT);
|
||||
error = xfs_bmbt_change_owner(NULL, ip, XFS_DATA_FORK,
|
||||
ip->i_ino, buffer_list);
|
||||
if (error)
|
||||
goto out_free_ip;
|
||||
}
|
||||
|
||||
if (in_f->ilf_fields & XFS_ILOG_AOWNER) {
|
||||
ASSERT(in_f->ilf_fields & XFS_ILOG_ABROOT);
|
||||
error = xfs_bmbt_change_owner(NULL, ip, XFS_ATTR_FORK,
|
||||
ip->i_ino, buffer_list);
|
||||
if (error)
|
||||
goto out_free_ip;
|
||||
}
|
||||
|
||||
out_free_ip:
|
||||
xfs_inode_free(ip);
|
||||
return error;
|
||||
}
|
||||
|
||||
STATIC int
|
||||
xlog_recover_inode_pass2(
|
||||
struct xlog *log,
|
||||
@ -2681,8 +2757,7 @@ xlog_recover_inode_pass2(
|
||||
error = bp->b_error;
|
||||
if (error) {
|
||||
xfs_buf_ioerror_alert(bp, "xlog_recover_do..(read#2)");
|
||||
xfs_buf_relse(bp);
|
||||
goto error;
|
||||
goto out_release;
|
||||
}
|
||||
ASSERT(in_f->ilf_fields & XFS_ILOG_CORE);
|
||||
dip = (xfs_dinode_t *)xfs_buf_offset(bp, in_f->ilf_boffset);
|
||||
@ -2692,30 +2767,31 @@ xlog_recover_inode_pass2(
|
||||
* like an inode!
|
||||
*/
|
||||
if (unlikely(dip->di_magic != cpu_to_be16(XFS_DINODE_MAGIC))) {
|
||||
xfs_buf_relse(bp);
|
||||
xfs_alert(mp,
|
||||
"%s: Bad inode magic number, dip = 0x%p, dino bp = 0x%p, ino = %Ld",
|
||||
__func__, dip, bp, in_f->ilf_ino);
|
||||
XFS_ERROR_REPORT("xlog_recover_inode_pass2(1)",
|
||||
XFS_ERRLEVEL_LOW, mp);
|
||||
error = EFSCORRUPTED;
|
||||
goto error;
|
||||
goto out_release;
|
||||
}
|
||||
dicp = item->ri_buf[1].i_addr;
|
||||
if (unlikely(dicp->di_magic != XFS_DINODE_MAGIC)) {
|
||||
xfs_buf_relse(bp);
|
||||
xfs_alert(mp,
|
||||
"%s: Bad inode log record, rec ptr 0x%p, ino %Ld",
|
||||
__func__, item, in_f->ilf_ino);
|
||||
XFS_ERROR_REPORT("xlog_recover_inode_pass2(2)",
|
||||
XFS_ERRLEVEL_LOW, mp);
|
||||
error = EFSCORRUPTED;
|
||||
goto error;
|
||||
goto out_release;
|
||||
}
|
||||
|
||||
/*
|
||||
* If the inode has an LSN in it, recover the inode only if it's less
|
||||
* than the lsn of the transaction we are replaying.
|
||||
* than the lsn of the transaction we are replaying. Note: we still
|
||||
* need to replay an owner change even though the inode is more recent
|
||||
* than the transaction as there is no guarantee that all the btree
|
||||
* blocks are more recent than this transaction, too.
|
||||
*/
|
||||
if (dip->di_version >= 3) {
|
||||
xfs_lsn_t lsn = be64_to_cpu(dip->di_lsn);
|
||||
@ -2723,7 +2799,7 @@ xlog_recover_inode_pass2(
|
||||
if (lsn && lsn != -1 && XFS_LSN_CMP(lsn, current_lsn) >= 0) {
|
||||
trace_xfs_log_recover_inode_skip(log, in_f);
|
||||
error = 0;
|
||||
goto out_release;
|
||||
goto out_owner_change;
|
||||
}
|
||||
}
|
||||
|
||||
@ -2745,10 +2821,9 @@ xlog_recover_inode_pass2(
|
||||
dicp->di_flushiter < (DI_MAX_FLUSH >> 1)) {
|
||||
/* do nothing */
|
||||
} else {
|
||||
xfs_buf_relse(bp);
|
||||
trace_xfs_log_recover_inode_skip(log, in_f);
|
||||
error = 0;
|
||||
goto error;
|
||||
goto out_release;
|
||||
}
|
||||
}
|
||||
|
||||
@ -2760,13 +2835,12 @@ xlog_recover_inode_pass2(
|
||||
(dicp->di_format != XFS_DINODE_FMT_BTREE)) {
|
||||
XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(3)",
|
||||
XFS_ERRLEVEL_LOW, mp, dicp);
|
||||
xfs_buf_relse(bp);
|
||||
xfs_alert(mp,
|
||||
"%s: Bad regular inode log record, rec ptr 0x%p, "
|
||||
"ino ptr = 0x%p, ino bp = 0x%p, ino %Ld",
|
||||
__func__, item, dip, bp, in_f->ilf_ino);
|
||||
error = EFSCORRUPTED;
|
||||
goto error;
|
||||
goto out_release;
|
||||
}
|
||||
} else if (unlikely(S_ISDIR(dicp->di_mode))) {
|
||||
if ((dicp->di_format != XFS_DINODE_FMT_EXTENTS) &&
|
||||
@ -2774,19 +2848,17 @@ xlog_recover_inode_pass2(
|
||||
(dicp->di_format != XFS_DINODE_FMT_LOCAL)) {
|
||||
XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(4)",
|
||||
XFS_ERRLEVEL_LOW, mp, dicp);
|
||||
xfs_buf_relse(bp);
|
||||
xfs_alert(mp,
|
||||
"%s: Bad dir inode log record, rec ptr 0x%p, "
|
||||
"ino ptr = 0x%p, ino bp = 0x%p, ino %Ld",
|
||||
__func__, item, dip, bp, in_f->ilf_ino);
|
||||
error = EFSCORRUPTED;
|
||||
goto error;
|
||||
goto out_release;
|
||||
}
|
||||
}
|
||||
if (unlikely(dicp->di_nextents + dicp->di_anextents > dicp->di_nblocks)){
|
||||
XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(5)",
|
||||
XFS_ERRLEVEL_LOW, mp, dicp);
|
||||
xfs_buf_relse(bp);
|
||||
xfs_alert(mp,
|
||||
"%s: Bad inode log record, rec ptr 0x%p, dino ptr 0x%p, "
|
||||
"dino bp 0x%p, ino %Ld, total extents = %d, nblocks = %Ld",
|
||||
@ -2794,29 +2866,27 @@ xlog_recover_inode_pass2(
|
||||
dicp->di_nextents + dicp->di_anextents,
|
||||
dicp->di_nblocks);
|
||||
error = EFSCORRUPTED;
|
||||
goto error;
|
||||
goto out_release;
|
||||
}
|
||||
if (unlikely(dicp->di_forkoff > mp->m_sb.sb_inodesize)) {
|
||||
XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(6)",
|
||||
XFS_ERRLEVEL_LOW, mp, dicp);
|
||||
xfs_buf_relse(bp);
|
||||
xfs_alert(mp,
|
||||
"%s: Bad inode log record, rec ptr 0x%p, dino ptr 0x%p, "
|
||||
"dino bp 0x%p, ino %Ld, forkoff 0x%x", __func__,
|
||||
item, dip, bp, in_f->ilf_ino, dicp->di_forkoff);
|
||||
error = EFSCORRUPTED;
|
||||
goto error;
|
||||
goto out_release;
|
||||
}
|
||||
isize = xfs_icdinode_size(dicp->di_version);
|
||||
if (unlikely(item->ri_buf[1].i_len > isize)) {
|
||||
XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(7)",
|
||||
XFS_ERRLEVEL_LOW, mp, dicp);
|
||||
xfs_buf_relse(bp);
|
||||
xfs_alert(mp,
|
||||
"%s: Bad inode log record length %d, rec ptr 0x%p",
|
||||
__func__, item->ri_buf[1].i_len, item);
|
||||
error = EFSCORRUPTED;
|
||||
goto error;
|
||||
goto out_release;
|
||||
}
|
||||
|
||||
/* The core is in in-core format */
|
||||
@ -2842,7 +2912,7 @@ xlog_recover_inode_pass2(
|
||||
}
|
||||
|
||||
if (in_f->ilf_size == 2)
|
||||
goto write_inode_buffer;
|
||||
goto out_owner_change;
|
||||
len = item->ri_buf[2].i_len;
|
||||
src = item->ri_buf[2].i_addr;
|
||||
ASSERT(in_f->ilf_size <= 4);
|
||||
@ -2903,13 +2973,15 @@ xlog_recover_inode_pass2(
|
||||
default:
|
||||
xfs_warn(log->l_mp, "%s: Invalid flag", __func__);
|
||||
ASSERT(0);
|
||||
xfs_buf_relse(bp);
|
||||
error = EIO;
|
||||
goto error;
|
||||
goto out_release;
|
||||
}
|
||||
}
|
||||
|
||||
write_inode_buffer:
|
||||
out_owner_change:
|
||||
if (in_f->ilf_fields & (XFS_ILOG_DOWNER|XFS_ILOG_AOWNER))
|
||||
error = xfs_recover_inode_owner_change(mp, dip, in_f,
|
||||
buffer_list);
|
||||
/* re-generate the checksum. */
|
||||
xfs_dinode_calc_crc(log->l_mp, dip);
|
||||
|
||||
|
@ -41,6 +41,7 @@
|
||||
#include "xfs_trans_space.h"
|
||||
#include "xfs_trace.h"
|
||||
#include "xfs_symlink.h"
|
||||
#include "xfs_buf_item.h"
|
||||
|
||||
/* ----- Kernel only functions below ----- */
|
||||
STATIC int
|
||||
@ -363,6 +364,7 @@ xfs_symlink(
|
||||
pathlen -= byte_cnt;
|
||||
offset += byte_cnt;
|
||||
|
||||
xfs_trans_buf_set_type(tp, bp, XFS_BLFT_SYMLINK_BUF);
|
||||
xfs_trans_log_buf(tp, bp, 0, (buf + byte_cnt - 1) -
|
||||
(char *)bp->b_addr);
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user