diff --git a/fs/xfs/libxfs/xfs_defer.c b/fs/xfs/libxfs/xfs_defer.c index 061cc01245a9..4a078e07e1a0 100644 --- a/fs/xfs/libxfs/xfs_defer.c +++ b/fs/xfs/libxfs/xfs_defer.c @@ -1092,7 +1092,11 @@ xfs_defer_ops_continue( ASSERT(!(tp->t_flags & XFS_TRANS_DIRTY)); /* Lock the captured resources to the new transaction. */ - if (dfc->dfc_held.dr_inos == 2) + if (dfc->dfc_held.dr_inos > 2) { + xfs_sort_inodes(dfc->dfc_held.dr_ip, dfc->dfc_held.dr_inos); + xfs_lock_inodes(dfc->dfc_held.dr_ip, dfc->dfc_held.dr_inos, + XFS_ILOCK_EXCL); + } else if (dfc->dfc_held.dr_inos == 2) xfs_lock_two_inodes(dfc->dfc_held.dr_ip[0], XFS_ILOCK_EXCL, dfc->dfc_held.dr_ip[1], XFS_ILOCK_EXCL); else if (dfc->dfc_held.dr_inos == 1) diff --git a/fs/xfs/libxfs/xfs_defer.h b/fs/xfs/libxfs/xfs_defer.h index 81cca60d70a3..8b338031e487 100644 --- a/fs/xfs/libxfs/xfs_defer.h +++ b/fs/xfs/libxfs/xfs_defer.h @@ -77,7 +77,13 @@ extern const struct xfs_defer_op_type xfs_exchmaps_defer_type; /* * Deferred operation item relogging limits. */ -#define XFS_DEFER_OPS_NR_INODES 2 /* join up to two inodes */ + +/* + * Rename w/ parent pointers can require up to 5 inodes with deferred ops to + * be joined to the transaction: src_dp, target_dp, src_ip, target_ip, and wip. + * These inodes are locked in sorted order by their inode numbers + */ +#define XFS_DEFER_OPS_NR_INODES 5 #define XFS_DEFER_OPS_NR_BUFS 2 /* join up to two buffers */ /* Resources that must be held across a transaction roll. */ diff --git a/fs/xfs/scrub/rmap_repair.c b/fs/xfs/scrub/rmap_repair.c index e8e07b683eab..25acd69614c2 100644 --- a/fs/xfs/scrub/rmap_repair.c +++ b/fs/xfs/scrub/rmap_repair.c @@ -578,23 +578,9 @@ xrep_rmap_scan_inode( struct xrep_rmap *rr, struct xfs_inode *ip) { - unsigned int lock_mode = 0; + unsigned int lock_mode = xrep_rmap_scan_ilock(ip); int error; - /* - * Directory updates (create/link/unlink/rename) drop the directory's - * ILOCK before finishing any rmapbt updates associated with directory - * shape changes. For this scan to coordinate correctly with the live - * update hook, we must take the only lock (i_rwsem) that is held all - * the way to dir op completion. This will get fixed by the parent - * pointer patchset. - */ - if (S_ISDIR(VFS_I(ip)->i_mode)) { - lock_mode = XFS_IOLOCK_SHARED; - xfs_ilock(ip, lock_mode); - } - lock_mode |= xrep_rmap_scan_ilock(ip); - /* Check the data fork. */ error = xrep_rmap_scan_ifork(rr, ip, XFS_DATA_FORK); if (error) diff --git a/fs/xfs/scrub/tempfile.c b/fs/xfs/scrub/tempfile.c index c72e447eb8ec..6f39504a216e 100644 --- a/fs/xfs/scrub/tempfile.c +++ b/fs/xfs/scrub/tempfile.c @@ -153,6 +153,7 @@ xrep_tempfile_create( xfs_qm_dqrele(pdqp); /* Finish setting up the incore / vfs context. */ + xfs_iunlock(sc->tempip, XFS_ILOCK_EXCL); xfs_setup_iops(sc->tempip); xfs_finish_inode_setup(sc->tempip); @@ -168,6 +169,7 @@ out_release_inode: * transactions and deadlocks from xfs_inactive. */ if (sc->tempip) { + xfs_iunlock(sc->tempip, XFS_ILOCK_EXCL); xfs_finish_inode_setup(sc->tempip); xchk_irele(sc, sc->tempip); } diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c index c98cb468c357..13aba84bd64a 100644 --- a/fs/xfs/xfs_dquot.c +++ b/fs/xfs/xfs_dquot.c @@ -1371,6 +1371,47 @@ xfs_dqlock2( } } +static int +xfs_dqtrx_cmp( + const void *a, + const void *b) +{ + const struct xfs_dqtrx *qa = a; + const struct xfs_dqtrx *qb = b; + + if (qa->qt_dquot->q_id > qb->qt_dquot->q_id) + return 1; + if (qa->qt_dquot->q_id < qb->qt_dquot->q_id) + return -1; + return 0; +} + +void +xfs_dqlockn( + struct xfs_dqtrx *q) +{ + unsigned int i; + + BUILD_BUG_ON(XFS_QM_TRANS_MAXDQS > MAX_LOCKDEP_SUBCLASSES); + + /* Sort in order of dquot id, do not allow duplicates */ + for (i = 0; i < XFS_QM_TRANS_MAXDQS && q[i].qt_dquot != NULL; i++) { + unsigned int j; + + for (j = 0; j < i; j++) + ASSERT(q[i].qt_dquot != q[j].qt_dquot); + } + if (i == 0) + return; + + sort(q, i, sizeof(struct xfs_dqtrx), xfs_dqtrx_cmp, NULL); + + mutex_lock(&q[0].qt_dquot->q_qlock); + for (i = 1; i < XFS_QM_TRANS_MAXDQS && q[i].qt_dquot != NULL; i++) + mutex_lock_nested(&q[i].qt_dquot->q_qlock, + XFS_QLOCK_NESTED + i - 1); +} + int __init xfs_qm_init(void) { diff --git a/fs/xfs/xfs_dquot.h b/fs/xfs/xfs_dquot.h index 956272d9b302..677bb2dc9ac9 100644 --- a/fs/xfs/xfs_dquot.h +++ b/fs/xfs/xfs_dquot.h @@ -223,6 +223,7 @@ int xfs_qm_dqget_uncached(struct xfs_mount *mp, void xfs_qm_dqput(struct xfs_dquot *dqp); void xfs_dqlock2(struct xfs_dquot *, struct xfs_dquot *); +void xfs_dqlockn(struct xfs_dqtrx *q); void xfs_dquot_set_prealloc_limits(struct xfs_dquot *); diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 03dcb4ac0431..2aec7ab59aeb 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -418,7 +418,7 @@ xfs_lock_inumorder( * lock more than one at a time, lockdep will report false positives saying we * have violated locking orders. */ -static void +void xfs_lock_inodes( struct xfs_inode **ips, int inodes, @@ -747,6 +747,8 @@ xfs_inode_inherit_flags2( /* * Initialise a newly allocated inode and return the in-core inode to the * caller locked exclusively. + * + * Caller is responsible for unlocking the inode manually upon return */ int xfs_init_new_inode( @@ -873,7 +875,7 @@ xfs_init_new_inode( /* * Log the new values stuffed into the inode. */ - xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); + xfs_trans_ijoin(tp, ip, 0); xfs_trans_log_inode(tp, ip, flags); /* now that we have an i_mode we can setup the inode structure */ @@ -1101,8 +1103,7 @@ xfs_create( * the transaction cancel unlocking dp so don't do it explicitly in the * error path. */ - xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL); - unlock_dp_on_error = false; + xfs_trans_ijoin(tp, dp, 0); error = xfs_dir_createname(tp, dp, name, ip->i_ino, resblks - XFS_IALLOC_SPACE_RES(mp)); @@ -1151,6 +1152,8 @@ xfs_create( xfs_qm_dqrele(pdqp); *ipp = ip; + xfs_iunlock(ip, XFS_ILOCK_EXCL); + xfs_iunlock(dp, XFS_ILOCK_EXCL); return 0; out_trans_cancel: @@ -1162,6 +1165,7 @@ xfs_create( * transactions and deadlocks from xfs_inactive. */ if (ip) { + xfs_iunlock(ip, XFS_ILOCK_EXCL); xfs_finish_inode_setup(ip); xfs_irele(ip); } @@ -1247,6 +1251,7 @@ xfs_create_tmpfile( xfs_qm_dqrele(pdqp); *ipp = ip; + xfs_iunlock(ip, XFS_ILOCK_EXCL); return 0; out_trans_cancel: @@ -1258,6 +1263,7 @@ xfs_create_tmpfile( * transactions and deadlocks from xfs_inactive. */ if (ip) { + xfs_iunlock(ip, XFS_ILOCK_EXCL); xfs_finish_inode_setup(ip); xfs_irele(ip); } @@ -1362,10 +1368,15 @@ xfs_link( if (xfs_has_wsync(mp) || xfs_has_dirsync(mp)) xfs_trans_set_sync(tp); - return xfs_trans_commit(tp); + error = xfs_trans_commit(tp); + xfs_iunlock(tdp, XFS_ILOCK_EXCL); + xfs_iunlock(sip, XFS_ILOCK_EXCL); + return error; error_return: xfs_trans_cancel(tp); + xfs_iunlock(tdp, XFS_ILOCK_EXCL); + xfs_iunlock(sip, XFS_ILOCK_EXCL); std_return: if (error == -ENOSPC && nospace_error) error = nospace_error; @@ -2775,19 +2786,39 @@ xfs_remove( error = xfs_trans_commit(tp); if (error) - goto std_return; + goto out_unlock; if (is_dir && xfs_inode_is_filestream(ip)) xfs_filestream_deassociate(ip); + xfs_iunlock(ip, XFS_ILOCK_EXCL); + xfs_iunlock(dp, XFS_ILOCK_EXCL); return 0; out_trans_cancel: xfs_trans_cancel(tp); + out_unlock: + xfs_iunlock(ip, XFS_ILOCK_EXCL); + xfs_iunlock(dp, XFS_ILOCK_EXCL); std_return: return error; } +static inline void +xfs_iunlock_rename( + struct xfs_inode **i_tab, + int num_inodes) +{ + int i; + + for (i = num_inodes - 1; i >= 0; i--) { + /* Skip duplicate inodes if src and target dps are the same */ + if (!i_tab[i] || (i > 0 && i_tab[i] == i_tab[i - 1])) + continue; + xfs_iunlock(i_tab[i], XFS_ILOCK_EXCL); + } +} + /* * Enter all inodes for a rename transaction into a sorted array. */ @@ -2802,7 +2833,7 @@ xfs_sort_for_rename( struct xfs_inode **i_tab,/* out: sorted array of inodes */ int *num_inodes) /* in/out: inodes in array */ { - int i, j; + int i; ASSERT(*num_inodes == __XFS_SORT_INODES); memset(i_tab, 0, *num_inodes * sizeof(struct xfs_inode *)); @@ -2824,17 +2855,26 @@ xfs_sort_for_rename( i_tab[i++] = wip; *num_inodes = i; + xfs_sort_inodes(i_tab, *num_inodes); +} + +void +xfs_sort_inodes( + struct xfs_inode **i_tab, + unsigned int num_inodes) +{ + int i, j; + + ASSERT(num_inodes <= __XFS_SORT_INODES); + /* * Sort the elements via bubble sort. (Remember, there are at * most 5 elements to sort, so this is adequate.) */ - for (i = 0; i < *num_inodes; i++) { - for (j = 1; j < *num_inodes; j++) { - if (i_tab[j]->i_ino < i_tab[j-1]->i_ino) { - struct xfs_inode *temp = i_tab[j]; - i_tab[j] = i_tab[j-1]; - i_tab[j-1] = temp; - } + for (i = 0; i < num_inodes; i++) { + for (j = 1; j < num_inodes; j++) { + if (i_tab[j]->i_ino < i_tab[j-1]->i_ino) + swap(i_tab[j], i_tab[j - 1]); } } } @@ -3088,8 +3128,10 @@ retry: * Attach the dquots to the inodes */ error = xfs_qm_vop_rename_dqattach(inodes); - if (error) - goto out_trans_cancel; + if (error) { + xfs_trans_cancel(tp); + goto out_release_wip; + } /* * Lock all the participating inodes. Depending upon whether @@ -3100,18 +3142,16 @@ retry: xfs_lock_inodes(inodes, num_inodes, XFS_ILOCK_EXCL); /* - * Join all the inodes to the transaction. From this point on, - * we can rely on either trans_commit or trans_cancel to unlock - * them. + * Join all the inodes to the transaction. */ - xfs_trans_ijoin(tp, src_dp, XFS_ILOCK_EXCL); + xfs_trans_ijoin(tp, src_dp, 0); if (new_parent) - xfs_trans_ijoin(tp, target_dp, XFS_ILOCK_EXCL); - xfs_trans_ijoin(tp, src_ip, XFS_ILOCK_EXCL); + xfs_trans_ijoin(tp, target_dp, 0); + xfs_trans_ijoin(tp, src_ip, 0); if (target_ip) - xfs_trans_ijoin(tp, target_ip, XFS_ILOCK_EXCL); + xfs_trans_ijoin(tp, target_ip, 0); if (wip) - xfs_trans_ijoin(tp, wip, XFS_ILOCK_EXCL); + xfs_trans_ijoin(tp, wip, 0); /* * If we are using project inheritance, we only allow renames @@ -3125,10 +3165,13 @@ retry: } /* RENAME_EXCHANGE is unique from here on. */ - if (flags & RENAME_EXCHANGE) - return xfs_cross_rename(tp, src_dp, src_name, src_ip, + if (flags & RENAME_EXCHANGE) { + error = xfs_cross_rename(tp, src_dp, src_name, src_ip, target_dp, target_name, target_ip, spaceres); + xfs_iunlock_rename(inodes, num_inodes); + return error; + } /* * Try to reserve quota to handle an expansion of the target directory. @@ -3142,6 +3185,7 @@ retry: if (error == -EDQUOT || error == -ENOSPC) { if (!retried) { xfs_trans_cancel(tp); + xfs_iunlock_rename(inodes, num_inodes); xfs_blockgc_free_quota(target_dp, 0); retried = true; goto retry; @@ -3368,12 +3412,14 @@ retry: xfs_dir_update_hook(src_dp, wip, 1, src_name); error = xfs_finish_rename(tp); + xfs_iunlock_rename(inodes, num_inodes); if (wip) xfs_irele(wip); return error; out_trans_cancel: xfs_trans_cancel(tp); + xfs_iunlock_rename(inodes, num_inodes); out_release_wip: if (wip) xfs_irele(wip); diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index c74c48bc0945..a6da1ab8ab13 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h @@ -627,6 +627,8 @@ int xfs_ilock2_io_mmap(struct xfs_inode *ip1, struct xfs_inode *ip2); void xfs_iunlock2_io_mmap(struct xfs_inode *ip1, struct xfs_inode *ip2); void xfs_iunlock2_remapping(struct xfs_inode *ip1, struct xfs_inode *ip2); void xfs_bumplink(struct xfs_trans *tp, struct xfs_inode *ip); +void xfs_lock_inodes(struct xfs_inode **ips, int inodes, uint lock_mode); +void xfs_sort_inodes(struct xfs_inode **i_tab, unsigned int num_inodes); static inline bool xfs_inode_unlinked_incomplete( diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c index 0f4cf4170c35..47120b745c47 100644 --- a/fs/xfs/xfs_qm.c +++ b/fs/xfs/xfs_qm.c @@ -836,8 +836,10 @@ xfs_qm_qino_alloc( ASSERT(xfs_is_shutdown(mp)); xfs_alert(mp, "%s failed (error %d)!", __func__, error); } - if (need_alloc) + if (need_alloc) { + xfs_iunlock(*ipp, XFS_ILOCK_EXCL); xfs_finish_inode_setup(*ipp); + } return error; } diff --git a/fs/xfs/xfs_qm.h b/fs/xfs/xfs_qm.h index f5993012bf98..6e09dfcd13e2 100644 --- a/fs/xfs/xfs_qm.h +++ b/fs/xfs/xfs_qm.h @@ -136,7 +136,7 @@ enum { XFS_QM_TRANS_PRJ, XFS_QM_TRANS_DQTYPES }; -#define XFS_QM_TRANS_MAXDQS 2 +#define XFS_QM_TRANS_MAXDQS 5 struct xfs_dquot_acct { struct xfs_dqtrx dqs[XFS_QM_TRANS_DQTYPES][XFS_QM_TRANS_MAXDQS]; }; diff --git a/fs/xfs/xfs_symlink.c b/fs/xfs/xfs_symlink.c index fb060aaf6d40..85ef56fdd7df 100644 --- a/fs/xfs/xfs_symlink.c +++ b/fs/xfs/xfs_symlink.c @@ -172,8 +172,7 @@ xfs_symlink( * the transaction cancel unlocking dp so don't do it explicitly in the * error path. */ - xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL); - unlock_dp_on_error = false; + xfs_trans_ijoin(tp, dp, 0); /* * Also attach the dquot(s) to it, if applicable. @@ -215,6 +214,8 @@ xfs_symlink( xfs_qm_dqrele(pdqp); *ipp = ip; + xfs_iunlock(ip, XFS_ILOCK_EXCL); + xfs_iunlock(dp, XFS_ILOCK_EXCL); return 0; out_trans_cancel: @@ -226,6 +227,7 @@ out_release_inode: * transactions and deadlocks from xfs_inactive. */ if (ip) { + xfs_iunlock(ip, XFS_ILOCK_EXCL); xfs_finish_inode_setup(ip); xfs_irele(ip); } diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c index 7350640059cc..50d878d78a5e 100644 --- a/fs/xfs/xfs_trans.c +++ b/fs/xfs/xfs_trans.c @@ -1430,6 +1430,8 @@ out_cancel: * The caller must ensure that the on-disk dquots attached to this inode have * already been allocated and initialized. The ILOCKs will be dropped when the * transaction is committed or cancelled. + * + * Caller is responsible for unlocking the inodes manually upon return */ int xfs_trans_alloc_dir( @@ -1460,8 +1462,8 @@ retry: xfs_lock_two_inodes(dp, XFS_ILOCK_EXCL, ip, XFS_ILOCK_EXCL); - xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL); - xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); + xfs_trans_ijoin(tp, dp, 0); + xfs_trans_ijoin(tp, ip, 0); error = xfs_qm_dqattach_locked(dp, false); if (error) { @@ -1484,6 +1486,9 @@ retry: if (error == -EDQUOT || error == -ENOSPC) { if (!retried) { xfs_trans_cancel(tp); + xfs_iunlock(dp, XFS_ILOCK_EXCL); + if (dp != ip) + xfs_iunlock(ip, XFS_ILOCK_EXCL); xfs_blockgc_free_quota(dp, 0); retried = true; goto retry; diff --git a/fs/xfs/xfs_trans_dquot.c b/fs/xfs/xfs_trans_dquot.c index 577b535a595c..b368e13424c4 100644 --- a/fs/xfs/xfs_trans_dquot.c +++ b/fs/xfs/xfs_trans_dquot.c @@ -379,24 +379,29 @@ xfs_trans_mod_dquot( /* * Given an array of dqtrx structures, lock all the dquots associated and join - * them to the transaction, provided they have been modified. We know that the - * highest number of dquots of one type - usr, grp and prj - involved in a - * transaction is 3 so we don't need to make this very generic. + * them to the transaction, provided they have been modified. */ STATIC void xfs_trans_dqlockedjoin( struct xfs_trans *tp, struct xfs_dqtrx *q) { + unsigned int i; ASSERT(q[0].qt_dquot != NULL); if (q[1].qt_dquot == NULL) { xfs_dqlock(q[0].qt_dquot); xfs_trans_dqjoin(tp, q[0].qt_dquot); - } else { - ASSERT(XFS_QM_TRANS_MAXDQS == 2); + } else if (q[2].qt_dquot == NULL) { xfs_dqlock2(q[0].qt_dquot, q[1].qt_dquot); xfs_trans_dqjoin(tp, q[0].qt_dquot); xfs_trans_dqjoin(tp, q[1].qt_dquot); + } else { + xfs_dqlockn(q); + for (i = 0; i < XFS_QM_TRANS_MAXDQS; i++) { + if (q[i].qt_dquot == NULL) + break; + xfs_trans_dqjoin(tp, q[i].qt_dquot); + } } }