Merge tag 'xfs-for-linus-4.7-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/dgc/linux-xfs

Pull xfs updates from Dave Chinner:
 "A pretty average collection of fixes, cleanups and improvements in
  this request.

  Summary:
   - fixes for mount line parsing, sparse warnings, read-only compat
     feature remount behaviour
   - allow fast path symlink lookups for inline symlinks.
   - attribute listing cleanups
   - writeback goes direct to bios rather than indirecting through
     bufferheads
   - transaction allocation cleanup
   - optimised kmem_realloc
   - added configurable error handling for metadata write errors,
     changed default error handling behaviour from "retry forever" to
     "retry until unmount then fail"
   - fixed several inode cluster writeback lookup vs reclaim race
     conditions
   - fixed inode cluster writeback checking wrong inode after lookup
   - fixed bugs where struct xfs_inode freeing wasn't actually RCU safe
   - cleaned up inode reclaim tagging"

* tag 'xfs-for-linus-4.7-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/dgc/linux-xfs: (39 commits)
  xfs: fix warning in xfs_finish_page_writeback for non-debug builds
  xfs: move reclaim tagging functions
  xfs: simplify inode reclaim tagging interfaces
  xfs: rename variables in xfs_iflush_cluster for clarity
  xfs: xfs_iflush_cluster has range issues
  xfs: mark reclaimed inodes invalid earlier
  xfs: xfs_inode_free() isn't RCU safe
  xfs: optimise xfs_iext_destroy
  xfs: skip stale inodes in xfs_iflush_cluster
  xfs: fix inode validity check in xfs_iflush_cluster
  xfs: xfs_iflush_cluster fails to abort on error
  xfs: remove xfs_fs_evict_inode()
  xfs: add "fail at unmount" error handling configuration
  xfs: add configuration handlers for specific errors
  xfs: add configuration of error failure speed
  xfs: introduce table-based init for error behaviors
  xfs: add configurable error support to metadata buffers
  xfs: introduce metadata IO error class
  xfs: configurable error behavior via sysfs
  xfs: buffer ->bi_end_io function requires irq-safe lock
  ...
This commit is contained in:
Linus Torvalds
2016-05-26 10:13:40 -07:00
49 changed files with 1295 additions and 1137 deletions

View File

@ -4542,7 +4542,6 @@ int readlink_copy(char __user *buffer, int buflen, const char *link)
out: out:
return len; return len;
} }
EXPORT_SYMBOL(readlink_copy);
/* /*
* A helper for ->readlink(). This should be used *ONLY* for symlinks that * A helper for ->readlink(). This should be used *ONLY* for symlinks that

View File

@ -93,19 +93,23 @@ kmem_zalloc_large(size_t size, xfs_km_flags_t flags)
} }
void * void *
kmem_realloc(const void *ptr, size_t newsize, size_t oldsize, kmem_realloc(const void *old, size_t newsize, xfs_km_flags_t flags)
xfs_km_flags_t flags)
{ {
void *new; int retries = 0;
gfp_t lflags = kmem_flags_convert(flags);
void *ptr;
new = kmem_alloc(newsize, flags); do {
if (ptr) { ptr = krealloc(old, newsize, lflags);
if (new) if (ptr || (flags & (KM_MAYFAIL|KM_NOSLEEP)))
memcpy(new, ptr, return ptr;
((oldsize < newsize) ? oldsize : newsize)); if (!(++retries % 100))
kmem_free(ptr); xfs_err(NULL,
} "%s(%u) possible memory allocation deadlock size %zu in %s (mode:0x%x)",
return new; current->comm, current->pid,
newsize, __func__, lflags);
congestion_wait(BLK_RW_ASYNC, HZ/50);
} while (1);
} }
void * void *

View File

@ -62,7 +62,7 @@ kmem_flags_convert(xfs_km_flags_t flags)
extern void *kmem_alloc(size_t, xfs_km_flags_t); extern void *kmem_alloc(size_t, xfs_km_flags_t);
extern void *kmem_zalloc_large(size_t size, xfs_km_flags_t); extern void *kmem_zalloc_large(size_t size, xfs_km_flags_t);
extern void *kmem_realloc(const void *, size_t, size_t, xfs_km_flags_t); extern void *kmem_realloc(const void *, size_t, xfs_km_flags_t);
static inline void kmem_free(const void *ptr) static inline void kmem_free(const void *ptr)
{ {
kvfree(ptr); kvfree(ptr);

View File

@ -242,37 +242,21 @@ xfs_attr_set(
return error; return error;
} }
/* tres.tr_logres = M_RES(mp)->tr_attrsetm.tr_logres +
* Start our first transaction of the day. M_RES(mp)->tr_attrsetrt.tr_logres * args.total;
* tres.tr_logcount = XFS_ATTRSET_LOG_COUNT;
* All future transactions during this code must be "chained" off tres.tr_logflags = XFS_TRANS_PERM_LOG_RES;
* this one via the trans_dup() call. All transactions will contain
* the inode, and the inode will always be marked with trans_ihold().
* Since the inode will be locked in all transactions, we must log
* the inode in every transaction to let it float upward through
* the log.
*/
args.trans = xfs_trans_alloc(mp, XFS_TRANS_ATTR_SET);
/* /*
* Root fork attributes can use reserved data blocks for this * Root fork attributes can use reserved data blocks for this
* operation if necessary * operation if necessary
*/ */
error = xfs_trans_alloc(mp, &tres, args.total, 0,
if (rsvd) rsvd ? XFS_TRANS_RESERVE : 0, &args.trans);
args.trans->t_flags |= XFS_TRANS_RESERVE; if (error)
tres.tr_logres = M_RES(mp)->tr_attrsetm.tr_logres +
M_RES(mp)->tr_attrsetrt.tr_logres * args.total;
tres.tr_logcount = XFS_ATTRSET_LOG_COUNT;
tres.tr_logflags = XFS_TRANS_PERM_LOG_RES;
error = xfs_trans_reserve(args.trans, &tres, args.total, 0);
if (error) {
xfs_trans_cancel(args.trans);
return error; return error;
}
xfs_ilock(dp, XFS_ILOCK_EXCL);
xfs_ilock(dp, XFS_ILOCK_EXCL);
error = xfs_trans_reserve_quota_nblks(args.trans, dp, args.total, 0, error = xfs_trans_reserve_quota_nblks(args.trans, dp, args.total, 0,
rsvd ? XFS_QMOPT_RES_REGBLKS | XFS_QMOPT_FORCE_RES : rsvd ? XFS_QMOPT_RES_REGBLKS | XFS_QMOPT_FORCE_RES :
XFS_QMOPT_RES_REGBLKS); XFS_QMOPT_RES_REGBLKS);
@ -428,32 +412,16 @@ xfs_attr_remove(
if (error) if (error)
return error; return error;
/*
* Start our first transaction of the day.
*
* All future transactions during this code must be "chained" off
* this one via the trans_dup() call. All transactions will contain
* the inode, and the inode will always be marked with trans_ihold().
* Since the inode will be locked in all transactions, we must log
* the inode in every transaction to let it float upward through
* the log.
*/
args.trans = xfs_trans_alloc(mp, XFS_TRANS_ATTR_RM);
/* /*
* Root fork attributes can use reserved data blocks for this * Root fork attributes can use reserved data blocks for this
* operation if necessary * operation if necessary
*/ */
error = xfs_trans_alloc(mp, &M_RES(mp)->tr_attrrm,
if (flags & ATTR_ROOT) XFS_ATTRRM_SPACE_RES(mp), 0,
args.trans->t_flags |= XFS_TRANS_RESERVE; (flags & ATTR_ROOT) ? XFS_TRANS_RESERVE : 0,
&args.trans);
error = xfs_trans_reserve(args.trans, &M_RES(mp)->tr_attrrm, if (error)
XFS_ATTRRM_SPACE_RES(mp), 0);
if (error) {
xfs_trans_cancel(args.trans);
return error; return error;
}
xfs_ilock(dp, XFS_ILOCK_EXCL); xfs_ilock(dp, XFS_ILOCK_EXCL);
/* /*

View File

@ -1121,15 +1121,14 @@ xfs_bmap_add_attrfork(
mp = ip->i_mount; mp = ip->i_mount;
ASSERT(!XFS_NOT_DQATTACHED(mp, ip)); ASSERT(!XFS_NOT_DQATTACHED(mp, ip));
tp = xfs_trans_alloc(mp, XFS_TRANS_ADDAFORK);
blks = XFS_ADDAFORK_SPACE_RES(mp); blks = XFS_ADDAFORK_SPACE_RES(mp);
if (rsvd)
tp->t_flags |= XFS_TRANS_RESERVE; error = xfs_trans_alloc(mp, &M_RES(mp)->tr_addafork, blks, 0,
error = xfs_trans_reserve(tp, &M_RES(mp)->tr_addafork, blks, 0); rsvd ? XFS_TRANS_RESERVE : 0, &tp);
if (error) { if (error)
xfs_trans_cancel(tp);
return error; return error;
}
xfs_ilock(ip, XFS_ILOCK_EXCL); xfs_ilock(ip, XFS_ILOCK_EXCL);
error = xfs_trans_reserve_quota_nblks(tp, ip, blks, 0, rsvd ? error = xfs_trans_reserve_quota_nblks(tp, ip, blks, 0, rsvd ?
XFS_QMOPT_RES_REGBLKS | XFS_QMOPT_FORCE_RES : XFS_QMOPT_RES_REGBLKS | XFS_QMOPT_FORCE_RES :
@ -6026,13 +6025,10 @@ xfs_bmap_split_extent(
xfs_fsblock_t firstfsb; xfs_fsblock_t firstfsb;
int error; int error;
tp = xfs_trans_alloc(mp, XFS_TRANS_DIOSTRAT); error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write,
error = xfs_trans_reserve(tp, &M_RES(mp)->tr_write, XFS_DIOSTRAT_SPACE_RES(mp, 0), 0, 0, &tp);
XFS_DIOSTRAT_SPACE_RES(mp, 0), 0); if (error)
if (error) {
xfs_trans_cancel(tp);
return error; return error;
}
xfs_ilock(ip, XFS_ILOCK_EXCL); xfs_ilock(ip, XFS_ILOCK_EXCL);
xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);

View File

@ -257,15 +257,12 @@ xfs_dir2_block_to_sf(
* *
* Convert the inode to local format and copy the data in. * Convert the inode to local format and copy the data in.
*/ */
dp->i_df.if_flags &= ~XFS_IFEXTENTS;
dp->i_df.if_flags |= XFS_IFINLINE;
dp->i_d.di_format = XFS_DINODE_FMT_LOCAL;
ASSERT(dp->i_df.if_bytes == 0); ASSERT(dp->i_df.if_bytes == 0);
xfs_idata_realloc(dp, size, XFS_DATA_FORK); xfs_init_local_fork(dp, XFS_DATA_FORK, dst, size);
dp->i_d.di_format = XFS_DINODE_FMT_LOCAL;
dp->i_d.di_size = size;
logflags |= XFS_ILOG_DDATA; logflags |= XFS_ILOG_DDATA;
memcpy(dp->i_df.if_u1.if_data, dst, size);
dp->i_d.di_size = size;
xfs_dir2_sf_check(args); xfs_dir2_sf_check(args);
out: out:
xfs_trans_log_inode(args->trans, dp, logflags); xfs_trans_log_inode(args->trans, dp, logflags);

View File

@ -231,6 +231,48 @@ xfs_iformat_fork(
return error; return error;
} }
/*
 * Set up the in-core copy of a local-format (inline) fork.
 *
 * @ip:		inode owning the fork
 * @whichfork:	fork selector handed to XFS_IFORK_PTR()
 * @data:	source bytes to copy into the in-core fork
 * @size:	number of bytes at @data
 *
 * Small forks are stored in the fork's embedded if_inline_data array; larger
 * ones get a kmem_alloc()ed buffer rounded up to a multiple of four bytes.
 * On return if_bytes holds @size, if_real_bytes holds the heap allocation
 * size (zero when nothing was allocated), and the fork is flagged
 * XFS_IFINLINE with XFS_IFEXTENTS and XFS_IFBROOT cleared.
 */
void
xfs_init_local_fork(
	struct xfs_inode	*ip,
	int			whichfork,
	const void		*data,
	int			size)
{
	struct xfs_ifork	*ifp = XFS_IFORK_PTR(ip, whichfork);
	bool			is_symlink = S_ISLNK(VFS_I(ip)->i_mode);
	int			heap_bytes = 0;
	int			needed = size;

	/*
	 * A symlink body kept in the local fork is handed straight back to
	 * the VFS, so it must be NUL terminated.  Reserve one extra byte of
	 * in-memory space for the terminator; if_bytes still reports only
	 * the caller's size.
	 */
	if (is_symlink)
		needed++;

	if (size == 0) {
		/* Nothing to store: no buffer and no copy. */
		ifp->if_u1.if_data = NULL;
	} else {
		if (needed <= sizeof(ifp->if_u2.if_inline_data)) {
			/* Fits in the array embedded in the fork itself. */
			ifp->if_u1.if_data = ifp->if_u2.if_inline_data;
		} else {
			heap_bytes = roundup(needed, 4);
			ifp->if_u1.if_data = kmem_alloc(heap_bytes,
							KM_SLEEP | KM_NOFS);
		}

		memcpy(ifp->if_u1.if_data, data, size);
		if (is_symlink)
			ifp->if_u1.if_data[size] = '\0';
	}

	ifp->if_bytes = size;
	ifp->if_real_bytes = heap_bytes;

	ifp->if_flags &= ~(XFS_IFEXTENTS | XFS_IFBROOT);
	ifp->if_flags |= XFS_IFINLINE;
}
/* /*
* The file is in-lined in the on-disk inode. * The file is in-lined in the on-disk inode.
* If it fits into if_inline_data, then copy * If it fits into if_inline_data, then copy
@ -248,8 +290,6 @@ xfs_iformat_local(
int whichfork, int whichfork,
int size) int size)
{ {
xfs_ifork_t *ifp;
int real_size;
/* /*
* If the size is unreasonable, then something * If the size is unreasonable, then something
@ -265,22 +305,8 @@ xfs_iformat_local(
ip->i_mount, dip); ip->i_mount, dip);
return -EFSCORRUPTED; return -EFSCORRUPTED;
} }
ifp = XFS_IFORK_PTR(ip, whichfork);
real_size = 0; xfs_init_local_fork(ip, whichfork, XFS_DFORK_PTR(dip, whichfork), size);
if (size == 0)
ifp->if_u1.if_data = NULL;
else if (size <= sizeof(ifp->if_u2.if_inline_data))
ifp->if_u1.if_data = ifp->if_u2.if_inline_data;
else {
real_size = roundup(size, 4);
ifp->if_u1.if_data = kmem_alloc(real_size, KM_SLEEP | KM_NOFS);
}
ifp->if_bytes = size;
ifp->if_real_bytes = real_size;
if (size)
memcpy(ifp->if_u1.if_data, XFS_DFORK_PTR(dip, whichfork), size);
ifp->if_flags &= ~XFS_IFEXTENTS;
ifp->if_flags |= XFS_IFINLINE;
return 0; return 0;
} }
@ -516,7 +542,6 @@ xfs_iroot_realloc(
new_max = cur_max + rec_diff; new_max = cur_max + rec_diff;
new_size = XFS_BMAP_BROOT_SPACE_CALC(mp, new_max); new_size = XFS_BMAP_BROOT_SPACE_CALC(mp, new_max);
ifp->if_broot = kmem_realloc(ifp->if_broot, new_size, ifp->if_broot = kmem_realloc(ifp->if_broot, new_size,
XFS_BMAP_BROOT_SPACE_CALC(mp, cur_max),
KM_SLEEP | KM_NOFS); KM_SLEEP | KM_NOFS);
op = (char *)XFS_BMAP_BROOT_PTR_ADDR(mp, ifp->if_broot, 1, op = (char *)XFS_BMAP_BROOT_PTR_ADDR(mp, ifp->if_broot, 1,
ifp->if_broot_bytes); ifp->if_broot_bytes);
@ -660,7 +685,6 @@ xfs_idata_realloc(
ifp->if_u1.if_data = ifp->if_u1.if_data =
kmem_realloc(ifp->if_u1.if_data, kmem_realloc(ifp->if_u1.if_data,
real_size, real_size,
ifp->if_real_bytes,
KM_SLEEP | KM_NOFS); KM_SLEEP | KM_NOFS);
} }
} else { } else {
@ -1376,8 +1400,7 @@ xfs_iext_realloc_direct(
if (rnew_size != ifp->if_real_bytes) { if (rnew_size != ifp->if_real_bytes) {
ifp->if_u1.if_extents = ifp->if_u1.if_extents =
kmem_realloc(ifp->if_u1.if_extents, kmem_realloc(ifp->if_u1.if_extents,
rnew_size, rnew_size, KM_NOFS);
ifp->if_real_bytes, KM_NOFS);
} }
if (rnew_size > ifp->if_real_bytes) { if (rnew_size > ifp->if_real_bytes) {
memset(&ifp->if_u1.if_extents[ifp->if_bytes / memset(&ifp->if_u1.if_extents[ifp->if_bytes /
@ -1461,9 +1484,8 @@ xfs_iext_realloc_indirect(
if (new_size == 0) { if (new_size == 0) {
xfs_iext_destroy(ifp); xfs_iext_destroy(ifp);
} else { } else {
ifp->if_u1.if_ext_irec = (xfs_ext_irec_t *) ifp->if_u1.if_ext_irec =
kmem_realloc(ifp->if_u1.if_ext_irec, kmem_realloc(ifp->if_u1.if_ext_irec, new_size, KM_NOFS);
new_size, size, KM_NOFS);
} }
} }
@ -1496,6 +1518,24 @@ xfs_iext_indirect_to_direct(
} }
} }
/*
 * Tear down the entire indirection array in one pass: free the extent
 * buffer hanging off every record, free the record array itself, and
 * clear XFS_IFEXTIREC on the fork.
 */
STATIC void
xfs_iext_irec_remove_all(
	struct xfs_ifork	*ifp)
{
	int			count;
	int			idx = 0;

	ASSERT(ifp->if_flags & XFS_IFEXTIREC);

	/* The number of records is derived from if_real_bytes. */
	count = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
	while (idx < count) {
		kmem_free(ifp->if_u1.if_ext_irec[idx].er_extbuf);
		idx++;
	}

	kmem_free(ifp->if_u1.if_ext_irec);
	ifp->if_flags &= ~XFS_IFEXTIREC;
}
/* /*
* Free incore file extents. * Free incore file extents.
*/ */
@ -1504,14 +1544,7 @@ xfs_iext_destroy(
xfs_ifork_t *ifp) /* inode fork pointer */ xfs_ifork_t *ifp) /* inode fork pointer */
{ {
if (ifp->if_flags & XFS_IFEXTIREC) { if (ifp->if_flags & XFS_IFEXTIREC) {
int erp_idx; xfs_iext_irec_remove_all(ifp);
int nlists;
nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
for (erp_idx = nlists - 1; erp_idx >= 0 ; erp_idx--) {
xfs_iext_irec_remove(ifp, erp_idx);
}
ifp->if_flags &= ~XFS_IFEXTIREC;
} else if (ifp->if_real_bytes) { } else if (ifp->if_real_bytes) {
kmem_free(ifp->if_u1.if_extents); kmem_free(ifp->if_u1.if_extents);
} else if (ifp->if_bytes) { } else if (ifp->if_bytes) {

View File

@ -134,6 +134,7 @@ void xfs_iroot_realloc(struct xfs_inode *, int, int);
int xfs_iread_extents(struct xfs_trans *, struct xfs_inode *, int); int xfs_iread_extents(struct xfs_trans *, struct xfs_inode *, int);
int xfs_iextents_copy(struct xfs_inode *, struct xfs_bmbt_rec *, int xfs_iextents_copy(struct xfs_inode *, struct xfs_bmbt_rec *,
int); int);
void xfs_init_local_fork(struct xfs_inode *, int, const void *, int);
struct xfs_bmbt_rec_host * struct xfs_bmbt_rec_host *
xfs_iext_get_ext(struct xfs_ifork *, xfs_extnum_t); xfs_iext_get_ext(struct xfs_ifork *, xfs_extnum_t);

View File

@ -211,6 +211,11 @@ typedef struct xfs_trans_header {
#define XFS_TRANS_HEADER_MAGIC 0x5452414e /* TRAN */ #define XFS_TRANS_HEADER_MAGIC 0x5452414e /* TRAN */
/*
* The only type valid for th_type in CIL-enabled file system logs:
*/
#define XFS_TRANS_CHECKPOINT 40
/* /*
* Log item types. * Log item types.
*/ */

View File

@ -838,12 +838,10 @@ xfs_sync_sb(
struct xfs_trans *tp; struct xfs_trans *tp;
int error; int error;
tp = _xfs_trans_alloc(mp, XFS_TRANS_SB_CHANGE, KM_SLEEP); error = xfs_trans_alloc(mp, &M_RES(mp)->tr_sb, 0, 0,
error = xfs_trans_reserve(tp, &M_RES(mp)->tr_sb, 0, 0); XFS_TRANS_NO_WRITECOUNT, &tp);
if (error) { if (error)
xfs_trans_cancel(tp);
return error; return error;
}
xfs_log_sb(tp); xfs_log_sb(tp);
if (wait) if (wait)

View File

@ -55,103 +55,6 @@ extern const struct xfs_buf_ops xfs_sb_quiet_buf_ops;
extern const struct xfs_buf_ops xfs_symlink_buf_ops; extern const struct xfs_buf_ops xfs_symlink_buf_ops;
extern const struct xfs_buf_ops xfs_rtbuf_ops; extern const struct xfs_buf_ops xfs_rtbuf_ops;
/*
* Transaction types. Used to distinguish types of buffers. These never reach
* the log.
*/
#define XFS_TRANS_SETATTR_NOT_SIZE 1
#define XFS_TRANS_SETATTR_SIZE 2
#define XFS_TRANS_INACTIVE 3
#define XFS_TRANS_CREATE 4
#define XFS_TRANS_CREATE_TRUNC 5
#define XFS_TRANS_TRUNCATE_FILE 6
#define XFS_TRANS_REMOVE 7
#define XFS_TRANS_LINK 8
#define XFS_TRANS_RENAME 9
#define XFS_TRANS_MKDIR 10
#define XFS_TRANS_RMDIR 11
#define XFS_TRANS_SYMLINK 12
#define XFS_TRANS_SET_DMATTRS 13
#define XFS_TRANS_GROWFS 14
#define XFS_TRANS_STRAT_WRITE 15
#define XFS_TRANS_DIOSTRAT 16
/* 17 was XFS_TRANS_WRITE_SYNC */
#define XFS_TRANS_WRITEID 18
#define XFS_TRANS_ADDAFORK 19
#define XFS_TRANS_ATTRINVAL 20
#define XFS_TRANS_ATRUNCATE 21
#define XFS_TRANS_ATTR_SET 22
#define XFS_TRANS_ATTR_RM 23
#define XFS_TRANS_ATTR_FLAG 24
#define XFS_TRANS_CLEAR_AGI_BUCKET 25
#define XFS_TRANS_SB_CHANGE 26
/*
* Dummy entries since we use the transaction type to index into the
* trans_type[] in xlog_recover_print_trans_head()
*/
#define XFS_TRANS_DUMMY1 27
#define XFS_TRANS_DUMMY2 28
#define XFS_TRANS_QM_QUOTAOFF 29
#define XFS_TRANS_QM_DQALLOC 30
#define XFS_TRANS_QM_SETQLIM 31
#define XFS_TRANS_QM_DQCLUSTER 32
#define XFS_TRANS_QM_QINOCREATE 33
#define XFS_TRANS_QM_QUOTAOFF_END 34
#define XFS_TRANS_FSYNC_TS 35
#define XFS_TRANS_GROWFSRT_ALLOC 36
#define XFS_TRANS_GROWFSRT_ZERO 37
#define XFS_TRANS_GROWFSRT_FREE 38
#define XFS_TRANS_SWAPEXT 39
#define XFS_TRANS_CHECKPOINT 40
#define XFS_TRANS_ICREATE 41
#define XFS_TRANS_CREATE_TMPFILE 42
#define XFS_TRANS_TYPE_MAX 43
/* new transaction types need to be reflected in xfs_logprint(8) */
#define XFS_TRANS_TYPES \
{ XFS_TRANS_SETATTR_NOT_SIZE, "SETATTR_NOT_SIZE" }, \
{ XFS_TRANS_SETATTR_SIZE, "SETATTR_SIZE" }, \
{ XFS_TRANS_INACTIVE, "INACTIVE" }, \
{ XFS_TRANS_CREATE, "CREATE" }, \
{ XFS_TRANS_CREATE_TRUNC, "CREATE_TRUNC" }, \
{ XFS_TRANS_TRUNCATE_FILE, "TRUNCATE_FILE" }, \
{ XFS_TRANS_REMOVE, "REMOVE" }, \
{ XFS_TRANS_LINK, "LINK" }, \
{ XFS_TRANS_RENAME, "RENAME" }, \
{ XFS_TRANS_MKDIR, "MKDIR" }, \
{ XFS_TRANS_RMDIR, "RMDIR" }, \
{ XFS_TRANS_SYMLINK, "SYMLINK" }, \
{ XFS_TRANS_SET_DMATTRS, "SET_DMATTRS" }, \
{ XFS_TRANS_GROWFS, "GROWFS" }, \
{ XFS_TRANS_STRAT_WRITE, "STRAT_WRITE" }, \
{ XFS_TRANS_DIOSTRAT, "DIOSTRAT" }, \
{ XFS_TRANS_WRITEID, "WRITEID" }, \
{ XFS_TRANS_ADDAFORK, "ADDAFORK" }, \
{ XFS_TRANS_ATTRINVAL, "ATTRINVAL" }, \
{ XFS_TRANS_ATRUNCATE, "ATRUNCATE" }, \
{ XFS_TRANS_ATTR_SET, "ATTR_SET" }, \
{ XFS_TRANS_ATTR_RM, "ATTR_RM" }, \
{ XFS_TRANS_ATTR_FLAG, "ATTR_FLAG" }, \
{ XFS_TRANS_CLEAR_AGI_BUCKET, "CLEAR_AGI_BUCKET" }, \
{ XFS_TRANS_SB_CHANGE, "SBCHANGE" }, \
{ XFS_TRANS_DUMMY1, "DUMMY1" }, \
{ XFS_TRANS_DUMMY2, "DUMMY2" }, \
{ XFS_TRANS_QM_QUOTAOFF, "QM_QUOTAOFF" }, \
{ XFS_TRANS_QM_DQALLOC, "QM_DQALLOC" }, \
{ XFS_TRANS_QM_SETQLIM, "QM_SETQLIM" }, \
{ XFS_TRANS_QM_DQCLUSTER, "QM_DQCLUSTER" }, \
{ XFS_TRANS_QM_QINOCREATE, "QM_QINOCREATE" }, \
{ XFS_TRANS_QM_QUOTAOFF_END, "QM_QOFF_END" }, \
{ XFS_TRANS_FSYNC_TS, "FSYNC_TS" }, \
{ XFS_TRANS_GROWFSRT_ALLOC, "GROWFSRT_ALLOC" }, \
{ XFS_TRANS_GROWFSRT_ZERO, "GROWFSRT_ZERO" }, \
{ XFS_TRANS_GROWFSRT_FREE, "GROWFSRT_FREE" }, \
{ XFS_TRANS_SWAPEXT, "SWAPEXT" }, \
{ XFS_TRANS_CHECKPOINT, "CHECKPOINT" }, \
{ XFS_TRANS_ICREATE, "ICREATE" }, \
{ XFS_TRANS_CREATE_TMPFILE, "CREATE_TMPFILE" }, \
{ XLOG_UNMOUNT_REC_TYPE, "UNMOUNT" }
/* /*
* This structure is used to track log items associated with * This structure is used to track log items associated with
* a transaction. It points to the log item and keeps some * a transaction. It points to the log item and keeps some
@ -181,8 +84,9 @@ int xfs_log_calc_minimum_size(struct xfs_mount *);
#define XFS_TRANS_SYNC 0x08 /* make commit synchronous */ #define XFS_TRANS_SYNC 0x08 /* make commit synchronous */
#define XFS_TRANS_DQ_DIRTY 0x10 /* at least one dquot in trx dirty */ #define XFS_TRANS_DQ_DIRTY 0x10 /* at least one dquot in trx dirty */
#define XFS_TRANS_RESERVE 0x20 /* OK to use reserved data blocks */ #define XFS_TRANS_RESERVE 0x20 /* OK to use reserved data blocks */
#define XFS_TRANS_FREEZE_PROT 0x40 /* Transaction has elevated writer #define XFS_TRANS_NO_WRITECOUNT 0x40 /* do not elevate SB writecount */
count in superblock */ #define XFS_TRANS_NOFS 0x80 /* pass KM_NOFS to kmem_alloc */
/* /*
* Field values for xfs_trans_mod_sb. * Field values for xfs_trans_mod_sb.
*/ */

View File

@ -84,23 +84,71 @@ xfs_find_bdev_for_inode(
} }
/* /*
* We're now finished for good with this ioend structure. * We're now finished for good with this page. Update the page state via the
* Update the page state via the associated buffer_heads, * associated buffer_heads, paying attention to the start and end offsets that
* release holds on the inode and bio, and finally free * we need to process on the page.
* up memory. Do not use the ioend after this. */
/*
 * End writeback on the buffers of one bio segment.
 *
 * @inode:	inode the page belongs to; used only for the block-alignment
 *		assertions
 * @bvec:	segment describing the page and the byte range
 *		[bv_offset, bv_offset + bv_len) that was under IO
 * @error:	IO status; each buffer's ->b_end_io is told "uptodate" only
 *		when this is zero
 *
 * Walks the page's buffer_head ring, tracking the byte offset of each buffer
 * within the page, and calls ->b_end_io on every buffer that starts inside
 * the segment.
 */
static void
xfs_finish_page_writeback(
	struct inode		*inode,
	struct bio_vec		*bvec,
	int			error)
{
	unsigned int		end = bvec->bv_offset + bvec->bv_len - 1;
	struct buffer_head	*head, *bh, *next;
	unsigned int		off = 0;
	unsigned int		bsize;

	ASSERT(bvec->bv_offset < PAGE_SIZE);
	ASSERT((bvec->bv_offset & ((1 << inode->i_blkbits) - 1)) == 0);
	ASSERT(end < PAGE_SIZE);
	ASSERT((bvec->bv_len & ((1 << inode->i_blkbits) - 1)) == 0);

	bh = head = page_buffers(bvec->bv_page);

	do {
		/*
		 * Sample the ring linkage and the buffer size before running
		 * the completion callback.  Once ->b_end_io() has been called
		 * on the last buffer under IO, writeback on the page can
		 * finish and the buffer_heads are no longer guaranteed to
		 * stay around, so bh must not be dereferenced afterwards.
		 */
		next = bh->b_this_page;
		bsize = bh->b_size;

		if (off >= bvec->bv_offset) {
			/* Past the end of the segment: nothing more to do. */
			if (off > end)
				break;
			bh->b_end_io(bh, !error);
		}
		off += bsize;
	} while ((bh = next) != head);
}
/*
* We're now finished for good with this ioend structure. Update the page
* state, release holds on bios, and finally free up memory. Do not use the
* ioend after this.
*/ */
STATIC void STATIC void
xfs_destroy_ioend( xfs_destroy_ioend(
xfs_ioend_t *ioend) struct xfs_ioend *ioend,
int error)
{ {
struct buffer_head *bh, *next; struct inode *inode = ioend->io_inode;
struct bio *last = ioend->io_bio;
struct bio *bio, *next;
for (bh = ioend->io_buffer_head; bh; bh = next) { for (bio = &ioend->io_inline_bio; bio; bio = next) {
next = bh->b_private; struct bio_vec *bvec;
bh->b_end_io(bh, !ioend->io_error); int i;
/*
* For the last bio, bi_private points to the ioend, so we
* need to explicitly end the iteration here.
*/
if (bio == last)
next = NULL;
else
next = bio->bi_private;
/* walk each page on bio, ending page IO on them */
bio_for_each_segment_all(bvec, bio, i)
xfs_finish_page_writeback(inode, bvec, error);
bio_put(bio);
} }
mempool_free(ioend, xfs_ioend_pool);
} }
/* /*
@ -120,13 +168,9 @@ xfs_setfilesize_trans_alloc(
struct xfs_trans *tp; struct xfs_trans *tp;
int error; int error;
tp = xfs_trans_alloc(mp, XFS_TRANS_FSYNC_TS); error = xfs_trans_alloc(mp, &M_RES(mp)->tr_fsyncts, 0, 0, 0, &tp);
if (error)
error = xfs_trans_reserve(tp, &M_RES(mp)->tr_fsyncts, 0, 0);
if (error) {
xfs_trans_cancel(tp);
return error; return error;
}
ioend->io_append_trans = tp; ioend->io_append_trans = tp;
@ -174,7 +218,8 @@ xfs_setfilesize(
STATIC int STATIC int
xfs_setfilesize_ioend( xfs_setfilesize_ioend(
struct xfs_ioend *ioend) struct xfs_ioend *ioend,
int error)
{ {
struct xfs_inode *ip = XFS_I(ioend->io_inode); struct xfs_inode *ip = XFS_I(ioend->io_inode);
struct xfs_trans *tp = ioend->io_append_trans; struct xfs_trans *tp = ioend->io_append_trans;
@ -188,36 +233,14 @@ xfs_setfilesize_ioend(
__sb_writers_acquired(VFS_I(ip)->i_sb, SB_FREEZE_FS); __sb_writers_acquired(VFS_I(ip)->i_sb, SB_FREEZE_FS);
/* we abort the update if there was an IO error */ /* we abort the update if there was an IO error */
if (ioend->io_error) { if (error) {
xfs_trans_cancel(tp); xfs_trans_cancel(tp);
return ioend->io_error; return error;
} }
return xfs_setfilesize(ip, tp, ioend->io_offset, ioend->io_size); return xfs_setfilesize(ip, tp, ioend->io_offset, ioend->io_size);
} }
/*
* Schedule IO completion handling on the final put of an ioend.
*
* If there is no work to do we might as well call it a day and free the
* ioend right now.
*/
STATIC void
xfs_finish_ioend(
struct xfs_ioend *ioend)
{
if (atomic_dec_and_test(&ioend->io_remaining)) {
struct xfs_mount *mp = XFS_I(ioend->io_inode)->i_mount;
if (ioend->io_type == XFS_IO_UNWRITTEN)
queue_work(mp->m_unwritten_workqueue, &ioend->io_work);
else if (ioend->io_append_trans)
queue_work(mp->m_data_workqueue, &ioend->io_work);
else
xfs_destroy_ioend(ioend);
}
}
/* /*
* IO write completion. * IO write completion.
*/ */
@ -225,16 +248,17 @@ STATIC void
xfs_end_io( xfs_end_io(
struct work_struct *work) struct work_struct *work)
{ {
xfs_ioend_t *ioend = container_of(work, xfs_ioend_t, io_work); struct xfs_ioend *ioend =
struct xfs_inode *ip = XFS_I(ioend->io_inode); container_of(work, struct xfs_ioend, io_work);
int error = 0; struct xfs_inode *ip = XFS_I(ioend->io_inode);
int error = ioend->io_bio->bi_error;
/* /*
* Set an error if the mount has shut down and proceed with end I/O * Set an error if the mount has shut down and proceed with end I/O
* processing so it can perform whatever cleanups are necessary. * processing so it can perform whatever cleanups are necessary.
*/ */
if (XFS_FORCED_SHUTDOWN(ip->i_mount)) if (XFS_FORCED_SHUTDOWN(ip->i_mount))
ioend->io_error = -EIO; error = -EIO;
/* /*
* For unwritten extents we need to issue transactions to convert a * For unwritten extents we need to issue transactions to convert a
@ -244,55 +268,33 @@ xfs_end_io(
* on error. * on error.
*/ */
if (ioend->io_type == XFS_IO_UNWRITTEN) { if (ioend->io_type == XFS_IO_UNWRITTEN) {
if (ioend->io_error) if (error)
goto done; goto done;
error = xfs_iomap_write_unwritten(ip, ioend->io_offset, error = xfs_iomap_write_unwritten(ip, ioend->io_offset,
ioend->io_size); ioend->io_size);
} else if (ioend->io_append_trans) { } else if (ioend->io_append_trans) {
error = xfs_setfilesize_ioend(ioend); error = xfs_setfilesize_ioend(ioend, error);
} else { } else {
ASSERT(!xfs_ioend_is_append(ioend)); ASSERT(!xfs_ioend_is_append(ioend));
} }
done: done:
if (error) xfs_destroy_ioend(ioend, error);
ioend->io_error = error;
xfs_destroy_ioend(ioend);
} }
/* STATIC void
* Allocate and initialise an IO completion structure. xfs_end_bio(
* We need to track unwritten extent write completion here initially. struct bio *bio)
* We'll need to extend this for updating the ondisk inode size later
* (vs. incore size).
*/
STATIC xfs_ioend_t *
xfs_alloc_ioend(
struct inode *inode,
unsigned int type)
{ {
xfs_ioend_t *ioend; struct xfs_ioend *ioend = bio->bi_private;
struct xfs_mount *mp = XFS_I(ioend->io_inode)->i_mount;
ioend = mempool_alloc(xfs_ioend_pool, GFP_NOFS); if (ioend->io_type == XFS_IO_UNWRITTEN)
queue_work(mp->m_unwritten_workqueue, &ioend->io_work);
/* else if (ioend->io_append_trans)
* Set the count to 1 initially, which will prevent an I/O queue_work(mp->m_data_workqueue, &ioend->io_work);
* completion callback from happening before we have started else
* all the I/O from calling the completion routine too early. xfs_destroy_ioend(ioend, bio->bi_error);
*/
atomic_set(&ioend->io_remaining, 1);
ioend->io_error = 0;
INIT_LIST_HEAD(&ioend->io_list);
ioend->io_type = type;
ioend->io_inode = inode;
ioend->io_buffer_head = NULL;
ioend->io_buffer_tail = NULL;
ioend->io_offset = 0;
ioend->io_size = 0;
ioend->io_append_trans = NULL;
INIT_WORK(&ioend->io_work, xfs_end_io);
return ioend;
} }
STATIC int STATIC int
@ -364,50 +366,6 @@ xfs_imap_valid(
offset < imap->br_startoff + imap->br_blockcount; offset < imap->br_startoff + imap->br_blockcount;
} }
/*
* BIO completion handler for buffered IO.
*/
STATIC void
xfs_end_bio(
struct bio *bio)
{
xfs_ioend_t *ioend = bio->bi_private;
if (!ioend->io_error)
ioend->io_error = bio->bi_error;
/* Toss bio and pass work off to an xfsdatad thread */
bio->bi_private = NULL;
bio->bi_end_io = NULL;
bio_put(bio);
xfs_finish_ioend(ioend);
}
STATIC void
xfs_submit_ioend_bio(
struct writeback_control *wbc,
xfs_ioend_t *ioend,
struct bio *bio)
{
atomic_inc(&ioend->io_remaining);
bio->bi_private = ioend;
bio->bi_end_io = xfs_end_bio;
submit_bio(wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC : WRITE, bio);
}
STATIC struct bio *
xfs_alloc_ioend_bio(
struct buffer_head *bh)
{
struct bio *bio = bio_alloc(GFP_NOIO, BIO_MAX_PAGES);
ASSERT(bio->bi_private == NULL);
bio->bi_iter.bi_sector = bh->b_blocknr * (bh->b_size >> 9);
bio->bi_bdev = bh->b_bdev;
return bio;
}
STATIC void STATIC void
xfs_start_buffer_writeback( xfs_start_buffer_writeback(
struct buffer_head *bh) struct buffer_head *bh)
@ -452,28 +410,35 @@ static inline int xfs_bio_add_buffer(struct bio *bio, struct buffer_head *bh)
} }
/* /*
* Submit all of the bios for an ioend. We are only passed a single ioend at a * Submit the bio for an ioend. We are passed an ioend with a bio attached to
* time; the caller is responsible for chaining prior to submission. * it, and we submit that bio. The ioend may be used for multiple bio
* submissions, so we only want to allocate an append transaction for the ioend
* once. In the case of multiple bio submission, each bio will take an IO
* reference to the ioend to ensure that the ioend completion is only done once
* all bios have been submitted and the ioend is really done.
* *
* If @fail is non-zero, it means that we have a situation where some part of * If @fail is non-zero, it means that we have a situation where some part of
* the submission process has failed after we have marked pages for writeback * the submission process has failed after we have marked pages for writeback
* and unlocked them. In this situation, we need to fail the ioend chain rather * and unlocked them. In this situation, we need to fail the bio and ioend
* than submit it to IO. This typically only happens on a filesystem shutdown. * rather than submit it to IO. This typically only happens on a filesystem
* shutdown.
*/ */
STATIC int STATIC int
xfs_submit_ioend( xfs_submit_ioend(
struct writeback_control *wbc, struct writeback_control *wbc,
xfs_ioend_t *ioend, struct xfs_ioend *ioend,
int status) int status)
{ {
struct buffer_head *bh;
struct bio *bio;
sector_t lastblock = 0;
/* Reserve log space if we might write beyond the on-disk inode size. */ /* Reserve log space if we might write beyond the on-disk inode size. */
if (!status && if (!status &&
ioend->io_type != XFS_IO_UNWRITTEN && xfs_ioend_is_append(ioend)) ioend->io_type != XFS_IO_UNWRITTEN &&
xfs_ioend_is_append(ioend) &&
!ioend->io_append_trans)
status = xfs_setfilesize_trans_alloc(ioend); status = xfs_setfilesize_trans_alloc(ioend);
ioend->io_bio->bi_private = ioend;
ioend->io_bio->bi_end_io = xfs_end_bio;
/* /*
* If we are failing the IO now, just mark the ioend with an * If we are failing the IO now, just mark the ioend with an
* error and finish it. This will run IO completion immediately * error and finish it. This will run IO completion immediately
@ -481,35 +446,75 @@ xfs_submit_ioend(
* time. * time.
*/ */
if (status) { if (status) {
ioend->io_error = status; ioend->io_bio->bi_error = status;
xfs_finish_ioend(ioend); bio_endio(ioend->io_bio);
return status; return status;
} }
bio = NULL; submit_bio(wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC : WRITE,
for (bh = ioend->io_buffer_head; bh; bh = bh->b_private) { ioend->io_bio);
if (!bio) {
retry:
bio = xfs_alloc_ioend_bio(bh);
} else if (bh->b_blocknr != lastblock + 1) {
xfs_submit_ioend_bio(wbc, ioend, bio);
goto retry;
}
if (xfs_bio_add_buffer(bio, bh) != bh->b_size) {
xfs_submit_ioend_bio(wbc, ioend, bio);
goto retry;
}
lastblock = bh->b_blocknr;
}
if (bio)
xfs_submit_ioend_bio(wbc, ioend, bio);
xfs_finish_ioend(ioend);
return 0; return 0;
} }
/*
 * Point a freshly allocated bio at the device location described by a
 * buffer_head: copy the block device, and convert the buffer's block number
 * into 512-byte units (b_size >> 9 of them per block) for the start sector.
 */
static void
xfs_init_bio_from_bh(
	struct bio		*bio,
	struct buffer_head	*bh)
{
	bio->bi_bdev = bh->b_bdev;
	bio->bi_iter.bi_sector = bh->b_blocknr * (bh->b_size >> 9);
}
/*
 * Allocate and initialise an ioend together with its first bio.
 *
 * @inode:	inode the writeback belongs to
 * @type:	IO type recorded in io_type
 * @offset:	file offset at which this ioend starts
 * @bh:		first buffer; supplies the bio's device and start sector
 *
 * The bio comes from xfs_ioend_bioset and is the io_inline_bio member of the
 * ioend, so the ioend is recovered from the bio with container_of().  The
 * returned ioend has zero size, no append transaction, and io_bio pointing
 * at the inline bio.
 */
static struct xfs_ioend *
xfs_alloc_ioend(
	struct inode		*inode,
	unsigned int		type,
	xfs_off_t		offset,
	struct buffer_head	*bh)
{
	struct bio		*first_bio;
	struct xfs_ioend	*ioend;

	first_bio = bio_alloc_bioset(GFP_NOFS, BIO_MAX_PAGES, xfs_ioend_bioset);
	xfs_init_bio_from_bh(first_bio, bh);

	ioend = container_of(first_bio, struct xfs_ioend, io_inline_bio);

	/* Identity and extent of the IO being tracked. */
	ioend->io_inode = inode;
	ioend->io_type = type;
	ioend->io_offset = offset;
	ioend->io_size = 0;

	/* Completion state. */
	ioend->io_append_trans = NULL;
	ioend->io_bio = first_bio;
	INIT_LIST_HEAD(&ioend->io_list);
	INIT_WORK(&ioend->io_work, xfs_end_io);

	return ioend;
}
/*
* Allocate a new bio, and chain the old bio to the new one.
*
* The ioend's current bio is chained to the newly allocated one, submitted,
* and then replaced as ioend->io_bio by the new bio, which is set up from @bh.
* The write is issued as WRITE_SYNC when wbc->sync_mode is WB_SYNC_ALL and as
* WRITE otherwise.
*
* Note that we have to perform the chaining in this unintuitive order
* so that the bi_private linkage is set up in the right direction for the
* traversal in xfs_destroy_ioend().
*/
static void
xfs_chain_bio(
struct xfs_ioend *ioend,
struct writeback_control *wbc,
struct buffer_head *bh)
{
struct bio *new;
new = bio_alloc(GFP_NOFS, BIO_MAX_PAGES);
xfs_init_bio_from_bh(new, bh);
/* Old bio becomes the child of the new one; see the ordering note above. */
bio_chain(ioend->io_bio, new);
/* Extra reference taken before submission so the old bio is still there to walk. */
bio_get(ioend->io_bio); /* for xfs_destroy_ioend */
submit_bio(wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC : WRITE,
ioend->io_bio);
/* Subsequent buffers are added to the new bio. */
ioend->io_bio = new;
}
/* /*
* Test to see if we've been building up a completion structure for * Test to see if we've been building up a completion structure for
* earlier buffers -- if so, we try to append to this ioend if we * earlier buffers -- if so, we try to append to this ioend if we
@ -523,27 +528,24 @@ xfs_add_to_ioend(
struct buffer_head *bh, struct buffer_head *bh,
xfs_off_t offset, xfs_off_t offset,
struct xfs_writepage_ctx *wpc, struct xfs_writepage_ctx *wpc,
struct writeback_control *wbc,
struct list_head *iolist) struct list_head *iolist)
{ {
if (!wpc->ioend || wpc->io_type != wpc->ioend->io_type || if (!wpc->ioend || wpc->io_type != wpc->ioend->io_type ||
bh->b_blocknr != wpc->last_block + 1 || bh->b_blocknr != wpc->last_block + 1 ||
offset != wpc->ioend->io_offset + wpc->ioend->io_size) { offset != wpc->ioend->io_offset + wpc->ioend->io_size) {
struct xfs_ioend *new;
if (wpc->ioend) if (wpc->ioend)
list_add(&wpc->ioend->io_list, iolist); list_add(&wpc->ioend->io_list, iolist);
wpc->ioend = xfs_alloc_ioend(inode, wpc->io_type, offset, bh);
new = xfs_alloc_ioend(inode, wpc->io_type);
new->io_offset = offset;
new->io_buffer_head = bh;
new->io_buffer_tail = bh;
wpc->ioend = new;
} else {
wpc->ioend->io_buffer_tail->b_private = bh;
wpc->ioend->io_buffer_tail = bh;
} }
bh->b_private = NULL; /*
* If the buffer doesn't fit into the bio we need to allocate a new
* one. This shouldn't happen more than once for a given buffer.
*/
while (xfs_bio_add_buffer(wpc->ioend->io_bio, bh) != bh->b_size)
xfs_chain_bio(wpc->ioend, wbc, bh);
wpc->ioend->io_size += bh->b_size; wpc->ioend->io_size += bh->b_size;
wpc->last_block = bh->b_blocknr; wpc->last_block = bh->b_blocknr;
xfs_start_buffer_writeback(bh); xfs_start_buffer_writeback(bh);
@ -803,7 +805,7 @@ xfs_writepage_map(
lock_buffer(bh); lock_buffer(bh);
if (wpc->io_type != XFS_IO_OVERWRITE) if (wpc->io_type != XFS_IO_OVERWRITE)
xfs_map_at_offset(inode, bh, &wpc->imap, offset); xfs_map_at_offset(inode, bh, &wpc->imap, offset);
xfs_add_to_ioend(inode, bh, offset, wpc, &submit_list); xfs_add_to_ioend(inode, bh, offset, wpc, wbc, &submit_list);
count++; count++;
} }
@ -1391,13 +1393,10 @@ xfs_end_io_direct_write(
trace_xfs_end_io_direct_write_append(ip, offset, size); trace_xfs_end_io_direct_write_append(ip, offset, size);
tp = xfs_trans_alloc(mp, XFS_TRANS_FSYNC_TS); error = xfs_trans_alloc(mp, &M_RES(mp)->tr_fsyncts, 0, 0, 0,
error = xfs_trans_reserve(tp, &M_RES(mp)->tr_fsyncts, 0, 0); &tp);
if (error) { if (!error)
xfs_trans_cancel(tp); error = xfs_setfilesize(ip, tp, offset, size);
return error;
}
error = xfs_setfilesize(ip, tp, offset, size);
} }
return error; return error;

View File

@ -18,7 +18,7 @@
#ifndef __XFS_AOPS_H__ #ifndef __XFS_AOPS_H__
#define __XFS_AOPS_H__ #define __XFS_AOPS_H__
extern mempool_t *xfs_ioend_pool; extern struct bio_set *xfs_ioend_bioset;
/* /*
* Types of I/O for bmap clustering and I/O completion tracking. * Types of I/O for bmap clustering and I/O completion tracking.
@ -37,22 +37,19 @@ enum {
{ XFS_IO_OVERWRITE, "overwrite" } { XFS_IO_OVERWRITE, "overwrite" }
/* /*
* xfs_ioend struct manages large extent writes for XFS. * Structure for buffered I/O completions.
* It can manage several multi-page bio's at once.
*/ */
typedef struct xfs_ioend { struct xfs_ioend {
struct list_head io_list; /* next ioend in chain */ struct list_head io_list; /* next ioend in chain */
unsigned int io_type; /* delalloc / unwritten */ unsigned int io_type; /* delalloc / unwritten */
int io_error; /* I/O error code */
atomic_t io_remaining; /* hold count */
struct inode *io_inode; /* file being written to */ struct inode *io_inode; /* file being written to */
struct buffer_head *io_buffer_head;/* buffer linked list head */
struct buffer_head *io_buffer_tail;/* buffer linked list tail */
size_t io_size; /* size of the extent */ size_t io_size; /* size of the extent */
xfs_off_t io_offset; /* offset in the file */ xfs_off_t io_offset; /* offset in the file */
struct work_struct io_work; /* xfsdatad work queue */ struct work_struct io_work; /* xfsdatad work queue */
struct xfs_trans *io_append_trans;/* xact. for size update */ struct xfs_trans *io_append_trans;/* xact. for size update */
} xfs_ioend_t; struct bio *io_bio; /* bio being built */
struct bio io_inline_bio; /* MUST BE LAST! */
};
extern const struct address_space_operations xfs_address_space_operations; extern const struct address_space_operations xfs_address_space_operations;

View File

@ -112,8 +112,9 @@ typedef struct attrlist_cursor_kern {
*========================================================================*/ *========================================================================*/
/* Return 0 on success, or -errno; other state communicated via *context */
typedef int (*put_listent_func_t)(struct xfs_attr_list_context *, int, typedef int (*put_listent_func_t)(struct xfs_attr_list_context *, int,
unsigned char *, int, int, unsigned char *); unsigned char *, int, int);
typedef struct xfs_attr_list_context { typedef struct xfs_attr_list_context {
struct xfs_inode *dp; /* inode */ struct xfs_inode *dp; /* inode */
@ -126,7 +127,6 @@ typedef struct xfs_attr_list_context {
int firstu; /* first used byte in buffer */ int firstu; /* first used byte in buffer */
int flags; /* from VOP call */ int flags; /* from VOP call */
int resynch; /* T/F: resynch with cursor */ int resynch; /* T/F: resynch with cursor */
int put_value; /* T/F: need value for listent */
put_listent_func_t put_listent; /* list output fmt function */ put_listent_func_t put_listent; /* list output fmt function */
int index; /* index into output buffer */ int index; /* index into output buffer */
} xfs_attr_list_context_t; } xfs_attr_list_context_t;

View File

@ -405,21 +405,11 @@ xfs_attr_inactive(
goto out_destroy_fork; goto out_destroy_fork;
xfs_iunlock(dp, lock_mode); xfs_iunlock(dp, lock_mode);
/*
* Start our first transaction of the day.
*
* All future transactions during this code must be "chained" off
* this one via the trans_dup() call. All transactions will contain
* the inode, and the inode will always be marked with trans_ihold().
* Since the inode will be locked in all transactions, we must log
* the inode in every transaction to let it float upward through
* the log.
*/
lock_mode = 0; lock_mode = 0;
trans = xfs_trans_alloc(mp, XFS_TRANS_ATTRINVAL);
error = xfs_trans_reserve(trans, &M_RES(mp)->tr_attrinval, 0, 0); error = xfs_trans_alloc(mp, &M_RES(mp)->tr_attrinval, 0, 0, 0, &trans);
if (error) if (error)
goto out_cancel; goto out_destroy_fork;
lock_mode = XFS_ILOCK_EXCL; lock_mode = XFS_ILOCK_EXCL;
xfs_ilock(dp, lock_mode); xfs_ilock(dp, lock_mode);

View File

@ -106,18 +106,15 @@ xfs_attr_shortform_list(xfs_attr_list_context_t *context)
sfe->flags, sfe->flags,
sfe->nameval, sfe->nameval,
(int)sfe->namelen, (int)sfe->namelen,
(int)sfe->valuelen, (int)sfe->valuelen);
&sfe->nameval[sfe->namelen]); if (error)
return error;
/* /*
* Either search callback finished early or * Either search callback finished early or
* didn't fit it all in the buffer after all. * didn't fit it all in the buffer after all.
*/ */
if (context->seen_enough) if (context->seen_enough)
break; break;
if (error)
return error;
sfe = XFS_ATTR_SF_NEXTENTRY(sfe); sfe = XFS_ATTR_SF_NEXTENTRY(sfe);
} }
trace_xfs_attr_list_sf_all(context); trace_xfs_attr_list_sf_all(context);
@ -200,8 +197,7 @@ xfs_attr_shortform_list(xfs_attr_list_context_t *context)
sbp->flags, sbp->flags,
sbp->name, sbp->name,
sbp->namelen, sbp->namelen,
sbp->valuelen, sbp->valuelen);
&sbp->name[sbp->namelen]);
if (error) { if (error) {
kmem_free(sbuf); kmem_free(sbuf);
return error; return error;
@ -416,6 +412,9 @@ xfs_attr3_leaf_list_int(
*/ */
retval = 0; retval = 0;
for (; i < ichdr.count; entry++, i++) { for (; i < ichdr.count; entry++, i++) {
char *name;
int namelen, valuelen;
if (be32_to_cpu(entry->hashval) != cursor->hashval) { if (be32_to_cpu(entry->hashval) != cursor->hashval) {
cursor->hashval = be32_to_cpu(entry->hashval); cursor->hashval = be32_to_cpu(entry->hashval);
cursor->offset = 0; cursor->offset = 0;
@ -425,56 +424,25 @@ xfs_attr3_leaf_list_int(
continue; /* skip incomplete entries */ continue; /* skip incomplete entries */
if (entry->flags & XFS_ATTR_LOCAL) { if (entry->flags & XFS_ATTR_LOCAL) {
xfs_attr_leaf_name_local_t *name_loc = xfs_attr_leaf_name_local_t *name_loc;
xfs_attr3_leaf_name_local(leaf, i);
retval = context->put_listent(context, name_loc = xfs_attr3_leaf_name_local(leaf, i);
entry->flags, name = name_loc->nameval;
name_loc->nameval, namelen = name_loc->namelen;
(int)name_loc->namelen, valuelen = be16_to_cpu(name_loc->valuelen);
be16_to_cpu(name_loc->valuelen),
&name_loc->nameval[name_loc->namelen]);
if (retval)
return retval;
} else { } else {
xfs_attr_leaf_name_remote_t *name_rmt = xfs_attr_leaf_name_remote_t *name_rmt;
xfs_attr3_leaf_name_remote(leaf, i);
int valuelen = be32_to_cpu(name_rmt->valuelen); name_rmt = xfs_attr3_leaf_name_remote(leaf, i);
name = name_rmt->name;
if (context->put_value) { namelen = name_rmt->namelen;
xfs_da_args_t args; valuelen = be32_to_cpu(name_rmt->valuelen);
memset((char *)&args, 0, sizeof(args));
args.geo = context->dp->i_mount->m_attr_geo;
args.dp = context->dp;
args.whichfork = XFS_ATTR_FORK;
args.valuelen = valuelen;
args.rmtvaluelen = valuelen;
args.value = kmem_alloc(valuelen, KM_SLEEP | KM_NOFS);
args.rmtblkno = be32_to_cpu(name_rmt->valueblk);
args.rmtblkcnt = xfs_attr3_rmt_blocks(
args.dp->i_mount, valuelen);
retval = xfs_attr_rmtval_get(&args);
if (!retval)
retval = context->put_listent(context,
entry->flags,
name_rmt->name,
(int)name_rmt->namelen,
valuelen,
args.value);
kmem_free(args.value);
} else {
retval = context->put_listent(context,
entry->flags,
name_rmt->name,
(int)name_rmt->namelen,
valuelen,
NULL);
}
if (retval)
return retval;
} }
retval = context->put_listent(context, entry->flags,
name, namelen, valuelen);
if (retval)
break;
if (context->seen_enough) if (context->seen_enough)
break; break;
cursor->offset++; cursor->offset++;
@ -551,8 +519,7 @@ xfs_attr_put_listent(
int flags, int flags,
unsigned char *name, unsigned char *name,
int namelen, int namelen,
int valuelen, int valuelen)
unsigned char *value)
{ {
struct attrlist *alist = (struct attrlist *)context->alist; struct attrlist *alist = (struct attrlist *)context->alist;
attrlist_ent_t *aep; attrlist_ent_t *aep;
@ -581,7 +548,7 @@ xfs_attr_put_listent(
trace_xfs_attr_list_full(context); trace_xfs_attr_list_full(context);
alist->al_more = 1; alist->al_more = 1;
context->seen_enough = 1; context->seen_enough = 1;
return 1; return 0;
} }
aep = (attrlist_ent_t *)&context->alist[context->firstu]; aep = (attrlist_ent_t *)&context->alist[context->firstu];

View File

@ -900,19 +900,15 @@ xfs_free_eofblocks(
* Free them up now by truncating the file to * Free them up now by truncating the file to
* its current size. * its current size.
*/ */
tp = xfs_trans_alloc(mp, XFS_TRANS_INACTIVE);
if (need_iolock) { if (need_iolock) {
if (!xfs_ilock_nowait(ip, XFS_IOLOCK_EXCL)) { if (!xfs_ilock_nowait(ip, XFS_IOLOCK_EXCL))
xfs_trans_cancel(tp);
return -EAGAIN; return -EAGAIN;
}
} }
error = xfs_trans_reserve(tp, &M_RES(mp)->tr_itruncate, 0, 0); error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate, 0, 0, 0,
&tp);
if (error) { if (error) {
ASSERT(XFS_FORCED_SHUTDOWN(mp)); ASSERT(XFS_FORCED_SHUTDOWN(mp));
xfs_trans_cancel(tp);
if (need_iolock) if (need_iolock)
xfs_iunlock(ip, XFS_IOLOCK_EXCL); xfs_iunlock(ip, XFS_IOLOCK_EXCL);
return error; return error;
@ -1037,9 +1033,9 @@ xfs_alloc_file_space(
/* /*
* Allocate and setup the transaction. * Allocate and setup the transaction.
*/ */
tp = xfs_trans_alloc(mp, XFS_TRANS_DIOSTRAT); error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, resblks,
error = xfs_trans_reserve(tp, &M_RES(mp)->tr_write, resrtextents, 0, &tp);
resblks, resrtextents);
/* /*
* Check for running out of space * Check for running out of space
*/ */
@ -1048,7 +1044,6 @@ xfs_alloc_file_space(
* Free the transaction structure. * Free the transaction structure.
*/ */
ASSERT(error == -ENOSPC || XFS_FORCED_SHUTDOWN(mp)); ASSERT(error == -ENOSPC || XFS_FORCED_SHUTDOWN(mp));
xfs_trans_cancel(tp);
break; break;
} }
xfs_ilock(ip, XFS_ILOCK_EXCL); xfs_ilock(ip, XFS_ILOCK_EXCL);
@ -1311,18 +1306,10 @@ xfs_free_file_space(
* transaction to dip into the reserve blocks to ensure * transaction to dip into the reserve blocks to ensure
* the freeing of the space succeeds at ENOSPC. * the freeing of the space succeeds at ENOSPC.
*/ */
tp = xfs_trans_alloc(mp, XFS_TRANS_DIOSTRAT); error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, resblks, 0, 0,
error = xfs_trans_reserve(tp, &M_RES(mp)->tr_write, resblks, 0); &tp);
/*
* check for running out of space
*/
if (error) { if (error) {
/*
* Free the transaction structure.
*/
ASSERT(error == -ENOSPC || XFS_FORCED_SHUTDOWN(mp)); ASSERT(error == -ENOSPC || XFS_FORCED_SHUTDOWN(mp));
xfs_trans_cancel(tp);
break; break;
} }
xfs_ilock(ip, XFS_ILOCK_EXCL); xfs_ilock(ip, XFS_ILOCK_EXCL);
@ -1482,19 +1469,16 @@ xfs_shift_file_space(
} }
while (!error && !done) { while (!error && !done) {
tp = xfs_trans_alloc(mp, XFS_TRANS_DIOSTRAT);
/* /*
* We would need to reserve permanent block for transaction. * We would need to reserve permanent block for transaction.
* This will come into picture when after shifting extent into * This will come into picture when after shifting extent into
* hole we found that adjacent extents can be merged which * hole we found that adjacent extents can be merged which
* may lead to freeing of a block during record update. * may lead to freeing of a block during record update.
*/ */
error = xfs_trans_reserve(tp, &M_RES(mp)->tr_write, error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write,
XFS_DIOSTRAT_SPACE_RES(mp, 0), 0); XFS_DIOSTRAT_SPACE_RES(mp, 0), 0, 0, &tp);
if (error) { if (error)
xfs_trans_cancel(tp);
break; break;
}
xfs_ilock(ip, XFS_ILOCK_EXCL); xfs_ilock(ip, XFS_ILOCK_EXCL);
error = xfs_trans_reserve_quota(tp, mp, ip->i_udquot, error = xfs_trans_reserve_quota(tp, mp, ip->i_udquot,
@ -1747,12 +1731,9 @@ xfs_swap_extents(
if (error) if (error)
goto out_unlock; goto out_unlock;
tp = xfs_trans_alloc(mp, XFS_TRANS_SWAPEXT); error = xfs_trans_alloc(mp, &M_RES(mp)->tr_ichange, 0, 0, 0, &tp);
error = xfs_trans_reserve(tp, &M_RES(mp)->tr_ichange, 0, 0); if (error)
if (error) {
xfs_trans_cancel(tp);
goto out_unlock; goto out_unlock;
}
/* /*
* Lock and join the inodes to the transaction so that transaction commit

View File

@ -1100,22 +1100,18 @@ xfs_bwrite(
return error; return error;
} }
STATIC void static void
xfs_buf_bio_end_io( xfs_buf_bio_end_io(
struct bio *bio) struct bio *bio)
{ {
xfs_buf_t *bp = (xfs_buf_t *)bio->bi_private; struct xfs_buf *bp = (struct xfs_buf *)bio->bi_private;
/* /*
* don't overwrite existing errors - otherwise we can lose errors on * don't overwrite existing errors - otherwise we can lose errors on
* buffers that require multiple bios to complete. * buffers that require multiple bios to complete.
*/ */
if (bio->bi_error) { if (bio->bi_error)
spin_lock(&bp->b_lock); cmpxchg(&bp->b_io_error, 0, bio->bi_error);
if (!bp->b_io_error)
bp->b_io_error = bio->bi_error;
spin_unlock(&bp->b_lock);
}
if (!bp->b_error && xfs_buf_is_vmapped(bp) && (bp->b_flags & XBF_READ)) if (!bp->b_error && xfs_buf_is_vmapped(bp) && (bp->b_flags & XBF_READ))
invalidate_kernel_vmap_range(bp->b_addr, xfs_buf_vmap_len(bp)); invalidate_kernel_vmap_range(bp->b_addr, xfs_buf_vmap_len(bp));

View File

@ -183,6 +183,26 @@ typedef struct xfs_buf {
unsigned int b_page_count; /* size of page array */ unsigned int b_page_count; /* size of page array */
unsigned int b_offset; /* page offset in first page */ unsigned int b_offset; /* page offset in first page */
int b_error; /* error code on I/O */ int b_error; /* error code on I/O */
/*
* async write failure retry count. Initialised to zero on the first
* failure, then when it exceeds the maximum configured without a
* success the write is considered to be failed permanently and the
* iodone handler will take appropriate action.
*
* For retry timeouts, we record the jiffies value of the first failure. This
* means that we can change the retry timeout for buffers already under
* I/O and thus avoid getting stuck in a retry loop with a long timeout.
*
* last_error is used to ensure that we are getting repeated errors, not
* different errors. e.g. a block device might change ENOSPC to EIO when
* a failure timeout occurs, so we want to re-initialise the error
* retry behaviour appropriately when that happens.
*/
int b_retries;
unsigned long b_first_retry_time; /* in jiffies */
int b_last_error;
const struct xfs_buf_ops *b_ops; const struct xfs_buf_ops *b_ops;
#ifdef XFS_BUF_LOCK_TRACKING #ifdef XFS_BUF_LOCK_TRACKING

View File

@ -1042,35 +1042,22 @@ xfs_buf_do_callbacks(
} }
} }
/* static bool
* This is the iodone() function for buffers which have had callbacks xfs_buf_iodone_callback_error(
* attached to them by xfs_buf_attach_iodone(). It should remove each
* log item from the buffer's list and call the callback of each in turn.
* When done, the buffer's fsprivate field is set to NULL and the buffer
* is unlocked with a call to iodone().
*/
void
xfs_buf_iodone_callbacks(
struct xfs_buf *bp) struct xfs_buf *bp)
{ {
struct xfs_log_item *lip = bp->b_fspriv; struct xfs_log_item *lip = bp->b_fspriv;
struct xfs_mount *mp = lip->li_mountp; struct xfs_mount *mp = lip->li_mountp;
static ulong lasttime; static ulong lasttime;
static xfs_buftarg_t *lasttarg; static xfs_buftarg_t *lasttarg;
struct xfs_error_cfg *cfg;
if (likely(!bp->b_error))
goto do_callbacks;
/* /*
* If we've already decided to shutdown the filesystem because of * If we've already decided to shutdown the filesystem because of
* I/O errors, there's no point in giving this a retry. * I/O errors, there's no point in giving this a retry.
*/ */
if (XFS_FORCED_SHUTDOWN(mp)) { if (XFS_FORCED_SHUTDOWN(mp))
xfs_buf_stale(bp); goto out_stale;
bp->b_flags |= XBF_DONE;
trace_xfs_buf_item_iodone(bp, _RET_IP_);
goto do_callbacks;
}
if (bp->b_target != lasttarg || if (bp->b_target != lasttarg ||
time_after(jiffies, (lasttime + 5*HZ))) { time_after(jiffies, (lasttime + 5*HZ))) {
@ -1079,45 +1066,93 @@ xfs_buf_iodone_callbacks(
} }
lasttarg = bp->b_target; lasttarg = bp->b_target;
/* synchronous writes will have callers process the error */
if (!(bp->b_flags & XBF_ASYNC))
goto out_stale;
trace_xfs_buf_item_iodone_async(bp, _RET_IP_);
ASSERT(bp->b_iodone != NULL);
/* /*
* If the write was asynchronous then no one will be looking for the * If the write was asynchronous then no one will be looking for the
* error. Clear the error state and write the buffer out again. * error. If this is the first failure of this type, clear the error
* * state and write the buffer out again. This means we always retry an
* XXX: This helps against transient write errors, but we need to find * async write failure at least once, but we also need to set the buffer
* a way to shut the filesystem down if the writes keep failing. * up to behave correctly now for repeated failures.
*
* In practice we'll shut the filesystem down soon as non-transient
* errors tend to affect the whole device and a failing log write
* will make us give up. But we really ought to do better here.
*/ */
if (bp->b_flags & XBF_ASYNC) { if (!(bp->b_flags & (XBF_STALE|XBF_WRITE_FAIL)) ||
ASSERT(bp->b_iodone != NULL); bp->b_last_error != bp->b_error) {
bp->b_flags |= (XBF_WRITE | XBF_ASYNC |
XBF_DONE | XBF_WRITE_FAIL);
bp->b_last_error = bp->b_error;
bp->b_retries = 0;
bp->b_first_retry_time = jiffies;
trace_xfs_buf_item_iodone_async(bp, _RET_IP_); xfs_buf_ioerror(bp, 0);
xfs_buf_submit(bp);
xfs_buf_ioerror(bp, 0); /* errno of 0 unsets the flag */ return true;
if (!(bp->b_flags & (XBF_STALE|XBF_WRITE_FAIL))) {
bp->b_flags |= XBF_WRITE | XBF_ASYNC |
XBF_DONE | XBF_WRITE_FAIL;
xfs_buf_submit(bp);
} else {
xfs_buf_relse(bp);
}
return;
} }
/* /*
* If the write of the buffer was synchronous, we want to make * Repeated failure on an async write. Take action according to the
* sure to return the error to the caller of xfs_bwrite(). * error configuration we have been set up to use.
*/ */
cfg = xfs_error_get_cfg(mp, XFS_ERR_METADATA, bp->b_error);
if (cfg->max_retries != XFS_ERR_RETRY_FOREVER &&
++bp->b_retries > cfg->max_retries)
goto permanent_error;
if (cfg->retry_timeout &&
time_after(jiffies, cfg->retry_timeout + bp->b_first_retry_time))
goto permanent_error;
/* At unmount we may treat errors differently */
if ((mp->m_flags & XFS_MOUNT_UNMOUNTING) && mp->m_fail_unmount)
goto permanent_error;
/* still a transient error, higher layers will retry */
xfs_buf_ioerror(bp, 0);
xfs_buf_relse(bp);
return true;
/*
* Permanent error - we need to trigger a shutdown if we haven't already
* to indicate that inconsistency will result from this action.
*/
permanent_error:
xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR);
out_stale:
xfs_buf_stale(bp); xfs_buf_stale(bp);
bp->b_flags |= XBF_DONE; bp->b_flags |= XBF_DONE;
trace_xfs_buf_error_relse(bp, _RET_IP_); trace_xfs_buf_error_relse(bp, _RET_IP_);
return false;
}
/*
* This is the iodone() function for buffers which have had callbacks attached
* to them by xfs_buf_attach_iodone(). We need to iterate the items on the
* callback list, mark the buffer as having no more callbacks and then push the
* buffer through IO completion processing.
*/
void
xfs_buf_iodone_callbacks(
struct xfs_buf *bp)
{
/*
* If there is an error, process it. Some errors require us
* to run callbacks after failure processing is done so we
* detect that and take appropriate action.
*/
if (bp->b_error && xfs_buf_iodone_callback_error(bp))
return;
/*
* Successful IO or permanent error. Either way, we can clear the
* retry state here in preparation for the next error that may occur.
*/
bp->b_last_error = 0;
bp->b_retries = 0;
do_callbacks:
xfs_buf_do_callbacks(bp); xfs_buf_do_callbacks(bp);
bp->b_fspriv = NULL; bp->b_fspriv = NULL;
bp->b_iodone = NULL; bp->b_iodone = NULL;

View File

@ -614,11 +614,10 @@ xfs_qm_dqread(
trace_xfs_dqread(dqp); trace_xfs_dqread(dqp);
if (flags & XFS_QMOPT_DQALLOC) { if (flags & XFS_QMOPT_DQALLOC) {
tp = xfs_trans_alloc(mp, XFS_TRANS_QM_DQALLOC); error = xfs_trans_alloc(mp, &M_RES(mp)->tr_qm_dqalloc,
error = xfs_trans_reserve(tp, &M_RES(mp)->tr_qm_dqalloc, XFS_QM_DQALLOC_SPACE_RES(mp), 0, 0, &tp);
XFS_QM_DQALLOC_SPACE_RES(mp), 0);
if (error) if (error)
goto error1; goto error0;
} }
/* /*
@ -692,7 +691,7 @@ error0:
* end of the chunk, skip ahead to first id in next allocated chunk * end of the chunk, skip ahead to first id in next allocated chunk
* using the SEEK_DATA interface. * using the SEEK_DATA interface.
*/ */
int static int
xfs_dq_get_next_id( xfs_dq_get_next_id(
xfs_mount_t *mp, xfs_mount_t *mp,
uint type, uint type,

View File

@ -145,12 +145,10 @@ xfs_update_prealloc_flags(
struct xfs_trans *tp; struct xfs_trans *tp;
int error; int error;
tp = xfs_trans_alloc(ip->i_mount, XFS_TRANS_WRITEID); error = xfs_trans_alloc(ip->i_mount, &M_RES(ip->i_mount)->tr_writeid,
error = xfs_trans_reserve(tp, &M_RES(ip->i_mount)->tr_writeid, 0, 0); 0, 0, 0, &tp);
if (error) { if (error)
xfs_trans_cancel(tp);
return error; return error;
}
xfs_ilock(ip, XFS_ILOCK_EXCL); xfs_ilock(ip, XFS_ILOCK_EXCL);
xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);

View File

@ -198,14 +198,10 @@ xfs_growfs_data_private(
return error; return error;
} }
tp = xfs_trans_alloc(mp, XFS_TRANS_GROWFS); error = xfs_trans_alloc(mp, &M_RES(mp)->tr_growdata,
tp->t_flags |= XFS_TRANS_RESERVE; XFS_GROWFS_SPACE_RES(mp), 0, XFS_TRANS_RESERVE, &tp);
error = xfs_trans_reserve(tp, &M_RES(mp)->tr_growdata, if (error)
XFS_GROWFS_SPACE_RES(mp), 0);
if (error) {
xfs_trans_cancel(tp);
return error; return error;
}
/* /*
* Write new AG headers to disk. Non-transactional, but written * Write new AG headers to disk. Non-transactional, but written
@ -243,8 +239,8 @@ xfs_growfs_data_private(
agf->agf_roots[XFS_BTNUM_CNTi] = cpu_to_be32(XFS_CNT_BLOCK(mp)); agf->agf_roots[XFS_BTNUM_CNTi] = cpu_to_be32(XFS_CNT_BLOCK(mp));
agf->agf_levels[XFS_BTNUM_BNOi] = cpu_to_be32(1); agf->agf_levels[XFS_BTNUM_BNOi] = cpu_to_be32(1);
agf->agf_levels[XFS_BTNUM_CNTi] = cpu_to_be32(1); agf->agf_levels[XFS_BTNUM_CNTi] = cpu_to_be32(1);
agf->agf_flfirst = 0; agf->agf_flfirst = cpu_to_be32(1);
agf->agf_fllast = cpu_to_be32(XFS_AGFL_SIZE(mp) - 1); agf->agf_fllast = 0;
agf->agf_flcount = 0; agf->agf_flcount = 0;
tmpsize = agsize - XFS_PREALLOC_BLOCKS(mp); tmpsize = agsize - XFS_PREALLOC_BLOCKS(mp);
agf->agf_freeblks = cpu_to_be32(tmpsize); agf->agf_freeblks = cpu_to_be32(tmpsize);

View File

@ -37,9 +37,6 @@
#include <linux/kthread.h> #include <linux/kthread.h>
#include <linux/freezer.h> #include <linux/freezer.h>
STATIC void __xfs_inode_clear_reclaim_tag(struct xfs_mount *mp,
struct xfs_perag *pag, struct xfs_inode *ip);
/* /*
* Allocate and initialise an xfs_inode. * Allocate and initialise an xfs_inode.
*/ */
@ -94,13 +91,6 @@ xfs_inode_free_callback(
struct inode *inode = container_of(head, struct inode, i_rcu); struct inode *inode = container_of(head, struct inode, i_rcu);
struct xfs_inode *ip = XFS_I(inode); struct xfs_inode *ip = XFS_I(inode);
kmem_zone_free(xfs_inode_zone, ip);
}
void
xfs_inode_free(
struct xfs_inode *ip)
{
switch (VFS_I(ip)->i_mode & S_IFMT) { switch (VFS_I(ip)->i_mode & S_IFMT) {
case S_IFREG: case S_IFREG:
case S_IFDIR: case S_IFDIR:
@ -118,6 +108,25 @@ xfs_inode_free(
ip->i_itemp = NULL; ip->i_itemp = NULL;
} }
kmem_zone_free(xfs_inode_zone, ip);
}
static void
__xfs_inode_free(
struct xfs_inode *ip)
{
/* asserts to verify all state is correct here */
ASSERT(atomic_read(&ip->i_pincount) == 0);
ASSERT(!xfs_isiflocked(ip));
XFS_STATS_DEC(ip->i_mount, vn_active);
call_rcu(&VFS_I(ip)->i_rcu, xfs_inode_free_callback);
}
void
xfs_inode_free(
struct xfs_inode *ip)
{
/* /*
* Because we use RCU freeing we need to ensure the inode always * Because we use RCU freeing we need to ensure the inode always
* appears to be reclaimed with an invalid inode number when in the * appears to be reclaimed with an invalid inode number when in the
@ -129,12 +138,123 @@ xfs_inode_free(
ip->i_ino = 0; ip->i_ino = 0;
spin_unlock(&ip->i_flags_lock); spin_unlock(&ip->i_flags_lock);
/* asserts to verify all state is correct here */ __xfs_inode_free(ip);
ASSERT(atomic_read(&ip->i_pincount) == 0); }
ASSERT(!xfs_isiflocked(ip));
XFS_STATS_DEC(ip->i_mount, vn_active);
call_rcu(&VFS_I(ip)->i_rcu, xfs_inode_free_callback); /*
* Queue a new inode reclaim pass if there are reclaimable inodes and there
* isn't a reclaim pass already in progress. By default it runs every 5s based
* on the xfs periodic sync default of 30s. Perhaps this should have its own
* tunable, but that can be done if this method proves to be ineffective or too
* aggressive.
*/
static void
xfs_reclaim_work_queue(
	struct xfs_mount	*mp)
{
	unsigned long		delay;

	delay = msecs_to_jiffies(xfs_syncd_centisecs / 6 * 10);

	rcu_read_lock();
	/* only queue the work while the perag tree carries a reclaim tag */
	if (radix_tree_tagged(&mp->m_perag_tree, XFS_ICI_RECLAIM_TAG))
		queue_delayed_work(mp->m_reclaim_workqueue,
				   &mp->m_reclaim_work, delay);
	rcu_read_unlock();
}
/*
 * Background inode reclaim work function.
 *
 * This is a fast pass over the inode cache that tries to get reclaim moving
 * on as many inodes as possible in a short period of time. It re-arms itself
 * every few seconds and is also kicked by the inode cache shrinker when
 * memory goes low. The pass is run with SYNC_TRYLOCK so that it scans as
 * quickly as possible, avoiding locked inodes or those already being
 * flushed; once done it schedules a future pass.
 */
void
xfs_reclaim_worker(
	struct work_struct	*work)
{
	struct delayed_work	*dwork = to_delayed_work(work);
	struct xfs_mount	*mp;

	mp = container_of(dwork, struct xfs_mount, m_reclaim_work);

	xfs_reclaim_inodes(mp, SYNC_TRYLOCK);
	xfs_reclaim_work_queue(mp);
}
static void
xfs_perag_set_reclaim_tag(
struct xfs_perag *pag)
{
struct xfs_mount *mp = pag->pag_mount;
ASSERT(spin_is_locked(&pag->pag_ici_lock));
if (pag->pag_ici_reclaimable++)
return;
/* propagate the reclaim tag up into the perag radix tree */
spin_lock(&mp->m_perag_lock);
radix_tree_tag_set(&mp->m_perag_tree, pag->pag_agno,
XFS_ICI_RECLAIM_TAG);
spin_unlock(&mp->m_perag_lock);
/* schedule periodic background inode reclaim */
xfs_reclaim_work_queue(mp);
trace_xfs_perag_set_reclaim(mp, pag->pag_agno, -1, _RET_IP_);
}
/*
 * Count one fewer reclaimable inode in @pag. When the count reaches zero,
 * clear the AG's reclaim tag in the mount's perag radix tree and emit the
 * clear_reclaim tracepoint.
 *
 * NOTE(review): spin_is_locked() evaluates to 0 on uniprocessor builds
 * without spinlock debugging, so the ASSERT below may fire spuriously there;
 * lockdep_assert_held() may be the better check - confirm.
 */
static void
xfs_perag_clear_reclaim_tag(
	struct xfs_perag	*pag)
{
	struct xfs_mount	*mp = pag->pag_mount;

	ASSERT(spin_is_locked(&pag->pag_ici_lock));

	pag->pag_ici_reclaimable--;
	if (pag->pag_ici_reclaimable != 0)
		return;

	/* clear the reclaim tag from the perag radix tree */
	spin_lock(&mp->m_perag_lock);
	radix_tree_tag_clear(&mp->m_perag_tree, pag->pag_agno,
			     XFS_ICI_RECLAIM_TAG);
	spin_unlock(&mp->m_perag_lock);

	trace_xfs_perag_clear_reclaim(mp, pag->pag_agno, -1, _RET_IP_);
}
/*
 * Mark an inode as reclaimable.
 *
 * The XFS_IRECLAIMABLE inode flag is set atomically with the radix tree
 * reclaim tag: both updates happen while holding pag_ici_lock and the
 * inode's i_flags_lock. Once we get tag lookups on the radix tree, this
 * inode flag can go away.
 */
void
xfs_inode_set_reclaim_tag(
	struct xfs_inode	*ip)
{
	struct xfs_mount	*mp = ip->i_mount;
	struct xfs_perag	*pag = xfs_perag_get(mp,
					XFS_INO_TO_AGNO(mp, ip->i_ino));

	spin_lock(&pag->pag_ici_lock);
	spin_lock(&ip->i_flags_lock);

	/* tag the inode in the per-AG tree, then account it in the perag */
	radix_tree_tag_set(&pag->pag_ici_root,
			   XFS_INO_TO_AGINO(mp, ip->i_ino),
			   XFS_ICI_RECLAIM_TAG);
	xfs_perag_set_reclaim_tag(pag);
	__xfs_iflags_set(ip, XFS_IRECLAIMABLE);

	spin_unlock(&ip->i_flags_lock);
	spin_unlock(&pag->pag_ici_lock);

	xfs_perag_put(pag);
}
STATIC void
xfs_inode_clear_reclaim_tag(
struct xfs_perag *pag,
xfs_ino_t ino)
{
radix_tree_tag_clear(&pag->pag_ici_root,
XFS_INO_TO_AGINO(pag->pag_mount, ino),
XFS_ICI_RECLAIM_TAG);
xfs_perag_clear_reclaim_tag(pag);
} }
/* /*
@ -264,7 +384,7 @@ xfs_iget_cache_hit(
*/ */
ip->i_flags &= ~XFS_IRECLAIM_RESET_FLAGS; ip->i_flags &= ~XFS_IRECLAIM_RESET_FLAGS;
ip->i_flags |= XFS_INEW; ip->i_flags |= XFS_INEW;
__xfs_inode_clear_reclaim_tag(mp, pag, ip); xfs_inode_clear_reclaim_tag(pag, ip->i_ino);
inode->i_state = I_NEW; inode->i_state = I_NEW;
ASSERT(!rwsem_is_locked(&ip->i_iolock.mr_lock)); ASSERT(!rwsem_is_locked(&ip->i_iolock.mr_lock));
@ -722,121 +842,6 @@ xfs_inode_ag_iterator_tag(
return last_error; return last_error;
} }
/*
* Queue a new inode reclaim pass if there are reclaimable inodes and there
* isn't a reclaim pass already in progress. By default it runs every 5s based
* on the xfs periodic sync default of 30s. Perhaps this should have its own
* tunable, but that can be done if this method proves to be ineffective or too
* aggressive.
*/
static void
xfs_reclaim_work_queue(
struct xfs_mount *mp)
{
rcu_read_lock();
if (radix_tree_tagged(&mp->m_perag_tree, XFS_ICI_RECLAIM_TAG)) {
queue_delayed_work(mp->m_reclaim_workqueue, &mp->m_reclaim_work,
msecs_to_jiffies(xfs_syncd_centisecs / 6 * 10));
}
rcu_read_unlock();
}
/*
* This is a fast pass over the inode cache to try to get reclaim moving on as
* many inodes as possible in a short period of time. It kicks itself every few
* seconds, as well as being kicked by the inode cache shrinker when memory
* goes low. It scans as quickly as possible avoiding locked inodes or those
* already being flushed, and once done schedules a future pass.
*/
void
xfs_reclaim_worker(
struct work_struct *work)
{
struct xfs_mount *mp = container_of(to_delayed_work(work),
struct xfs_mount, m_reclaim_work);
xfs_reclaim_inodes(mp, SYNC_TRYLOCK);
xfs_reclaim_work_queue(mp);
}
static void
__xfs_inode_set_reclaim_tag(
struct xfs_perag *pag,
struct xfs_inode *ip)
{
radix_tree_tag_set(&pag->pag_ici_root,
XFS_INO_TO_AGINO(ip->i_mount, ip->i_ino),
XFS_ICI_RECLAIM_TAG);
if (!pag->pag_ici_reclaimable) {
/* propagate the reclaim tag up into the perag radix tree */
spin_lock(&ip->i_mount->m_perag_lock);
radix_tree_tag_set(&ip->i_mount->m_perag_tree,
XFS_INO_TO_AGNO(ip->i_mount, ip->i_ino),
XFS_ICI_RECLAIM_TAG);
spin_unlock(&ip->i_mount->m_perag_lock);
/* schedule periodic background inode reclaim */
xfs_reclaim_work_queue(ip->i_mount);
trace_xfs_perag_set_reclaim(ip->i_mount, pag->pag_agno,
-1, _RET_IP_);
}
pag->pag_ici_reclaimable++;
}
/*
 * Mark an inode as reclaimable.
 *
 * The XFS_IRECLAIMABLE inode flag and the radix tree reclaim tag are set
 * together while holding both pag_ici_lock and i_flags_lock, so the two
 * change atomically with respect to each other. Once tag lookups on the
 * radix tree are available, the inode flag can go away.
 */
void
xfs_inode_set_reclaim_tag(
	xfs_inode_t	*ip)
{
	struct xfs_mount	*mp = ip->i_mount;
	struct xfs_perag	*pag = xfs_perag_get(mp,
					XFS_INO_TO_AGNO(mp, ip->i_ino));

	/* lock order: per-AG cache lock first, then the inode flags lock */
	spin_lock(&pag->pag_ici_lock);
	spin_lock(&ip->i_flags_lock);

	__xfs_inode_set_reclaim_tag(pag, ip);
	__xfs_iflags_set(ip, XFS_IRECLAIMABLE);

	spin_unlock(&ip->i_flags_lock);
	spin_unlock(&pag->pag_ici_lock);

	xfs_perag_put(pag);
}
/*
 * Account for one fewer reclaimable inode in @pag.
 *
 * When the count reaches zero, the reclaim tag is also cleared from the AG's
 * entry in the mount's perag radix tree (under m_perag_lock). The tag on the
 * inode's own slot in the per-AG tree is not touched here.
 */
STATIC void
__xfs_inode_clear_reclaim(
	xfs_perag_t	*pag,
	xfs_inode_t	*ip)
{
	struct xfs_mount	*mp = ip->i_mount;

	/* other reclaimable inodes remain, so the AG keeps its tag */
	if (--pag->pag_ici_reclaimable)
		return;

	/* that was the last one: clear the tag from the perag tree */
	spin_lock(&mp->m_perag_lock);
	radix_tree_tag_clear(&mp->m_perag_tree,
			     XFS_INO_TO_AGNO(mp, ip->i_ino),
			     XFS_ICI_RECLAIM_TAG);
	spin_unlock(&mp->m_perag_lock);

	trace_xfs_perag_clear_reclaim(mp, pag->pag_agno, -1, _RET_IP_);
}
/*
 * Clear the reclaim tag on @ip's slot in the per-AG inode cache radix tree,
 * then update the per-AG reclaimable accounting through
 * __xfs_inode_clear_reclaim().
 */
STATIC void
__xfs_inode_clear_reclaim_tag(
	xfs_mount_t	*mp,
	xfs_perag_t	*pag,
	xfs_inode_t	*ip)
{
	/* index of this inode within the AG's inode cache tree */
	unsigned long	index = XFS_INO_TO_AGINO(mp, ip->i_ino);

	radix_tree_tag_clear(&pag->pag_ici_root, index, XFS_ICI_RECLAIM_TAG);
	__xfs_inode_clear_reclaim(pag, ip);
}
/* /*
* Grab the inode for reclaim exclusively. * Grab the inode for reclaim exclusively.
* Return 0 if we grabbed it, non-zero otherwise. * Return 0 if we grabbed it, non-zero otherwise.
@ -929,6 +934,7 @@ xfs_reclaim_inode(
int sync_mode) int sync_mode)
{ {
struct xfs_buf *bp = NULL; struct xfs_buf *bp = NULL;
xfs_ino_t ino = ip->i_ino; /* for radix_tree_delete */
int error; int error;
restart: restart:
@ -993,6 +999,22 @@ restart:
xfs_iflock(ip); xfs_iflock(ip);
reclaim: reclaim:
/*
* Because we use RCU freeing we need to ensure the inode always appears
* to be reclaimed with an invalid inode number when in the free state.
* We do this as early as possible under the ILOCK and flush lock so
* that xfs_iflush_cluster() can be guaranteed to detect races with us
* here. By doing this, we guarantee that once xfs_iflush_cluster has
* locked both the XFS_ILOCK and the flush lock that it will see either
* a valid, flushable inode that will serialise correctly against the
* locks below, or it will see a clean (and invalid) inode that it can
* skip.
*/
spin_lock(&ip->i_flags_lock);
ip->i_flags = XFS_IRECLAIM;
ip->i_ino = 0;
spin_unlock(&ip->i_flags_lock);
xfs_ifunlock(ip); xfs_ifunlock(ip);
xfs_iunlock(ip, XFS_ILOCK_EXCL); xfs_iunlock(ip, XFS_ILOCK_EXCL);
@ -1006,9 +1028,9 @@ reclaim:
*/ */
spin_lock(&pag->pag_ici_lock); spin_lock(&pag->pag_ici_lock);
if (!radix_tree_delete(&pag->pag_ici_root, if (!radix_tree_delete(&pag->pag_ici_root,
XFS_INO_TO_AGINO(ip->i_mount, ip->i_ino))) XFS_INO_TO_AGINO(ip->i_mount, ino)))
ASSERT(0); ASSERT(0);
__xfs_inode_clear_reclaim(pag, ip); xfs_perag_clear_reclaim_tag(pag);
spin_unlock(&pag->pag_ici_lock); spin_unlock(&pag->pag_ici_lock);
/* /*
@ -1023,7 +1045,7 @@ reclaim:
xfs_qm_dqdetach(ip); xfs_qm_dqdetach(ip);
xfs_iunlock(ip, XFS_ILOCK_EXCL); xfs_iunlock(ip, XFS_ILOCK_EXCL);
xfs_inode_free(ip); __xfs_inode_free(ip);
return error; return error;
out_ifunlock: out_ifunlock:

View File

@ -1030,7 +1030,7 @@ xfs_dir_ialloc(
tp->t_flags &= ~(XFS_TRANS_DQ_DIRTY); tp->t_flags &= ~(XFS_TRANS_DQ_DIRTY);
} }
code = xfs_trans_roll(&tp, 0); code = xfs_trans_roll(&tp, NULL);
if (committed != NULL) if (committed != NULL)
*committed = 1; *committed = 1;
@ -1161,11 +1161,9 @@ xfs_create(
rdev = 0; rdev = 0;
resblks = XFS_MKDIR_SPACE_RES(mp, name->len); resblks = XFS_MKDIR_SPACE_RES(mp, name->len);
tres = &M_RES(mp)->tr_mkdir; tres = &M_RES(mp)->tr_mkdir;
tp = xfs_trans_alloc(mp, XFS_TRANS_MKDIR);
} else { } else {
resblks = XFS_CREATE_SPACE_RES(mp, name->len); resblks = XFS_CREATE_SPACE_RES(mp, name->len);
tres = &M_RES(mp)->tr_create; tres = &M_RES(mp)->tr_create;
tp = xfs_trans_alloc(mp, XFS_TRANS_CREATE);
} }
/* /*
@ -1174,20 +1172,19 @@ xfs_create(
* the case we'll drop the one we have and get a more * the case we'll drop the one we have and get a more
* appropriate transaction later. * appropriate transaction later.
*/ */
error = xfs_trans_reserve(tp, tres, resblks, 0); error = xfs_trans_alloc(mp, tres, resblks, 0, 0, &tp);
if (error == -ENOSPC) { if (error == -ENOSPC) {
/* flush outstanding delalloc blocks and retry */ /* flush outstanding delalloc blocks and retry */
xfs_flush_inodes(mp); xfs_flush_inodes(mp);
error = xfs_trans_reserve(tp, tres, resblks, 0); error = xfs_trans_alloc(mp, tres, resblks, 0, 0, &tp);
} }
if (error == -ENOSPC) { if (error == -ENOSPC) {
/* No space at all so try a "no-allocation" reservation */ /* No space at all so try a "no-allocation" reservation */
resblks = 0; resblks = 0;
error = xfs_trans_reserve(tp, tres, 0, 0); error = xfs_trans_alloc(mp, tres, 0, 0, 0, &tp);
} }
if (error) if (error)
goto out_trans_cancel; goto out_release_inode;
xfs_ilock(dp, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL | xfs_ilock(dp, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL |
XFS_IOLOCK_PARENT | XFS_ILOCK_PARENT); XFS_IOLOCK_PARENT | XFS_ILOCK_PARENT);
@ -1337,17 +1334,16 @@ xfs_create_tmpfile(
return error; return error;
resblks = XFS_IALLOC_SPACE_RES(mp); resblks = XFS_IALLOC_SPACE_RES(mp);
tp = xfs_trans_alloc(mp, XFS_TRANS_CREATE_TMPFILE);
tres = &M_RES(mp)->tr_create_tmpfile; tres = &M_RES(mp)->tr_create_tmpfile;
error = xfs_trans_reserve(tp, tres, resblks, 0);
error = xfs_trans_alloc(mp, tres, resblks, 0, 0, &tp);
if (error == -ENOSPC) { if (error == -ENOSPC) {
/* No space at all so try a "no-allocation" reservation */ /* No space at all so try a "no-allocation" reservation */
resblks = 0; resblks = 0;
error = xfs_trans_reserve(tp, tres, 0, 0); error = xfs_trans_alloc(mp, tres, 0, 0, 0, &tp);
} }
if (error) if (error)
goto out_trans_cancel; goto out_release_inode;
error = xfs_trans_reserve_quota(tp, mp, udqp, gdqp, error = xfs_trans_reserve_quota(tp, mp, udqp, gdqp,
pdqp, resblks, 1, 0); pdqp, resblks, 1, 0);
@ -1432,15 +1428,14 @@ xfs_link(
if (error) if (error)
goto std_return; goto std_return;
tp = xfs_trans_alloc(mp, XFS_TRANS_LINK);
resblks = XFS_LINK_SPACE_RES(mp, target_name->len); resblks = XFS_LINK_SPACE_RES(mp, target_name->len);
error = xfs_trans_reserve(tp, &M_RES(mp)->tr_link, resblks, 0); error = xfs_trans_alloc(mp, &M_RES(mp)->tr_link, resblks, 0, 0, &tp);
if (error == -ENOSPC) { if (error == -ENOSPC) {
resblks = 0; resblks = 0;
error = xfs_trans_reserve(tp, &M_RES(mp)->tr_link, 0, 0); error = xfs_trans_alloc(mp, &M_RES(mp)->tr_link, 0, 0, 0, &tp);
} }
if (error) if (error)
goto error_return; goto std_return;
xfs_ilock(tdp, XFS_IOLOCK_EXCL | XFS_IOLOCK_PARENT); xfs_ilock(tdp, XFS_IOLOCK_EXCL | XFS_IOLOCK_PARENT);
xfs_lock_two_inodes(sip, tdp, XFS_ILOCK_EXCL); xfs_lock_two_inodes(sip, tdp, XFS_ILOCK_EXCL);
@ -1710,11 +1705,9 @@ xfs_inactive_truncate(
struct xfs_trans *tp; struct xfs_trans *tp;
int error; int error;
tp = xfs_trans_alloc(mp, XFS_TRANS_INACTIVE); error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate, 0, 0, 0, &tp);
error = xfs_trans_reserve(tp, &M_RES(mp)->tr_itruncate, 0, 0);
if (error) { if (error) {
ASSERT(XFS_FORCED_SHUTDOWN(mp)); ASSERT(XFS_FORCED_SHUTDOWN(mp));
xfs_trans_cancel(tp);
return error; return error;
} }
@ -1764,8 +1757,6 @@ xfs_inactive_ifree(
struct xfs_trans *tp; struct xfs_trans *tp;
int error; int error;
tp = xfs_trans_alloc(mp, XFS_TRANS_INACTIVE);
/* /*
* The ifree transaction might need to allocate blocks for record * The ifree transaction might need to allocate blocks for record
* insertion to the finobt. We don't want to fail here at ENOSPC, so * insertion to the finobt. We don't want to fail here at ENOSPC, so
@ -1781,9 +1772,8 @@ xfs_inactive_ifree(
* now remains allocated and sits on the unlinked list until the fs is * now remains allocated and sits on the unlinked list until the fs is
* repaired. * repaired.
*/ */
tp->t_flags |= XFS_TRANS_RESERVE; error = xfs_trans_alloc(mp, &M_RES(mp)->tr_ifree,
error = xfs_trans_reserve(tp, &M_RES(mp)->tr_ifree, XFS_IFREE_SPACE_RES(mp), 0, XFS_TRANS_RESERVE, &tp);
XFS_IFREE_SPACE_RES(mp), 0);
if (error) { if (error) {
if (error == -ENOSPC) { if (error == -ENOSPC) {
xfs_warn_ratelimited(mp, xfs_warn_ratelimited(mp,
@ -1792,7 +1782,6 @@ xfs_inactive_ifree(
} else { } else {
ASSERT(XFS_FORCED_SHUTDOWN(mp)); ASSERT(XFS_FORCED_SHUTDOWN(mp));
} }
xfs_trans_cancel(tp);
return error; return error;
} }
@ -2525,11 +2514,6 @@ xfs_remove(
if (error) if (error)
goto std_return; goto std_return;
if (is_dir)
tp = xfs_trans_alloc(mp, XFS_TRANS_RMDIR);
else
tp = xfs_trans_alloc(mp, XFS_TRANS_REMOVE);
/* /*
* We try to get the real space reservation first, * We try to get the real space reservation first,
* allowing for directory btree deletion(s) implying * allowing for directory btree deletion(s) implying
@ -2540,14 +2524,15 @@ xfs_remove(
* block from the directory. * block from the directory.
*/ */
resblks = XFS_REMOVE_SPACE_RES(mp); resblks = XFS_REMOVE_SPACE_RES(mp);
error = xfs_trans_reserve(tp, &M_RES(mp)->tr_remove, resblks, 0); error = xfs_trans_alloc(mp, &M_RES(mp)->tr_remove, resblks, 0, 0, &tp);
if (error == -ENOSPC) { if (error == -ENOSPC) {
resblks = 0; resblks = 0;
error = xfs_trans_reserve(tp, &M_RES(mp)->tr_remove, 0, 0); error = xfs_trans_alloc(mp, &M_RES(mp)->tr_remove, 0, 0, 0,
&tp);
} }
if (error) { if (error) {
ASSERT(error != -ENOSPC); ASSERT(error != -ENOSPC);
goto out_trans_cancel; goto std_return;
} }
xfs_ilock(dp, XFS_IOLOCK_EXCL | XFS_IOLOCK_PARENT); xfs_ilock(dp, XFS_IOLOCK_EXCL | XFS_IOLOCK_PARENT);
@ -2855,6 +2840,7 @@ xfs_rename_alloc_whiteout(
* and flag it as linkable. * and flag it as linkable.
*/ */
drop_nlink(VFS_I(tmpfile)); drop_nlink(VFS_I(tmpfile));
xfs_setup_iops(tmpfile);
xfs_finish_inode_setup(tmpfile); xfs_finish_inode_setup(tmpfile);
VFS_I(tmpfile)->i_state |= I_LINKABLE; VFS_I(tmpfile)->i_state |= I_LINKABLE;
@ -2910,15 +2896,15 @@ xfs_rename(
xfs_sort_for_rename(src_dp, target_dp, src_ip, target_ip, wip, xfs_sort_for_rename(src_dp, target_dp, src_ip, target_ip, wip,
inodes, &num_inodes); inodes, &num_inodes);
tp = xfs_trans_alloc(mp, XFS_TRANS_RENAME);
spaceres = XFS_RENAME_SPACE_RES(mp, target_name->len); spaceres = XFS_RENAME_SPACE_RES(mp, target_name->len);
error = xfs_trans_reserve(tp, &M_RES(mp)->tr_rename, spaceres, 0); error = xfs_trans_alloc(mp, &M_RES(mp)->tr_rename, spaceres, 0, 0, &tp);
if (error == -ENOSPC) { if (error == -ENOSPC) {
spaceres = 0; spaceres = 0;
error = xfs_trans_reserve(tp, &M_RES(mp)->tr_rename, 0, 0); error = xfs_trans_alloc(mp, &M_RES(mp)->tr_rename, 0, 0, 0,
&tp);
} }
if (error) if (error)
goto out_trans_cancel; goto out_release_wip;
/* /*
* Attach the dquots to the inodes * Attach the dquots to the inodes
@ -3155,6 +3141,7 @@ out_bmap_cancel:
xfs_bmap_cancel(&free_list); xfs_bmap_cancel(&free_list);
out_trans_cancel: out_trans_cancel:
xfs_trans_cancel(tp); xfs_trans_cancel(tp);
out_release_wip:
if (wip) if (wip)
IRELE(wip); IRELE(wip);
return error; return error;
@ -3162,16 +3149,16 @@ out_trans_cancel:
STATIC int STATIC int
xfs_iflush_cluster( xfs_iflush_cluster(
xfs_inode_t *ip, struct xfs_inode *ip,
xfs_buf_t *bp) struct xfs_buf *bp)
{ {
xfs_mount_t *mp = ip->i_mount; struct xfs_mount *mp = ip->i_mount;
struct xfs_perag *pag; struct xfs_perag *pag;
unsigned long first_index, mask; unsigned long first_index, mask;
unsigned long inodes_per_cluster; unsigned long inodes_per_cluster;
int ilist_size; int cilist_size;
xfs_inode_t **ilist; struct xfs_inode **cilist;
xfs_inode_t *iq; struct xfs_inode *cip;
int nr_found; int nr_found;
int clcount = 0; int clcount = 0;
int bufwasdelwri; int bufwasdelwri;
@ -3180,23 +3167,23 @@ xfs_iflush_cluster(
pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino)); pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino));
inodes_per_cluster = mp->m_inode_cluster_size >> mp->m_sb.sb_inodelog; inodes_per_cluster = mp->m_inode_cluster_size >> mp->m_sb.sb_inodelog;
ilist_size = inodes_per_cluster * sizeof(xfs_inode_t *); cilist_size = inodes_per_cluster * sizeof(xfs_inode_t *);
ilist = kmem_alloc(ilist_size, KM_MAYFAIL|KM_NOFS); cilist = kmem_alloc(cilist_size, KM_MAYFAIL|KM_NOFS);
if (!ilist) if (!cilist)
goto out_put; goto out_put;
mask = ~(((mp->m_inode_cluster_size >> mp->m_sb.sb_inodelog)) - 1); mask = ~(((mp->m_inode_cluster_size >> mp->m_sb.sb_inodelog)) - 1);
first_index = XFS_INO_TO_AGINO(mp, ip->i_ino) & mask; first_index = XFS_INO_TO_AGINO(mp, ip->i_ino) & mask;
rcu_read_lock(); rcu_read_lock();
/* really need a gang lookup range call here */ /* really need a gang lookup range call here */
nr_found = radix_tree_gang_lookup(&pag->pag_ici_root, (void**)ilist, nr_found = radix_tree_gang_lookup(&pag->pag_ici_root, (void**)cilist,
first_index, inodes_per_cluster); first_index, inodes_per_cluster);
if (nr_found == 0) if (nr_found == 0)
goto out_free; goto out_free;
for (i = 0; i < nr_found; i++) { for (i = 0; i < nr_found; i++) {
iq = ilist[i]; cip = cilist[i];
if (iq == ip) if (cip == ip)
continue; continue;
/* /*
@ -3205,20 +3192,30 @@ xfs_iflush_cluster(
* We need to check under the i_flags_lock for a valid inode * We need to check under the i_flags_lock for a valid inode
* here. Skip it if it is not valid or the wrong inode. * here. Skip it if it is not valid or the wrong inode.
*/ */
spin_lock(&ip->i_flags_lock); spin_lock(&cip->i_flags_lock);
if (!ip->i_ino || if (!cip->i_ino ||
(XFS_INO_TO_AGINO(mp, iq->i_ino) & mask) != first_index) { __xfs_iflags_test(cip, XFS_ISTALE)) {
spin_unlock(&ip->i_flags_lock); spin_unlock(&cip->i_flags_lock);
continue; continue;
} }
spin_unlock(&ip->i_flags_lock);
/*
* Once we fall off the end of the cluster, no point checking
* any more inodes in the list because they will also all be
* outside the cluster.
*/
if ((XFS_INO_TO_AGINO(mp, cip->i_ino) & mask) != first_index) {
spin_unlock(&cip->i_flags_lock);
break;
}
spin_unlock(&cip->i_flags_lock);
/* /*
* Do an un-protected check to see if the inode is dirty and * Do an un-protected check to see if the inode is dirty and
* is a candidate for flushing. These checks will be repeated * is a candidate for flushing. These checks will be repeated
* later after the appropriate locks are acquired. * later after the appropriate locks are acquired.
*/ */
if (xfs_inode_clean(iq) && xfs_ipincount(iq) == 0) if (xfs_inode_clean(cip) && xfs_ipincount(cip) == 0)
continue; continue;
/* /*
@ -3226,15 +3223,28 @@ xfs_iflush_cluster(
* then this inode cannot be flushed and is skipped. * then this inode cannot be flushed and is skipped.
*/ */
if (!xfs_ilock_nowait(iq, XFS_ILOCK_SHARED)) if (!xfs_ilock_nowait(cip, XFS_ILOCK_SHARED))
continue; continue;
if (!xfs_iflock_nowait(iq)) { if (!xfs_iflock_nowait(cip)) {
xfs_iunlock(iq, XFS_ILOCK_SHARED); xfs_iunlock(cip, XFS_ILOCK_SHARED);
continue; continue;
} }
if (xfs_ipincount(iq)) { if (xfs_ipincount(cip)) {
xfs_ifunlock(iq); xfs_ifunlock(cip);
xfs_iunlock(iq, XFS_ILOCK_SHARED); xfs_iunlock(cip, XFS_ILOCK_SHARED);
continue;
}
/*
* Check the inode number again, just to be certain we are not
* racing with freeing in xfs_reclaim_inode(). See the comments
* in that function for more information as to why the initial
* check is not sufficient.
*/
if (!cip->i_ino) {
xfs_ifunlock(cip);
xfs_iunlock(cip, XFS_ILOCK_SHARED);
continue; continue;
} }
@ -3242,18 +3252,18 @@ xfs_iflush_cluster(
* arriving here means that this inode can be flushed. First * arriving here means that this inode can be flushed. First
* re-check that it's dirty before flushing. * re-check that it's dirty before flushing.
*/ */
if (!xfs_inode_clean(iq)) { if (!xfs_inode_clean(cip)) {
int error; int error;
error = xfs_iflush_int(iq, bp); error = xfs_iflush_int(cip, bp);
if (error) { if (error) {
xfs_iunlock(iq, XFS_ILOCK_SHARED); xfs_iunlock(cip, XFS_ILOCK_SHARED);
goto cluster_corrupt_out; goto cluster_corrupt_out;
} }
clcount++; clcount++;
} else { } else {
xfs_ifunlock(iq); xfs_ifunlock(cip);
} }
xfs_iunlock(iq, XFS_ILOCK_SHARED); xfs_iunlock(cip, XFS_ILOCK_SHARED);
} }
if (clcount) { if (clcount) {
@ -3263,7 +3273,7 @@ xfs_iflush_cluster(
out_free: out_free:
rcu_read_unlock(); rcu_read_unlock();
kmem_free(ilist); kmem_free(cilist);
out_put: out_put:
xfs_perag_put(pag); xfs_perag_put(pag);
return 0; return 0;
@ -3306,8 +3316,8 @@ cluster_corrupt_out:
/* /*
* Unlocks the flush lock * Unlocks the flush lock
*/ */
xfs_iflush_abort(iq, false); xfs_iflush_abort(cip, false);
kmem_free(ilist); kmem_free(cilist);
xfs_perag_put(pag); xfs_perag_put(pag);
return -EFSCORRUPTED; return -EFSCORRUPTED;
} }
@ -3327,7 +3337,7 @@ xfs_iflush(
struct xfs_buf **bpp) struct xfs_buf **bpp)
{ {
struct xfs_mount *mp = ip->i_mount; struct xfs_mount *mp = ip->i_mount;
struct xfs_buf *bp; struct xfs_buf *bp = NULL;
struct xfs_dinode *dip; struct xfs_dinode *dip;
int error; int error;
@ -3369,14 +3379,22 @@ xfs_iflush(
} }
/* /*
* Get the buffer containing the on-disk inode. * Get the buffer containing the on-disk inode. We are doing a try-lock
* operation here, so we may get an EAGAIN error. In that case, we
* simply want to return with the inode still dirty.
*
* If we get any other error, we effectively have a corruption situation
* and we cannot flush the inode, so we treat it the same as failing
* xfs_iflush_int().
*/ */
error = xfs_imap_to_bp(mp, NULL, &ip->i_imap, &dip, &bp, XBF_TRYLOCK, error = xfs_imap_to_bp(mp, NULL, &ip->i_imap, &dip, &bp, XBF_TRYLOCK,
0); 0);
if (error || !bp) { if (error == -EAGAIN) {
xfs_ifunlock(ip); xfs_ifunlock(ip);
return error; return error;
} }
if (error)
goto corrupt_out;
/* /*
* First flush out the inode that xfs_iflush was called with. * First flush out the inode that xfs_iflush was called with.
@ -3404,7 +3422,8 @@ xfs_iflush(
return 0; return 0;
corrupt_out: corrupt_out:
xfs_buf_relse(bp); if (bp)
xfs_buf_relse(bp);
xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
cluster_corrupt_out: cluster_corrupt_out:
error = -EFSCORRUPTED; error = -EFSCORRUPTED;

View File

@ -440,6 +440,9 @@ loff_t __xfs_seek_hole_data(struct inode *inode, loff_t start,
/* from xfs_iops.c */ /* from xfs_iops.c */
extern void xfs_setup_inode(struct xfs_inode *ip);
extern void xfs_setup_iops(struct xfs_inode *ip);
/* /*
* When setting up a newly allocated inode, we need to call * When setting up a newly allocated inode, we need to call
* xfs_finish_inode_setup() once the inode is fully instantiated at * xfs_finish_inode_setup() once the inode is fully instantiated at
@ -447,7 +450,6 @@ loff_t __xfs_seek_hole_data(struct inode *inode, loff_t start,
* before we've completed instantiation. Otherwise we can do it * before we've completed instantiation. Otherwise we can do it
* the moment the inode lookup is complete. * the moment the inode lookup is complete.
*/ */
extern void xfs_setup_inode(struct xfs_inode *ip);
static inline void xfs_finish_inode_setup(struct xfs_inode *ip) static inline void xfs_finish_inode_setup(struct xfs_inode *ip)
{ {
xfs_iflags_clear(ip, XFS_INEW); xfs_iflags_clear(ip, XFS_INEW);
@ -458,6 +460,7 @@ static inline void xfs_finish_inode_setup(struct xfs_inode *ip)
static inline void xfs_setup_existing_inode(struct xfs_inode *ip) static inline void xfs_setup_existing_inode(struct xfs_inode *ip)
{ {
xfs_setup_inode(ip); xfs_setup_inode(ip);
xfs_setup_iops(ip);
xfs_finish_inode_setup(ip); xfs_finish_inode_setup(ip);
} }

View File

@ -210,7 +210,7 @@ xfs_inode_item_format_data_fork(
*/ */
data_bytes = roundup(ip->i_df.if_bytes, 4); data_bytes = roundup(ip->i_df.if_bytes, 4);
ASSERT(ip->i_df.if_real_bytes == 0 || ASSERT(ip->i_df.if_real_bytes == 0 ||
ip->i_df.if_real_bytes == data_bytes); ip->i_df.if_real_bytes >= data_bytes);
ASSERT(ip->i_df.if_u1.if_data != NULL); ASSERT(ip->i_df.if_u1.if_data != NULL);
ASSERT(ip->i_d.di_size > 0); ASSERT(ip->i_d.di_size > 0);
xlog_copy_iovec(lv, vecp, XLOG_REG_TYPE_ILOCAL, xlog_copy_iovec(lv, vecp, XLOG_REG_TYPE_ILOCAL,
@ -305,7 +305,7 @@ xfs_inode_item_format_attr_fork(
*/ */
data_bytes = roundup(ip->i_afp->if_bytes, 4); data_bytes = roundup(ip->i_afp->if_bytes, 4);
ASSERT(ip->i_afp->if_real_bytes == 0 || ASSERT(ip->i_afp->if_real_bytes == 0 ||
ip->i_afp->if_real_bytes == data_bytes); ip->i_afp->if_real_bytes >= data_bytes);
ASSERT(ip->i_afp->if_u1.if_data != NULL); ASSERT(ip->i_afp->if_u1.if_data != NULL);
xlog_copy_iovec(lv, vecp, XLOG_REG_TYPE_IATTR_LOCAL, xlog_copy_iovec(lv, vecp, XLOG_REG_TYPE_IATTR_LOCAL,
ip->i_afp->if_u1.if_data, ip->i_afp->if_u1.if_data,
@ -479,6 +479,8 @@ STATIC uint
xfs_inode_item_push( xfs_inode_item_push(
struct xfs_log_item *lip, struct xfs_log_item *lip,
struct list_head *buffer_list) struct list_head *buffer_list)
__releases(&lip->li_ailp->xa_lock)
__acquires(&lip->li_ailp->xa_lock)
{ {
struct xfs_inode_log_item *iip = INODE_ITEM(lip); struct xfs_inode_log_item *iip = INODE_ITEM(lip);
struct xfs_inode *ip = iip->ili_inode; struct xfs_inode *ip = iip->ili_inode;

View File

@ -277,7 +277,6 @@ xfs_readlink_by_handle(
{ {
struct dentry *dentry; struct dentry *dentry;
__u32 olen; __u32 olen;
void *link;
int error; int error;
if (!capable(CAP_SYS_ADMIN)) if (!capable(CAP_SYS_ADMIN))
@ -288,7 +287,7 @@ xfs_readlink_by_handle(
return PTR_ERR(dentry); return PTR_ERR(dentry);
/* Restrict this handle operation to symlinks only. */ /* Restrict this handle operation to symlinks only. */
if (!d_is_symlink(dentry)) { if (!d_inode(dentry)->i_op->readlink) {
error = -EINVAL; error = -EINVAL;
goto out_dput; goto out_dput;
} }
@ -298,21 +297,8 @@ xfs_readlink_by_handle(
goto out_dput; goto out_dput;
} }
link = kmalloc(MAXPATHLEN+1, GFP_KERNEL); error = d_inode(dentry)->i_op->readlink(dentry, hreq->ohandle, olen);
if (!link) {
error = -ENOMEM;
goto out_dput;
}
error = xfs_readlink(XFS_I(d_inode(dentry)), link);
if (error)
goto out_kfree;
error = readlink_copy(hreq->ohandle, olen, link);
if (error)
goto out_kfree;
out_kfree:
kfree(link);
out_dput: out_dput:
dput(dentry); dput(dentry);
return error; return error;
@ -334,12 +320,10 @@ xfs_set_dmattrs(
if (XFS_FORCED_SHUTDOWN(mp)) if (XFS_FORCED_SHUTDOWN(mp))
return -EIO; return -EIO;
tp = xfs_trans_alloc(mp, XFS_TRANS_SET_DMATTRS); error = xfs_trans_alloc(mp, &M_RES(mp)->tr_ichange, 0, 0, 0, &tp);
error = xfs_trans_reserve(tp, &M_RES(mp)->tr_ichange, 0, 0); if (error)
if (error) {
xfs_trans_cancel(tp);
return error; return error;
}
xfs_ilock(ip, XFS_ILOCK_EXCL); xfs_ilock(ip, XFS_ILOCK_EXCL);
xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
@ -1141,10 +1125,9 @@ xfs_ioctl_setattr_get_trans(
if (XFS_FORCED_SHUTDOWN(mp)) if (XFS_FORCED_SHUTDOWN(mp))
goto out_unlock; goto out_unlock;
tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_NOT_SIZE); error = xfs_trans_alloc(mp, &M_RES(mp)->tr_ichange, 0, 0, 0, &tp);
error = xfs_trans_reserve(tp, &M_RES(mp)->tr_ichange, 0, 0);
if (error) if (error)
goto out_cancel; return ERR_PTR(error);
xfs_ilock(ip, XFS_ILOCK_EXCL); xfs_ilock(ip, XFS_ILOCK_EXCL);
xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL | join_flags); xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL | join_flags);

View File

@ -132,6 +132,7 @@ xfs_iomap_write_direct(
int error; int error;
int lockmode; int lockmode;
int bmapi_flags = XFS_BMAPI_PREALLOC; int bmapi_flags = XFS_BMAPI_PREALLOC;
uint tflags = 0;
rt = XFS_IS_REALTIME_INODE(ip); rt = XFS_IS_REALTIME_INODE(ip);
extsz = xfs_get_extsz_hint(ip); extsz = xfs_get_extsz_hint(ip);
@ -191,11 +192,6 @@ xfs_iomap_write_direct(
if (error) if (error)
return error; return error;
/*
* Allocate and setup the transaction
*/
tp = xfs_trans_alloc(mp, XFS_TRANS_DIOSTRAT);
/* /*
* For DAX, we do not allocate unwritten extents, but instead we zero * For DAX, we do not allocate unwritten extents, but instead we zero
* the block before we commit the transaction. Ideally we'd like to do * the block before we commit the transaction. Ideally we'd like to do
@ -209,23 +205,17 @@ xfs_iomap_write_direct(
* the reserve block pool for bmbt block allocation if there is no space * the reserve block pool for bmbt block allocation if there is no space
* left but we need to do unwritten extent conversion. * left but we need to do unwritten extent conversion.
*/ */
if (IS_DAX(VFS_I(ip))) { if (IS_DAX(VFS_I(ip))) {
bmapi_flags = XFS_BMAPI_CONVERT | XFS_BMAPI_ZERO; bmapi_flags = XFS_BMAPI_CONVERT | XFS_BMAPI_ZERO;
if (ISUNWRITTEN(imap)) { if (ISUNWRITTEN(imap)) {
tp->t_flags |= XFS_TRANS_RESERVE; tflags |= XFS_TRANS_RESERVE;
resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0) << 1; resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0) << 1;
} }
} }
error = xfs_trans_reserve(tp, &M_RES(mp)->tr_write, error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, resblks, resrtextents,
resblks, resrtextents); tflags, &tp);
/* if (error)
* Check for running out of space, note: need lock to return
*/
if (error) {
xfs_trans_cancel(tp);
return error; return error;
}
lockmode = XFS_ILOCK_EXCL; lockmode = XFS_ILOCK_EXCL;
xfs_ilock(ip, lockmode); xfs_ilock(ip, lockmode);
@ -726,15 +716,13 @@ xfs_iomap_write_allocate(
nimaps = 0; nimaps = 0;
while (nimaps == 0) { while (nimaps == 0) {
tp = xfs_trans_alloc(mp, XFS_TRANS_STRAT_WRITE);
tp->t_flags |= XFS_TRANS_RESERVE;
nres = XFS_EXTENTADD_SPACE_RES(mp, XFS_DATA_FORK); nres = XFS_EXTENTADD_SPACE_RES(mp, XFS_DATA_FORK);
error = xfs_trans_reserve(tp, &M_RES(mp)->tr_write,
nres, 0); error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, nres,
if (error) { 0, XFS_TRANS_RESERVE, &tp);
xfs_trans_cancel(tp); if (error)
return error; return error;
}
xfs_ilock(ip, XFS_ILOCK_EXCL); xfs_ilock(ip, XFS_ILOCK_EXCL);
xfs_trans_ijoin(tp, ip, 0); xfs_trans_ijoin(tp, ip, 0);
@ -878,25 +866,18 @@ xfs_iomap_write_unwritten(
do { do {
/* /*
* set up a transaction to convert the range of extents * Set up a transaction to convert the range of extents
* from unwritten to real. Do allocations in a loop until * from unwritten to real. Do allocations in a loop until
* we have covered the range passed in. * we have covered the range passed in.
* *
* Note that we open code the transaction allocation here * Note that we can't risk to recursing back into the filesystem
* to pass KM_NOFS--we can't risk to recursing back into * here as we might be asked to write out the same inode that we
* the filesystem here as we might be asked to write out * complete here and might deadlock on the iolock.
* the same inode that we complete here and might deadlock
* on the iolock.
*/ */
sb_start_intwrite(mp->m_super); error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, resblks, 0,
tp = _xfs_trans_alloc(mp, XFS_TRANS_STRAT_WRITE, KM_NOFS); XFS_TRANS_RESERVE | XFS_TRANS_NOFS, &tp);
tp->t_flags |= XFS_TRANS_RESERVE | XFS_TRANS_FREEZE_PROT; if (error)
error = xfs_trans_reserve(tp, &M_RES(mp)->tr_write,
resblks, 0);
if (error) {
xfs_trans_cancel(tp);
return error; return error;
}
xfs_ilock(ip, XFS_ILOCK_EXCL); xfs_ilock(ip, XFS_ILOCK_EXCL);
xfs_trans_ijoin(tp, ip, 0); xfs_trans_ijoin(tp, ip, 0);

View File

@ -181,6 +181,8 @@ xfs_generic_create(
} }
#endif #endif
xfs_setup_iops(ip);
if (tmpfile) if (tmpfile)
d_tmpfile(dentry, inode); d_tmpfile(dentry, inode);
else else
@ -368,6 +370,8 @@ xfs_vn_symlink(
if (unlikely(error)) if (unlikely(error))
goto out_cleanup_inode; goto out_cleanup_inode;
xfs_setup_iops(cip);
d_instantiate(dentry, inode); d_instantiate(dentry, inode);
xfs_finish_inode_setup(cip); xfs_finish_inode_setup(cip);
return 0; return 0;
@ -442,6 +446,16 @@ xfs_vn_get_link(
return ERR_PTR(error); return ERR_PTR(error);
} }
STATIC const char *
xfs_vn_get_link_inline(
struct dentry *dentry,
struct inode *inode,
struct delayed_call *done)
{
ASSERT(XFS_I(inode)->i_df.if_flags & XFS_IFINLINE);
return XFS_I(inode)->i_df.if_u1.if_data;
}
STATIC int STATIC int
xfs_vn_getattr( xfs_vn_getattr(
struct vfsmount *mnt, struct vfsmount *mnt,
@ -599,12 +613,12 @@ xfs_setattr_nonsize(
return error; return error;
} }
tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_NOT_SIZE); error = xfs_trans_alloc(mp, &M_RES(mp)->tr_ichange, 0, 0, 0, &tp);
error = xfs_trans_reserve(tp, &M_RES(mp)->tr_ichange, 0, 0);
if (error) if (error)
goto out_trans_cancel; goto out_dqrele;
xfs_ilock(ip, XFS_ILOCK_EXCL); xfs_ilock(ip, XFS_ILOCK_EXCL);
xfs_trans_ijoin(tp, ip, 0);
/* /*
* Change file ownership. Must be the owner or privileged. * Change file ownership. Must be the owner or privileged.
@ -633,12 +647,10 @@ xfs_setattr_nonsize(
NULL, capable(CAP_FOWNER) ? NULL, capable(CAP_FOWNER) ?
XFS_QMOPT_FORCE_RES : 0); XFS_QMOPT_FORCE_RES : 0);
if (error) /* out of quota */ if (error) /* out of quota */
goto out_unlock; goto out_cancel;
} }
} }
xfs_trans_ijoin(tp, ip, 0);
/* /*
* Change file ownership. Must be the owner or privileged. * Change file ownership. Must be the owner or privileged.
*/ */
@ -722,10 +734,9 @@ xfs_setattr_nonsize(
return 0; return 0;
out_unlock: out_cancel:
xfs_iunlock(ip, XFS_ILOCK_EXCL);
out_trans_cancel:
xfs_trans_cancel(tp); xfs_trans_cancel(tp);
out_dqrele:
xfs_qm_dqrele(udqp); xfs_qm_dqrele(udqp);
xfs_qm_dqrele(gdqp); xfs_qm_dqrele(gdqp);
return error; return error;
@ -834,7 +845,7 @@ xfs_setattr_size(
* We have to do all the page cache truncate work outside the * We have to do all the page cache truncate work outside the
* transaction context as the "lock" order is page lock->log space * transaction context as the "lock" order is page lock->log space
* reservation as defined by extent allocation in the writeback path. * reservation as defined by extent allocation in the writeback path.
* Hence a truncate can fail with ENOMEM from xfs_trans_reserve(), but * Hence a truncate can fail with ENOMEM from xfs_trans_alloc(), but
* having already truncated the in-memory version of the file (i.e. made * having already truncated the in-memory version of the file (i.e. made
* user visible changes). There's not much we can do about this, except * user visible changes). There's not much we can do about this, except
* to hope that the caller sees ENOMEM and retries the truncate * to hope that the caller sees ENOMEM and retries the truncate
@ -849,10 +860,9 @@ xfs_setattr_size(
return error; return error;
truncate_setsize(inode, newsize); truncate_setsize(inode, newsize);
tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_SIZE); error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate, 0, 0, 0, &tp);
error = xfs_trans_reserve(tp, &M_RES(mp)->tr_itruncate, 0, 0);
if (error) if (error)
goto out_trans_cancel; return error;
lock_flags |= XFS_ILOCK_EXCL; lock_flags |= XFS_ILOCK_EXCL;
xfs_ilock(ip, XFS_ILOCK_EXCL); xfs_ilock(ip, XFS_ILOCK_EXCL);
@ -971,12 +981,9 @@ xfs_vn_update_time(
trace_xfs_update_time(ip); trace_xfs_update_time(ip);
tp = xfs_trans_alloc(mp, XFS_TRANS_FSYNC_TS); error = xfs_trans_alloc(mp, &M_RES(mp)->tr_fsyncts, 0, 0, 0, &tp);
error = xfs_trans_reserve(tp, &M_RES(mp)->tr_fsyncts, 0, 0); if (error)
if (error) {
xfs_trans_cancel(tp);
return error; return error;
}
xfs_ilock(ip, XFS_ILOCK_EXCL); xfs_ilock(ip, XFS_ILOCK_EXCL);
if (flags & S_CTIME) if (flags & S_CTIME)
@ -1167,6 +1174,18 @@ static const struct inode_operations xfs_symlink_inode_operations = {
.update_time = xfs_vn_update_time, .update_time = xfs_vn_update_time,
}; };
/*
 * Inode operations vector that pairs generic_readlink() with
 * xfs_vn_get_link_inline() for link resolution. The remaining entries are the
 * XFS getattr/setattr/listxattr/update_time handlers and the generic xattr
 * helpers.
 *
 * NOTE(review): presumably installed only on symlinks whose data fork has
 * XFS_IFINLINE set, since xfs_vn_get_link_inline() asserts that flag -- the
 * code that selects this vector is outside this excerpt; confirm there.
 */
static const struct inode_operations xfs_inline_symlink_inode_operations = {
.readlink = generic_readlink,
.get_link = xfs_vn_get_link_inline,
.getattr = xfs_vn_getattr,
.setattr = xfs_vn_setattr,
.setxattr = generic_setxattr,
.getxattr = generic_getxattr,
.removexattr = generic_removexattr,
.listxattr = xfs_vn_listxattr,
.update_time = xfs_vn_update_time,
};
STATIC void STATIC void
xfs_diflags_to_iflags( xfs_diflags_to_iflags(
struct inode *inode, struct inode *inode,
@ -1193,7 +1212,7 @@ xfs_diflags_to_iflags(
} }
/* /*
* Initialize the Linux inode and set up the operation vectors. * Initialize the Linux inode.
* *
* When reading existing inodes from disk this is called directly from xfs_iget, * When reading existing inodes from disk this is called directly from xfs_iget,
* when creating a new inode it is called from xfs_ialloc after setting up the * when creating a new inode it is called from xfs_ialloc after setting up the
@ -1232,32 +1251,12 @@ xfs_setup_inode(
i_size_write(inode, ip->i_d.di_size); i_size_write(inode, ip->i_d.di_size);
xfs_diflags_to_iflags(inode, ip); xfs_diflags_to_iflags(inode, ip);
ip->d_ops = ip->i_mount->m_nondir_inode_ops; if (S_ISDIR(inode->i_mode)) {
lockdep_set_class(&ip->i_lock.mr_lock, &xfs_nondir_ilock_class);
switch (inode->i_mode & S_IFMT) {
case S_IFREG:
inode->i_op = &xfs_inode_operations;
inode->i_fop = &xfs_file_operations;
inode->i_mapping->a_ops = &xfs_address_space_operations;
break;
case S_IFDIR:
lockdep_set_class(&ip->i_lock.mr_lock, &xfs_dir_ilock_class); lockdep_set_class(&ip->i_lock.mr_lock, &xfs_dir_ilock_class);
if (xfs_sb_version_hasasciici(&XFS_M(inode->i_sb)->m_sb))
inode->i_op = &xfs_dir_ci_inode_operations;
else
inode->i_op = &xfs_dir_inode_operations;
inode->i_fop = &xfs_dir_file_operations;
ip->d_ops = ip->i_mount->m_dir_inode_ops; ip->d_ops = ip->i_mount->m_dir_inode_ops;
break; } else {
case S_IFLNK: ip->d_ops = ip->i_mount->m_nondir_inode_ops;
inode->i_op = &xfs_symlink_inode_operations; lockdep_set_class(&ip->i_lock.mr_lock, &xfs_nondir_ilock_class);
if (!(ip->i_df.if_flags & XFS_IFINLINE))
inode->i_mapping->a_ops = &xfs_address_space_operations;
break;
default:
inode->i_op = &xfs_inode_operations;
init_special_inode(inode, inode->i_mode, inode->i_rdev);
break;
} }
/* /*
@ -1277,3 +1276,35 @@ xfs_setup_inode(
cache_no_acl(inode); cache_no_acl(inode);
} }
} }
/*
 * Install the VFS operation vectors matching the inode's file type.
 *
 * Regular files get inode, file and address-space operations.  Directories
 * get the case-insensitive inode operations when the superblock reports the
 * ASCII-CI feature, the plain directory operations otherwise.  Symlinks get
 * the inline table when the data fork is flagged XFS_IFINLINE.  Any other
 * type gets the generic inode operations and is passed on to
 * init_special_inode().
 */
void
xfs_setup_iops(
	struct xfs_inode	*ip)
{
	struct inode		*inode = &ip->i_vnode;
	unsigned int		fmt = inode->i_mode & S_IFMT;

	if (fmt == S_IFREG) {
		inode->i_op = &xfs_inode_operations;
		inode->i_fop = &xfs_file_operations;
		inode->i_mapping->a_ops = &xfs_address_space_operations;
		return;
	}

	if (fmt == S_IFDIR) {
		inode->i_op = xfs_sb_version_hasasciici(&XFS_M(inode->i_sb)->m_sb) ?
				&xfs_dir_ci_inode_operations :
				&xfs_dir_inode_operations;
		inode->i_fop = &xfs_dir_file_operations;
		return;
	}

	if (fmt == S_IFLNK) {
		inode->i_op = (ip->i_df.if_flags & XFS_IFINLINE) ?
				&xfs_inline_symlink_inode_operations :
				&xfs_symlink_inode_operations;
		return;
	}

	/* everything else is a special file */
	inode->i_op = &xfs_inode_operations;
	init_special_inode(inode, inode->i_mode, inode->i_rdev);
}

View File

@ -435,8 +435,7 @@ xfs_log_reserve(
int cnt, int cnt,
struct xlog_ticket **ticp, struct xlog_ticket **ticp,
__uint8_t client, __uint8_t client,
bool permanent, bool permanent)
uint t_type)
{ {
struct xlog *log = mp->m_log; struct xlog *log = mp->m_log;
struct xlog_ticket *tic; struct xlog_ticket *tic;
@ -456,7 +455,6 @@ xfs_log_reserve(
if (!tic) if (!tic)
return -ENOMEM; return -ENOMEM;
tic->t_trans_type = t_type;
*ticp = tic; *ticp = tic;
xlog_grant_push_ail(log, tic->t_cnt ? tic->t_unit_res * tic->t_cnt xlog_grant_push_ail(log, tic->t_cnt ? tic->t_unit_res * tic->t_cnt
@ -823,8 +821,7 @@ xfs_log_unmount_write(xfs_mount_t *mp)
} while (iclog != first_iclog); } while (iclog != first_iclog);
#endif #endif
if (! (XLOG_FORCED_SHUTDOWN(log))) { if (! (XLOG_FORCED_SHUTDOWN(log))) {
error = xfs_log_reserve(mp, 600, 1, &tic, error = xfs_log_reserve(mp, 600, 1, &tic, XFS_LOG, 0);
XFS_LOG, 0, XLOG_UNMOUNT_REC_TYPE);
if (!error) { if (!error) {
/* the data section must be 32 bit size aligned */ /* the data section must be 32 bit size aligned */
struct { struct {
@ -2032,58 +2029,8 @@ xlog_print_tic_res(
REG_TYPE_STR(ICREATE, "inode create") REG_TYPE_STR(ICREATE, "inode create")
}; };
#undef REG_TYPE_STR #undef REG_TYPE_STR
#define TRANS_TYPE_STR(type) [XFS_TRANS_##type] = #type
static char *trans_type_str[XFS_TRANS_TYPE_MAX] = {
TRANS_TYPE_STR(SETATTR_NOT_SIZE),
TRANS_TYPE_STR(SETATTR_SIZE),
TRANS_TYPE_STR(INACTIVE),
TRANS_TYPE_STR(CREATE),
TRANS_TYPE_STR(CREATE_TRUNC),
TRANS_TYPE_STR(TRUNCATE_FILE),
TRANS_TYPE_STR(REMOVE),
TRANS_TYPE_STR(LINK),
TRANS_TYPE_STR(RENAME),
TRANS_TYPE_STR(MKDIR),
TRANS_TYPE_STR(RMDIR),
TRANS_TYPE_STR(SYMLINK),
TRANS_TYPE_STR(SET_DMATTRS),
TRANS_TYPE_STR(GROWFS),
TRANS_TYPE_STR(STRAT_WRITE),
TRANS_TYPE_STR(DIOSTRAT),
TRANS_TYPE_STR(WRITEID),
TRANS_TYPE_STR(ADDAFORK),
TRANS_TYPE_STR(ATTRINVAL),
TRANS_TYPE_STR(ATRUNCATE),
TRANS_TYPE_STR(ATTR_SET),
TRANS_TYPE_STR(ATTR_RM),
TRANS_TYPE_STR(ATTR_FLAG),
TRANS_TYPE_STR(CLEAR_AGI_BUCKET),
TRANS_TYPE_STR(SB_CHANGE),
TRANS_TYPE_STR(DUMMY1),
TRANS_TYPE_STR(DUMMY2),
TRANS_TYPE_STR(QM_QUOTAOFF),
TRANS_TYPE_STR(QM_DQALLOC),
TRANS_TYPE_STR(QM_SETQLIM),
TRANS_TYPE_STR(QM_DQCLUSTER),
TRANS_TYPE_STR(QM_QINOCREATE),
TRANS_TYPE_STR(QM_QUOTAOFF_END),
TRANS_TYPE_STR(FSYNC_TS),
TRANS_TYPE_STR(GROWFSRT_ALLOC),
TRANS_TYPE_STR(GROWFSRT_ZERO),
TRANS_TYPE_STR(GROWFSRT_FREE),
TRANS_TYPE_STR(SWAPEXT),
TRANS_TYPE_STR(CHECKPOINT),
TRANS_TYPE_STR(ICREATE),
TRANS_TYPE_STR(CREATE_TMPFILE)
};
#undef TRANS_TYPE_STR
xfs_warn(mp, "xlog_write: reservation summary:"); xfs_warn(mp, "xlog_write: reservation summary:");
xfs_warn(mp, " trans type = %s (%u)",
((ticket->t_trans_type <= 0 ||
ticket->t_trans_type > XFS_TRANS_TYPE_MAX) ?
"bad-trans-type" : trans_type_str[ticket->t_trans_type]),
ticket->t_trans_type);
xfs_warn(mp, " unit res = %d bytes", xfs_warn(mp, " unit res = %d bytes",
ticket->t_unit_res); ticket->t_unit_res);
xfs_warn(mp, " current res = %d bytes", xfs_warn(mp, " current res = %d bytes",
@ -3378,7 +3325,7 @@ xfs_log_force(
{ {
int error; int error;
trace_xfs_log_force(mp, 0); trace_xfs_log_force(mp, 0, _RET_IP_);
error = _xfs_log_force(mp, flags, NULL); error = _xfs_log_force(mp, flags, NULL);
if (error) if (error)
xfs_warn(mp, "%s: error %d returned.", __func__, error); xfs_warn(mp, "%s: error %d returned.", __func__, error);
@ -3527,7 +3474,7 @@ xfs_log_force_lsn(
{ {
int error; int error;
trace_xfs_log_force(mp, lsn); trace_xfs_log_force(mp, lsn, _RET_IP_);
error = _xfs_log_force_lsn(mp, lsn, flags, NULL); error = _xfs_log_force_lsn(mp, lsn, flags, NULL);
if (error) if (error)
xfs_warn(mp, "%s: error %d returned.", __func__, error); xfs_warn(mp, "%s: error %d returned.", __func__, error);
@ -3709,7 +3656,6 @@ xlog_ticket_alloc(
tic->t_tid = prandom_u32(); tic->t_tid = prandom_u32();
tic->t_clientid = client; tic->t_clientid = client;
tic->t_flags = XLOG_TIC_INITED; tic->t_flags = XLOG_TIC_INITED;
tic->t_trans_type = 0;
if (permanent) if (permanent)
tic->t_flags |= XLOG_TIC_PERM_RESERV; tic->t_flags |= XLOG_TIC_PERM_RESERV;

View File

@ -161,8 +161,7 @@ int xfs_log_reserve(struct xfs_mount *mp,
int count, int count,
struct xlog_ticket **ticket, struct xlog_ticket **ticket,
__uint8_t clientid, __uint8_t clientid,
bool permanent, bool permanent);
uint t_type);
int xfs_log_regrant(struct xfs_mount *mp, struct xlog_ticket *tic); int xfs_log_regrant(struct xfs_mount *mp, struct xlog_ticket *tic);
int xfs_log_unmount_write(struct xfs_mount *mp); int xfs_log_unmount_write(struct xfs_mount *mp);
void xfs_log_unmount(struct xfs_mount *mp); void xfs_log_unmount(struct xfs_mount *mp);

View File

@ -51,7 +51,6 @@ xlog_cil_ticket_alloc(
tic = xlog_ticket_alloc(log, 0, 1, XFS_TRANSACTION, 0, tic = xlog_ticket_alloc(log, 0, 1, XFS_TRANSACTION, 0,
KM_SLEEP|KM_NOFS); KM_SLEEP|KM_NOFS);
tic->t_trans_type = XFS_TRANS_CHECKPOINT;
/* /*
* set the current reservation to zero so we know to steal the basic * set the current reservation to zero so we know to steal the basic

View File

@ -175,7 +175,6 @@ typedef struct xlog_ticket {
char t_cnt; /* current count : 1 */ char t_cnt; /* current count : 1 */
char t_clientid; /* who does this belong to; : 1 */ char t_clientid; /* who does this belong to; : 1 */
char t_flags; /* properties of reservation : 1 */ char t_flags; /* properties of reservation : 1 */
uint t_trans_type; /* transaction type : 4 */
/* reservation array fields */ /* reservation array fields */
uint t_res_num; /* num in array : 4 */ uint t_res_num; /* num in array : 4 */

View File

@ -3843,7 +3843,7 @@ xlog_recover_add_to_cont_trans(
old_ptr = item->ri_buf[item->ri_cnt-1].i_addr; old_ptr = item->ri_buf[item->ri_cnt-1].i_addr;
old_len = item->ri_buf[item->ri_cnt-1].i_len; old_len = item->ri_buf[item->ri_cnt-1].i_len;
ptr = kmem_realloc(old_ptr, len+old_len, old_len, KM_SLEEP); ptr = kmem_realloc(old_ptr, len + old_len, KM_SLEEP);
memcpy(&ptr[old_len], dp, len); memcpy(&ptr[old_len], dp, len);
item->ri_buf[item->ri_cnt-1].i_len += len; item->ri_buf[item->ri_cnt-1].i_len += len;
item->ri_buf[item->ri_cnt-1].i_addr = ptr; item->ri_buf[item->ri_cnt-1].i_addr = ptr;
@ -4205,10 +4205,9 @@ xlog_recover_process_efi(
} }
} }
tp = xfs_trans_alloc(mp, 0); error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate, 0, 0, 0, &tp);
error = xfs_trans_reserve(tp, &M_RES(mp)->tr_itruncate, 0, 0);
if (error) if (error)
goto abort_error; return error;
efdp = xfs_trans_get_efd(tp, efip, efip->efi_format.efi_nextents); efdp = xfs_trans_get_efd(tp, efip, efip->efi_format.efi_nextents);
for (i = 0; i < efip->efi_format.efi_nextents; i++) { for (i = 0; i < efip->efi_format.efi_nextents; i++) {
@ -4355,10 +4354,9 @@ xlog_recover_clear_agi_bucket(
int offset; int offset;
int error; int error;
tp = xfs_trans_alloc(mp, XFS_TRANS_CLEAR_AGI_BUCKET); error = xfs_trans_alloc(mp, &M_RES(mp)->tr_clearagi, 0, 0, 0, &tp);
error = xfs_trans_reserve(tp, &M_RES(mp)->tr_clearagi, 0, 0);
if (error) if (error)
goto out_abort; goto out_error;
error = xfs_read_agi(mp, tp, agno, &agibp); error = xfs_read_agi(mp, tp, agno, &agibp);
if (error) if (error)

View File

@ -89,7 +89,6 @@ xfs_uuid_mount(
if (hole < 0) { if (hole < 0) {
xfs_uuid_table = kmem_realloc(xfs_uuid_table, xfs_uuid_table = kmem_realloc(xfs_uuid_table,
(xfs_uuid_table_size + 1) * sizeof(*xfs_uuid_table), (xfs_uuid_table_size + 1) * sizeof(*xfs_uuid_table),
xfs_uuid_table_size * sizeof(*xfs_uuid_table),
KM_SLEEP); KM_SLEEP);
hole = xfs_uuid_table_size++; hole = xfs_uuid_table_size++;
} }
@ -681,6 +680,9 @@ xfs_mountfs(
xfs_set_maxicount(mp); xfs_set_maxicount(mp);
/* enable fail_at_unmount as default */
mp->m_fail_unmount = 1;
error = xfs_sysfs_init(&mp->m_kobj, &xfs_mp_ktype, NULL, mp->m_fsname); error = xfs_sysfs_init(&mp->m_kobj, &xfs_mp_ktype, NULL, mp->m_fsname);
if (error) if (error)
goto out; goto out;
@ -690,10 +692,15 @@ xfs_mountfs(
if (error) if (error)
goto out_remove_sysfs; goto out_remove_sysfs;
error = xfs_uuid_mount(mp); error = xfs_error_sysfs_init(mp);
if (error) if (error)
goto out_del_stats; goto out_del_stats;
error = xfs_uuid_mount(mp);
if (error)
goto out_remove_error_sysfs;
/* /*
* Set the minimum read and write sizes * Set the minimum read and write sizes
*/ */
@ -957,6 +964,7 @@ xfs_mountfs(
cancel_delayed_work_sync(&mp->m_reclaim_work); cancel_delayed_work_sync(&mp->m_reclaim_work);
xfs_reclaim_inodes(mp, SYNC_WAIT); xfs_reclaim_inodes(mp, SYNC_WAIT);
out_log_dealloc: out_log_dealloc:
mp->m_flags |= XFS_MOUNT_UNMOUNTING;
xfs_log_mount_cancel(mp); xfs_log_mount_cancel(mp);
out_fail_wait: out_fail_wait:
if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp) if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp)
@ -968,6 +976,8 @@ xfs_mountfs(
xfs_da_unmount(mp); xfs_da_unmount(mp);
out_remove_uuid: out_remove_uuid:
xfs_uuid_unmount(mp); xfs_uuid_unmount(mp);
out_remove_error_sysfs:
xfs_error_sysfs_del(mp);
out_del_stats: out_del_stats:
xfs_sysfs_del(&mp->m_stats.xs_kobj); xfs_sysfs_del(&mp->m_stats.xs_kobj);
out_remove_sysfs: out_remove_sysfs:
@ -1005,6 +1015,14 @@ xfs_unmountfs(
*/ */
xfs_log_force(mp, XFS_LOG_SYNC); xfs_log_force(mp, XFS_LOG_SYNC);
/*
* We now need to tell the world we are unmounting. This will allow
* us to detect that the filesystem is going away and we should error
* out anything that we have been retrying in the background. This will
* prevent neverending retries in AIL pushing from hanging the unmount.
*/
mp->m_flags |= XFS_MOUNT_UNMOUNTING;
/* /*
* Flush all pending changes from the AIL. * Flush all pending changes from the AIL.
*/ */
@ -1056,6 +1074,7 @@ xfs_unmountfs(
#endif #endif
xfs_free_perag(mp); xfs_free_perag(mp);
xfs_error_sysfs_del(mp);
xfs_sysfs_del(&mp->m_stats.xs_kobj); xfs_sysfs_del(&mp->m_stats.xs_kobj);
xfs_sysfs_del(&mp->m_kobj); xfs_sysfs_del(&mp->m_kobj);
} }

View File

@ -37,6 +37,32 @@ enum {
XFS_LOWSP_MAX, XFS_LOWSP_MAX,
}; };
/*
* Error Configuration
*
* Error classes define the subsystem the configuration belongs to.
* Error numbers define the errors that are configurable.
*/
/*
 * Error classes. Used as the first index of xfs_mount::m_error_cfg[][];
 * XFS_ERR_CLASS_MAX sizes that dimension and must stay last.
 */
enum {
XFS_ERR_METADATA,
XFS_ERR_CLASS_MAX,
};
/*
 * Configurable error numbers. Used as the second index of
 * xfs_mount::m_error_cfg[][] and as the index into the per-class init
 * tables, so the table entries must follow this order.
 * XFS_ERR_ERRNO_MAX sizes both and must stay last.
 */
enum {
XFS_ERR_DEFAULT,
XFS_ERR_EIO,
XFS_ERR_ENOSPC,
XFS_ERR_ENODEV,
XFS_ERR_ERRNO_MAX,
};
/* Sentinel stored in xfs_error_cfg::max_retries by the init tables. */
#define XFS_ERR_RETRY_FOREVER -1
/*
 * Error handling configuration for one (class, errno) pair.
 * One instance lives in each slot of xfs_mount::m_error_cfg[][].
 */
struct xfs_error_cfg {
/* embedded sysfs object; the sysfs handlers map it back to this struct */
struct xfs_kobj kobj;
/* the sysfs store handler accepts -1 (XFS_ERR_RETRY_FOREVER) and up */
int max_retries;
unsigned long retry_timeout; /* in jiffies, 0 = no timeout */
};
typedef struct xfs_mount { typedef struct xfs_mount {
struct super_block *m_super; struct super_block *m_super;
xfs_tid_t m_tid; /* next unused tid for fs */ xfs_tid_t m_tid; /* next unused tid for fs */
@ -127,6 +153,9 @@ typedef struct xfs_mount {
int64_t m_low_space[XFS_LOWSP_MAX]; int64_t m_low_space[XFS_LOWSP_MAX];
/* low free space thresholds */ /* low free space thresholds */
struct xfs_kobj m_kobj; struct xfs_kobj m_kobj;
struct xfs_kobj m_error_kobj;
struct xfs_kobj m_error_meta_kobj;
struct xfs_error_cfg m_error_cfg[XFS_ERR_CLASS_MAX][XFS_ERR_ERRNO_MAX];
struct xstats m_stats; /* per-fs stats */ struct xstats m_stats; /* per-fs stats */
struct workqueue_struct *m_buf_workqueue; struct workqueue_struct *m_buf_workqueue;
@ -148,6 +177,7 @@ typedef struct xfs_mount {
*/ */
__uint32_t m_generation; __uint32_t m_generation;
bool m_fail_unmount;
#ifdef DEBUG #ifdef DEBUG
/* /*
* DEBUG mode instrumentation to test and/or trigger delayed allocation * DEBUG mode instrumentation to test and/or trigger delayed allocation
@ -166,6 +196,7 @@ typedef struct xfs_mount {
#define XFS_MOUNT_WSYNC (1ULL << 0) /* for nfs - all metadata ops #define XFS_MOUNT_WSYNC (1ULL << 0) /* for nfs - all metadata ops
must be synchronous except must be synchronous except
for space allocations */ for space allocations */
#define XFS_MOUNT_UNMOUNTING (1ULL << 1) /* filesystem is unmounting */
#define XFS_MOUNT_WAS_CLEAN (1ULL << 3) #define XFS_MOUNT_WAS_CLEAN (1ULL << 3)
#define XFS_MOUNT_FS_SHUTDOWN (1ULL << 4) /* atomic stop of all filesystem #define XFS_MOUNT_FS_SHUTDOWN (1ULL << 4) /* atomic stop of all filesystem
operations, typically for operations, typically for
@ -364,4 +395,7 @@ extern void xfs_set_low_space_thresholds(struct xfs_mount *);
int xfs_zero_extent(struct xfs_inode *ip, xfs_fsblock_t start_fsb, int xfs_zero_extent(struct xfs_inode *ip, xfs_fsblock_t start_fsb,
xfs_off_t count_fsb); xfs_off_t count_fsb);
struct xfs_error_cfg * xfs_error_get_cfg(struct xfs_mount *mp,
int error_class, int error);
#endif /* __XFS_MOUNT_H__ */ #endif /* __XFS_MOUNT_H__ */

View File

@ -308,12 +308,9 @@ xfs_fs_commit_blocks(
goto out_drop_iolock; goto out_drop_iolock;
} }
tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_NOT_SIZE); error = xfs_trans_alloc(mp, &M_RES(mp)->tr_ichange, 0, 0, 0, &tp);
error = xfs_trans_reserve(tp, &M_RES(mp)->tr_ichange, 0, 0); if (error)
if (error) {
xfs_trans_cancel(tp);
goto out_drop_iolock; goto out_drop_iolock;
}
xfs_ilock(ip, XFS_ILOCK_EXCL); xfs_ilock(ip, XFS_ILOCK_EXCL);
xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);

View File

@ -783,13 +783,10 @@ xfs_qm_qino_alloc(
} }
} }
tp = xfs_trans_alloc(mp, XFS_TRANS_QM_QINOCREATE); error = xfs_trans_alloc(mp, &M_RES(mp)->tr_create,
error = xfs_trans_reserve(tp, &M_RES(mp)->tr_create, XFS_QM_QINOCREATE_SPACE_RES(mp), 0, 0, &tp);
XFS_QM_QINOCREATE_SPACE_RES(mp), 0); if (error)
if (error) {
xfs_trans_cancel(tp);
return error; return error;
}
if (need_alloc) { if (need_alloc) {
error = xfs_dir_ialloc(&tp, NULL, S_IFREG, 1, 0, 0, 1, ip, error = xfs_dir_ialloc(&tp, NULL, S_IFREG, 1, 0, 0, 1, ip,

View File

@ -236,10 +236,8 @@ xfs_qm_scall_trunc_qfile(
xfs_ilock(ip, XFS_IOLOCK_EXCL); xfs_ilock(ip, XFS_IOLOCK_EXCL);
tp = xfs_trans_alloc(mp, XFS_TRANS_TRUNCATE_FILE); error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate, 0, 0, 0, &tp);
error = xfs_trans_reserve(tp, &M_RES(mp)->tr_itruncate, 0, 0);
if (error) { if (error) {
xfs_trans_cancel(tp);
xfs_iunlock(ip, XFS_IOLOCK_EXCL); xfs_iunlock(ip, XFS_IOLOCK_EXCL);
goto out_put; goto out_put;
} }
@ -436,12 +434,9 @@ xfs_qm_scall_setqlim(
defq = xfs_get_defquota(dqp, q); defq = xfs_get_defquota(dqp, q);
xfs_dqunlock(dqp); xfs_dqunlock(dqp);
tp = xfs_trans_alloc(mp, XFS_TRANS_QM_SETQLIM); error = xfs_trans_alloc(mp, &M_RES(mp)->tr_qm_setqlim, 0, 0, 0, &tp);
error = xfs_trans_reserve(tp, &M_RES(mp)->tr_qm_setqlim, 0, 0); if (error)
if (error) {
xfs_trans_cancel(tp);
goto out_rele; goto out_rele;
}
xfs_dqlock(dqp); xfs_dqlock(dqp);
xfs_trans_dqjoin(tp, dqp); xfs_trans_dqjoin(tp, dqp);
@ -569,13 +564,9 @@ xfs_qm_log_quotaoff_end(
int error; int error;
xfs_qoff_logitem_t *qoffi; xfs_qoff_logitem_t *qoffi;
tp = xfs_trans_alloc(mp, XFS_TRANS_QM_QUOTAOFF_END); error = xfs_trans_alloc(mp, &M_RES(mp)->tr_qm_equotaoff, 0, 0, 0, &tp);
if (error)
error = xfs_trans_reserve(tp, &M_RES(mp)->tr_qm_equotaoff, 0, 0);
if (error) {
xfs_trans_cancel(tp);
return error; return error;
}
qoffi = xfs_trans_get_qoff_item(tp, startqoff, qoffi = xfs_trans_get_qoff_item(tp, startqoff,
flags & XFS_ALL_QUOTA_ACCT); flags & XFS_ALL_QUOTA_ACCT);
@ -603,12 +594,9 @@ xfs_qm_log_quotaoff(
*qoffstartp = NULL; *qoffstartp = NULL;
tp = xfs_trans_alloc(mp, XFS_TRANS_QM_QUOTAOFF); error = xfs_trans_alloc(mp, &M_RES(mp)->tr_qm_quotaoff, 0, 0, 0, &tp);
error = xfs_trans_reserve(tp, &M_RES(mp)->tr_qm_quotaoff, 0, 0); if (error)
if (error) {
xfs_trans_cancel(tp);
goto out; goto out;
}
qoffi = xfs_trans_get_qoff_item(tp, NULL, flags & XFS_ALL_QUOTA_ACCT); qoffi = xfs_trans_get_qoff_item(tp, NULL, flags & XFS_ALL_QUOTA_ACCT);
xfs_trans_log_quotaoff_item(tp, qoffi); xfs_trans_log_quotaoff_item(tp, qoffi);

View File

@ -780,15 +780,14 @@ xfs_growfs_rt_alloc(
* Allocate space to the file, as necessary. * Allocate space to the file, as necessary.
*/ */
while (oblocks < nblocks) { while (oblocks < nblocks) {
tp = xfs_trans_alloc(mp, XFS_TRANS_GROWFSRT_ALLOC);
resblks = XFS_GROWFSRT_SPACE_RES(mp, nblocks - oblocks); resblks = XFS_GROWFSRT_SPACE_RES(mp, nblocks - oblocks);
/* /*
* Reserve space & log for one extent added to the file. * Reserve space & log for one extent added to the file.
*/ */
error = xfs_trans_reserve(tp, &M_RES(mp)->tr_growrtalloc, error = xfs_trans_alloc(mp, &M_RES(mp)->tr_growrtalloc, resblks,
resblks, 0); 0, 0, &tp);
if (error) if (error)
goto out_trans_cancel; return error;
/* /*
* Lock the inode. * Lock the inode.
*/ */
@ -823,14 +822,13 @@ xfs_growfs_rt_alloc(
for (bno = map.br_startoff, fsbno = map.br_startblock; for (bno = map.br_startoff, fsbno = map.br_startblock;
bno < map.br_startoff + map.br_blockcount; bno < map.br_startoff + map.br_blockcount;
bno++, fsbno++) { bno++, fsbno++) {
tp = xfs_trans_alloc(mp, XFS_TRANS_GROWFSRT_ZERO);
/* /*
* Reserve log for one block zeroing. * Reserve log for one block zeroing.
*/ */
error = xfs_trans_reserve(tp, &M_RES(mp)->tr_growrtzero, error = xfs_trans_alloc(mp, &M_RES(mp)->tr_growrtzero,
0, 0); 0, 0, 0, &tp);
if (error) if (error)
goto out_trans_cancel; return error;
/* /*
* Lock the bitmap inode. * Lock the bitmap inode.
*/ */
@ -994,11 +992,10 @@ xfs_growfs_rt(
/* /*
* Start a transaction, get the log reservation. * Start a transaction, get the log reservation.
*/ */
tp = xfs_trans_alloc(mp, XFS_TRANS_GROWFSRT_FREE); error = xfs_trans_alloc(mp, &M_RES(mp)->tr_growrtfree, 0, 0, 0,
error = xfs_trans_reserve(tp, &M_RES(mp)->tr_growrtfree, &tp);
0, 0);
if (error) if (error)
goto error_cancel; break;
/* /*
* Lock out other callers by grabbing the bitmap inode lock. * Lock out other callers by grabbing the bitmap inode lock.
*/ */

View File

@ -58,8 +58,7 @@
#include <linux/parser.h> #include <linux/parser.h>
static const struct super_operations xfs_super_operations; static const struct super_operations xfs_super_operations;
static kmem_zone_t *xfs_ioend_zone; struct bio_set *xfs_ioend_bioset;
mempool_t *xfs_ioend_pool;
static struct kset *xfs_kset; /* top-level xfs sysfs dir */ static struct kset *xfs_kset; /* top-level xfs sysfs dir */
#ifdef DEBUG #ifdef DEBUG
@ -350,6 +349,7 @@ xfs_parseargs(
case Opt_pqnoenforce: case Opt_pqnoenforce:
mp->m_qflags |= (XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE); mp->m_qflags |= (XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE);
mp->m_qflags &= ~XFS_PQUOTA_ENFD; mp->m_qflags &= ~XFS_PQUOTA_ENFD;
break;
case Opt_gquota: case Opt_gquota:
case Opt_grpquota: case Opt_grpquota:
mp->m_qflags |= (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE | mp->m_qflags |= (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE |
@ -928,7 +928,7 @@ xfs_fs_alloc_inode(
/* /*
* Now that the generic code is guaranteed not to be accessing * Now that the generic code is guaranteed not to be accessing
* the linux inode, we can reclaim the inode. * the linux inode, we can inactivate and reclaim the inode.
*/ */
STATIC void STATIC void
xfs_fs_destroy_inode( xfs_fs_destroy_inode(
@ -938,9 +938,14 @@ xfs_fs_destroy_inode(
trace_xfs_destroy_inode(ip); trace_xfs_destroy_inode(ip);
XFS_STATS_INC(ip->i_mount, vn_reclaim); ASSERT(!rwsem_is_locked(&ip->i_iolock.mr_lock));
XFS_STATS_INC(ip->i_mount, vn_rele);
XFS_STATS_INC(ip->i_mount, vn_remove);
xfs_inactive(ip);
ASSERT(XFS_FORCED_SHUTDOWN(ip->i_mount) || ip->i_delayed_blks == 0); ASSERT(XFS_FORCED_SHUTDOWN(ip->i_mount) || ip->i_delayed_blks == 0);
XFS_STATS_INC(ip->i_mount, vn_reclaim);
/* /*
* We should never get here with one of the reclaim flags already set. * We should never get here with one of the reclaim flags already set.
@ -987,24 +992,6 @@ xfs_fs_inode_init_once(
"xfsino", ip->i_ino); "xfsino", ip->i_ino);
} }
STATIC void
xfs_fs_evict_inode(
struct inode *inode)
{
xfs_inode_t *ip = XFS_I(inode);
ASSERT(!rwsem_is_locked(&ip->i_iolock.mr_lock));
trace_xfs_evict_inode(ip);
truncate_inode_pages_final(&inode->i_data);
clear_inode(inode);
XFS_STATS_INC(ip->i_mount, vn_rele);
XFS_STATS_INC(ip->i_mount, vn_remove);
xfs_inactive(ip);
}
/* /*
* We do an unlocked check for XFS_IDONTCACHE here because we are already * We do an unlocked check for XFS_IDONTCACHE here because we are already
* serialised against cache hits here via the inode->i_lock and igrab() in * serialised against cache hits here via the inode->i_lock and igrab() in
@ -1276,6 +1263,16 @@ xfs_fs_remount(
return -EINVAL; return -EINVAL;
} }
if (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5 &&
xfs_sb_has_ro_compat_feature(sbp,
XFS_SB_FEAT_RO_COMPAT_UNKNOWN)) {
xfs_warn(mp,
"ro->rw transition prohibited on unknown (0x%x) ro-compat filesystem",
(sbp->sb_features_ro_compat &
XFS_SB_FEAT_RO_COMPAT_UNKNOWN));
return -EINVAL;
}
mp->m_flags &= ~XFS_MOUNT_RDONLY; mp->m_flags &= ~XFS_MOUNT_RDONLY;
/* /*
@ -1663,7 +1660,6 @@ xfs_fs_free_cached_objects(
static const struct super_operations xfs_super_operations = { static const struct super_operations xfs_super_operations = {
.alloc_inode = xfs_fs_alloc_inode, .alloc_inode = xfs_fs_alloc_inode,
.destroy_inode = xfs_fs_destroy_inode, .destroy_inode = xfs_fs_destroy_inode,
.evict_inode = xfs_fs_evict_inode,
.drop_inode = xfs_fs_drop_inode, .drop_inode = xfs_fs_drop_inode,
.put_super = xfs_fs_put_super, .put_super = xfs_fs_put_super,
.sync_fs = xfs_fs_sync_fs, .sync_fs = xfs_fs_sync_fs,
@ -1688,20 +1684,15 @@ MODULE_ALIAS_FS("xfs");
STATIC int __init STATIC int __init
xfs_init_zones(void) xfs_init_zones(void)
{ {
xfs_ioend_bioset = bioset_create(4 * MAX_BUF_PER_PAGE,
xfs_ioend_zone = kmem_zone_init(sizeof(xfs_ioend_t), "xfs_ioend"); offsetof(struct xfs_ioend, io_inline_bio));
if (!xfs_ioend_zone) if (!xfs_ioend_bioset)
goto out; goto out;
xfs_ioend_pool = mempool_create_slab_pool(4 * MAX_BUF_PER_PAGE,
xfs_ioend_zone);
if (!xfs_ioend_pool)
goto out_destroy_ioend_zone;
xfs_log_ticket_zone = kmem_zone_init(sizeof(xlog_ticket_t), xfs_log_ticket_zone = kmem_zone_init(sizeof(xlog_ticket_t),
"xfs_log_ticket"); "xfs_log_ticket");
if (!xfs_log_ticket_zone) if (!xfs_log_ticket_zone)
goto out_destroy_ioend_pool; goto out_free_ioend_bioset;
xfs_bmap_free_item_zone = kmem_zone_init(sizeof(xfs_bmap_free_item_t), xfs_bmap_free_item_zone = kmem_zone_init(sizeof(xfs_bmap_free_item_t),
"xfs_bmap_free_item"); "xfs_bmap_free_item");
@ -1797,10 +1788,8 @@ xfs_init_zones(void)
kmem_zone_destroy(xfs_bmap_free_item_zone); kmem_zone_destroy(xfs_bmap_free_item_zone);
out_destroy_log_ticket_zone: out_destroy_log_ticket_zone:
kmem_zone_destroy(xfs_log_ticket_zone); kmem_zone_destroy(xfs_log_ticket_zone);
out_destroy_ioend_pool: out_free_ioend_bioset:
mempool_destroy(xfs_ioend_pool); bioset_free(xfs_ioend_bioset);
out_destroy_ioend_zone:
kmem_zone_destroy(xfs_ioend_zone);
out: out:
return -ENOMEM; return -ENOMEM;
} }
@ -1826,9 +1815,7 @@ xfs_destroy_zones(void)
kmem_zone_destroy(xfs_btree_cur_zone); kmem_zone_destroy(xfs_btree_cur_zone);
kmem_zone_destroy(xfs_bmap_free_item_zone); kmem_zone_destroy(xfs_bmap_free_item_zone);
kmem_zone_destroy(xfs_log_ticket_zone); kmem_zone_destroy(xfs_log_ticket_zone);
mempool_destroy(xfs_ioend_pool); bioset_free(xfs_ioend_bioset);
kmem_zone_destroy(xfs_ioend_zone);
} }
STATIC int __init STATIC int __init

View File

@ -131,6 +131,8 @@ xfs_readlink(
trace_xfs_readlink(ip); trace_xfs_readlink(ip);
ASSERT(!(ip->i_df.if_flags & XFS_IFINLINE));
if (XFS_FORCED_SHUTDOWN(mp)) if (XFS_FORCED_SHUTDOWN(mp))
return -EIO; return -EIO;
@ -150,12 +152,7 @@ xfs_readlink(
} }
if (ip->i_df.if_flags & XFS_IFINLINE) { error = xfs_readlink_bmap(ip, link);
memcpy(link, ip->i_df.if_u1.if_data, pathlen);
link[pathlen] = '\0';
} else {
error = xfs_readlink_bmap(ip, link);
}
out: out:
xfs_iunlock(ip, XFS_ILOCK_SHARED); xfs_iunlock(ip, XFS_ILOCK_SHARED);
@ -221,7 +218,6 @@ xfs_symlink(
if (error) if (error)
return error; return error;
tp = xfs_trans_alloc(mp, XFS_TRANS_SYMLINK);
/* /*
* The symlink will fit into the inode data fork? * The symlink will fit into the inode data fork?
* There can't be any attributes so we get the whole variable part. * There can't be any attributes so we get the whole variable part.
@ -231,13 +227,15 @@ xfs_symlink(
else else
fs_blocks = xfs_symlink_blocks(mp, pathlen); fs_blocks = xfs_symlink_blocks(mp, pathlen);
resblks = XFS_SYMLINK_SPACE_RES(mp, link_name->len, fs_blocks); resblks = XFS_SYMLINK_SPACE_RES(mp, link_name->len, fs_blocks);
error = xfs_trans_reserve(tp, &M_RES(mp)->tr_symlink, resblks, 0);
error = xfs_trans_alloc(mp, &M_RES(mp)->tr_symlink, resblks, 0, 0, &tp);
if (error == -ENOSPC && fs_blocks == 0) { if (error == -ENOSPC && fs_blocks == 0) {
resblks = 0; resblks = 0;
error = xfs_trans_reserve(tp, &M_RES(mp)->tr_symlink, 0, 0); error = xfs_trans_alloc(mp, &M_RES(mp)->tr_symlink, 0, 0, 0,
&tp);
} }
if (error) if (error)
goto out_trans_cancel; goto out_release_inode;
xfs_ilock(dp, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL | xfs_ilock(dp, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL |
XFS_IOLOCK_PARENT | XFS_ILOCK_PARENT); XFS_IOLOCK_PARENT | XFS_ILOCK_PARENT);
@ -302,19 +300,11 @@ xfs_symlink(
* If the symlink will fit into the inode, write it inline. * If the symlink will fit into the inode, write it inline.
*/ */
if (pathlen <= XFS_IFORK_DSIZE(ip)) { if (pathlen <= XFS_IFORK_DSIZE(ip)) {
xfs_idata_realloc(ip, pathlen, XFS_DATA_FORK); xfs_init_local_fork(ip, XFS_DATA_FORK, target_path, pathlen);
memcpy(ip->i_df.if_u1.if_data, target_path, pathlen);
ip->i_d.di_size = pathlen; ip->i_d.di_size = pathlen;
/*
* The inode was initially created in extent format.
*/
ip->i_df.if_flags &= ~(XFS_IFEXTENTS | XFS_IFBROOT);
ip->i_df.if_flags |= XFS_IFINLINE;
ip->i_d.di_format = XFS_DINODE_FMT_LOCAL; ip->i_d.di_format = XFS_DINODE_FMT_LOCAL;
xfs_trans_log_inode(tp, ip, XFS_ILOG_DDATA | XFS_ILOG_CORE); xfs_trans_log_inode(tp, ip, XFS_ILOG_DDATA | XFS_ILOG_CORE);
} else { } else {
int offset; int offset;
@ -455,12 +445,9 @@ xfs_inactive_symlink_rmt(
*/ */
ASSERT(ip->i_d.di_nextents > 0 && ip->i_d.di_nextents <= 2); ASSERT(ip->i_d.di_nextents > 0 && ip->i_d.di_nextents <= 2);
tp = xfs_trans_alloc(mp, XFS_TRANS_INACTIVE); error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate, 0, 0, 0, &tp);
error = xfs_trans_reserve(tp, &M_RES(mp)->tr_itruncate, 0, 0); if (error)
if (error) {
xfs_trans_cancel(tp);
return error; return error;
}
xfs_ilock(ip, XFS_ILOCK_EXCL); xfs_ilock(ip, XFS_ILOCK_EXCL);
xfs_trans_ijoin(tp, ip, 0); xfs_trans_ijoin(tp, ip, 0);

View File

@ -17,10 +17,11 @@
*/ */
#include "xfs.h" #include "xfs.h"
#include "xfs_sysfs.h" #include "xfs_shared.h"
#include "xfs_format.h" #include "xfs_format.h"
#include "xfs_log_format.h" #include "xfs_log_format.h"
#include "xfs_trans_resv.h" #include "xfs_trans_resv.h"
#include "xfs_sysfs.h"
#include "xfs_log.h" #include "xfs_log.h"
#include "xfs_log_priv.h" #include "xfs_log_priv.h"
#include "xfs_stats.h" #include "xfs_stats.h"
@ -362,3 +363,291 @@ struct kobj_type xfs_log_ktype = {
.sysfs_ops = &xfs_sysfs_ops, .sysfs_ops = &xfs_sysfs_ops,
.default_attrs = xfs_log_attrs, .default_attrs = xfs_log_attrs,
}; };
/*
* Metadata IO error configuration
*
* The sysfs structure here is:
* ...xfs/<dev>/error/<class>/<errno>/<error_attrs>
*
* where <class> allows us to discriminate between data IO and metadata IO,
* and any other future type of IO (e.g. special inode or directory error
* handling) we care to support.
*/
/*
 * Map a sysfs kobject back to the xfs_error_cfg that embeds it.
 */
static inline struct xfs_error_cfg *
to_error_cfg(struct kobject *kobject)
{
	return container_of(to_kobj(kobject), struct xfs_error_cfg, kobj);
}
/*
 * Map the "error" directory kobject back to the xfs_mount that embeds it
 * as m_error_kobj.
 */
static inline struct xfs_mount *
err_to_mp(struct kobject *kobject)
{
	return container_of(to_kobj(kobject), struct xfs_mount, m_error_kobj);
}
static ssize_t
max_retries_show(
struct kobject *kobject,
char *buf)
{
struct xfs_error_cfg *cfg = to_error_cfg(kobject);
return snprintf(buf, PAGE_SIZE, "%d\n", cfg->max_retries);
}
/*
 * sysfs store handler: parse an integer and record it as max_retries.
 *
 * Returns the kstrtoint() error on a parse failure, -EINVAL for values
 * below -1, and @count once the value has been stored.
 */
static ssize_t
max_retries_store(
	struct kobject	*kobject,
	const char	*buf,
	size_t		count)
{
	struct xfs_error_cfg	*errcfg = to_error_cfg(kobject);
	int			retries;
	int			rc;

	rc = kstrtoint(buf, 0, &retries);
	if (rc)
		return rc;

	/* -1 is the smallest value accepted */
	if (retries < -1)
		return -EINVAL;

	errcfg->max_retries = retries;
	return count;
}
XFS_SYSFS_ATTR_RW(max_retries);
/*
 * sysfs show handler: print retry_timeout, which is kept in jiffies,
 * converted to whole seconds.
 */
static ssize_t
retry_timeout_seconds_show(
	struct kobject	*kobject,
	char		*buf)
{
	struct xfs_error_cfg	*errcfg = to_error_cfg(kobject);

	return snprintf(buf, PAGE_SIZE, "%ld\n",
			jiffies_to_msecs(errcfg->retry_timeout) / MSEC_PER_SEC);
}
/*
 * sysfs store handler: parse a timeout in seconds and record it in jiffies.
 *
 * Returns the kstrtoint() error on a parse failure, -EINVAL when the value
 * is negative or above one day (86400 seconds), and @count once stored.
 */
static ssize_t
retry_timeout_seconds_store(
	struct kobject	*kobject,
	const char	*buf,
	size_t		count)
{
	struct xfs_error_cfg	*errcfg = to_error_cfg(kobject);
	int			secs;
	int			rc;

	rc = kstrtoint(buf, 0, &secs);
	if (rc)
		return rc;

	/* 1 day timeout maximum */
	if (secs < 0 || secs > 86400)
		return -EINVAL;

	errcfg->retry_timeout = msecs_to_jiffies(secs * MSEC_PER_SEC);
	return count;
}
XFS_SYSFS_ATTR_RW(retry_timeout_seconds);
static ssize_t
fail_at_unmount_show(
struct kobject *kobject,
char *buf)
{
struct xfs_mount *mp = err_to_mp(kobject);
return snprintf(buf, PAGE_SIZE, "%d\n", mp->m_fail_unmount);
}
/*
 * sysfs store handler: accept 0 or 1 and record it in m_fail_unmount.
 *
 * Returns the kstrtoint() error on a parse failure, -EINVAL for any other
 * integer, and @count once the flag has been stored.
 */
static ssize_t
fail_at_unmount_store(
	struct kobject	*kobject,
	const char	*buf,
	size_t		count)
{
	struct xfs_mount	*mount = err_to_mp(kobject);
	int			flag;
	int			rc;

	rc = kstrtoint(buf, 0, &flag);
	if (rc)
		return rc;

	/* strictly boolean */
	if (flag != 0 && flag != 1)
		return -EINVAL;

	mount->m_fail_unmount = flag;
	return count;
}
XFS_SYSFS_ATTR_RW(fail_at_unmount);
/*
 * Attributes attached to every per-errno configuration kobject
 * (default_attrs of xfs_error_cfg_ktype). NULL-terminated.
 */
static struct attribute *xfs_error_attrs[] = {
ATTR_LIST(max_retries),
ATTR_LIST(retry_timeout_seconds),
NULL,
};
/*
 * kobject type for a per-errno configuration directory; carries the
 * max_retries and retry_timeout_seconds attributes.
 */
struct kobj_type xfs_error_cfg_ktype = {
.release = xfs_sysfs_release,
.sysfs_ops = &xfs_sysfs_ops,
.default_attrs = xfs_error_attrs,
};
/*
 * kobject type for the "error" directory and the per-class directories.
 * It has no default attributes; xfs_error_sysfs_init() adds
 * fail_at_unmount to the "error" directory with sysfs_create_file().
 */
struct kobj_type xfs_error_ktype = {
.release = xfs_sysfs_release,
.sysfs_ops = &xfs_sysfs_ops,
};
/*
 * Error initialization tables. These need to be ordered in the same
 * order as the enums used to index the array. All class init tables need to
 * define a "default" behaviour as the first entry, all other entries can be
 * empty.
 *
 * Each entry describes one per-error sysfs directory and its initial
 * settings:
 *   name          - directory name; only ever points at string literals, so
 *                   it is const-qualified to stop accidental writes.
 *   max_retries   - initial value copied into the error configuration.
 *   retry_timeout - initial timeout in seconds; converted to jiffies when the
 *                   configuration is set up.
 */
struct xfs_error_init {
	const char	*name;
	int		max_retries;
	int		retry_timeout;	/* in seconds */
};
/*
 * Initial settings for the metadata error class, one entry per error index.
 * The entry order must match the index order used for m_error_cfg[][].
 */
static const struct xfs_error_init xfs_error_meta_init[XFS_ERR_ERRNO_MAX] = {
	{
		/* catch-all for errors without a dedicated entry */
		.name		= "default",
		.max_retries	= XFS_ERR_RETRY_FOREVER,
		.retry_timeout	= 0,
	},
	{
		.name		= "EIO",
		.max_retries	= XFS_ERR_RETRY_FOREVER,
		.retry_timeout	= 0,
	},
	{
		.name		= "ENOSPC",
		.max_retries	= XFS_ERR_RETRY_FOREVER,
		.retry_timeout	= 0,
	},
	{
		/* zero retries configured for ENODEV */
		.name		= "ENODEV",
		.max_retries	= 0,
		.retry_timeout	= 0,
	},
};
static int
xfs_error_sysfs_init_class(
struct xfs_mount *mp,
int class,
const char *parent_name,
struct xfs_kobj *parent_kobj,
const struct xfs_error_init init[])
{
struct xfs_error_cfg *cfg;
int error;
int i;
ASSERT(class < XFS_ERR_CLASS_MAX);
error = xfs_sysfs_init(parent_kobj, &xfs_error_ktype,
&mp->m_error_kobj, parent_name);
if (error)
return error;
for (i = 0; i < XFS_ERR_ERRNO_MAX; i++) {
cfg = &mp->m_error_cfg[class][i];
error = xfs_sysfs_init(&cfg->kobj, &xfs_error_cfg_ktype,
parent_kobj, init[i].name);
if (error)
goto out_error;
cfg->max_retries = init[i].max_retries;
cfg->retry_timeout = msecs_to_jiffies(
init[i].retry_timeout * MSEC_PER_SEC);
}
return 0;
out_error:
/* unwind the entries that succeeded */
for (i--; i >= 0; i--) {
cfg = &mp->m_error_cfg[class][i];
xfs_sysfs_del(&cfg->kobj);
}
xfs_sysfs_del(parent_kobj);
return error;
}
/*
 * Build the error-configuration sysfs tree for a mount:
 *
 *   .../xfs/<dev>/error/                  (mp->m_error_kobj)
 *   .../xfs/<dev>/error/fail_at_unmount
 *   .../xfs/<dev>/error/metadata/         (mp->m_error_meta_kobj)
 *
 * Returns 0 on success. If creating the attribute file or the metadata class
 * fails, the "error" kobject is deleted again and the failure is returned.
 */
int
xfs_error_sysfs_init(
	struct xfs_mount	*mp)
{
	int			error;

	/* .../xfs/<dev>/error/ */
	error = xfs_sysfs_init(&mp->m_error_kobj, &xfs_error_ktype,
				&mp->m_kobj, "error");
	if (error)
		return error;

	error = sysfs_create_file(&mp->m_error_kobj.kobject,
				  ATTR_LIST(fail_at_unmount));
	if (!error) {
		/* .../xfs/<dev>/error/metadata/ */
		error = xfs_error_sysfs_init_class(mp, XFS_ERR_METADATA,
					"metadata", &mp->m_error_meta_kobj,
					xfs_error_meta_init);
	}

	if (error)
		xfs_sysfs_del(&mp->m_error_kobj);
	return error;
}
/*
 * Tear down the error-configuration sysfs tree: delete every per-error
 * kobject in m_error_cfg[][] first, then the metadata class kobject, and
 * finally the top-level "error" kobject.
 */
void
xfs_error_sysfs_del(
	struct xfs_mount	*mp)
{
	int			cls;
	int			idx;

	for (cls = 0; cls < XFS_ERR_CLASS_MAX; cls++)
		for (idx = 0; idx < XFS_ERR_ERRNO_MAX; idx++)
			xfs_sysfs_del(&mp->m_error_cfg[cls][idx].kobj);

	xfs_sysfs_del(&mp->m_error_meta_kobj);
	xfs_sysfs_del(&mp->m_error_kobj);
}
/*
 * Look up the error configuration to apply for @error within @error_class.
 *
 * EIO, ENOSPC and ENODEV have dedicated configurations; every other value
 * maps to the class default.
 *
 * The errno may be passed with either sign. Kernel code conventionally
 * carries errors as negative errno values, and matching only the positive
 * constants would send such callers to the default configuration every
 * time. Both signs are listed as case labels rather than negating @error so
 * that no input value can overflow.
 *
 * Returns a pointer into mp->m_error_cfg; never NULL.
 */
struct xfs_error_cfg *
xfs_error_get_cfg(
	struct xfs_mount	*mp,
	int			error_class,
	int			error)
{
	struct xfs_error_cfg	*cfg;

	switch (error) {
	case EIO:
	case -EIO:
		cfg = &mp->m_error_cfg[error_class][XFS_ERR_EIO];
		break;
	case ENOSPC:
	case -ENOSPC:
		cfg = &mp->m_error_cfg[error_class][XFS_ERR_ENOSPC];
		break;
	case ENODEV:
	case -ENODEV:
		cfg = &mp->m_error_cfg[error_class][XFS_ERR_ENODEV];
		break;
	default:
		cfg = &mp->m_error_cfg[error_class][XFS_ERR_DEFAULT];
		break;
	}

	return cfg;
}

View File

@ -58,4 +58,7 @@ xfs_sysfs_del(
wait_for_completion(&kobj->complete); wait_for_completion(&kobj->complete);
} }
int xfs_error_sysfs_init(struct xfs_mount *mp);
void xfs_error_sysfs_del(struct xfs_mount *mp);
#endif /* __XFS_SYSFS_H__ */ #endif /* __XFS_SYSFS_H__ */

View File

@ -364,7 +364,6 @@ DEFINE_BUF_EVENT(xfs_buf_delwri_split);
DEFINE_BUF_EVENT(xfs_buf_get_uncached); DEFINE_BUF_EVENT(xfs_buf_get_uncached);
DEFINE_BUF_EVENT(xfs_bdstrat_shut); DEFINE_BUF_EVENT(xfs_bdstrat_shut);
DEFINE_BUF_EVENT(xfs_buf_item_relse); DEFINE_BUF_EVENT(xfs_buf_item_relse);
DEFINE_BUF_EVENT(xfs_buf_item_iodone);
DEFINE_BUF_EVENT(xfs_buf_item_iodone_async); DEFINE_BUF_EVENT(xfs_buf_item_iodone_async);
DEFINE_BUF_EVENT(xfs_buf_error_relse); DEFINE_BUF_EVENT(xfs_buf_error_relse);
DEFINE_BUF_EVENT(xfs_buf_wait_buftarg); DEFINE_BUF_EVENT(xfs_buf_wait_buftarg);
@ -944,7 +943,6 @@ DECLARE_EVENT_CLASS(xfs_loggrant_class,
TP_ARGS(log, tic), TP_ARGS(log, tic),
TP_STRUCT__entry( TP_STRUCT__entry(
__field(dev_t, dev) __field(dev_t, dev)
__field(unsigned, trans_type)
__field(char, ocnt) __field(char, ocnt)
__field(char, cnt) __field(char, cnt)
__field(int, curr_res) __field(int, curr_res)
@ -962,7 +960,6 @@ DECLARE_EVENT_CLASS(xfs_loggrant_class,
), ),
TP_fast_assign( TP_fast_assign(
__entry->dev = log->l_mp->m_super->s_dev; __entry->dev = log->l_mp->m_super->s_dev;
__entry->trans_type = tic->t_trans_type;
__entry->ocnt = tic->t_ocnt; __entry->ocnt = tic->t_ocnt;
__entry->cnt = tic->t_cnt; __entry->cnt = tic->t_cnt;
__entry->curr_res = tic->t_curr_res; __entry->curr_res = tic->t_curr_res;
@ -980,14 +977,13 @@ DECLARE_EVENT_CLASS(xfs_loggrant_class,
__entry->curr_block = log->l_curr_block; __entry->curr_block = log->l_curr_block;
__entry->tail_lsn = atomic64_read(&log->l_tail_lsn); __entry->tail_lsn = atomic64_read(&log->l_tail_lsn);
), ),
TP_printk("dev %d:%d type %s t_ocnt %u t_cnt %u t_curr_res %u " TP_printk("dev %d:%d t_ocnt %u t_cnt %u t_curr_res %u "
"t_unit_res %u t_flags %s reserveq %s " "t_unit_res %u t_flags %s reserveq %s "
"writeq %s grant_reserve_cycle %d " "writeq %s grant_reserve_cycle %d "
"grant_reserve_bytes %d grant_write_cycle %d " "grant_reserve_bytes %d grant_write_cycle %d "
"grant_write_bytes %d curr_cycle %d curr_block %d " "grant_write_bytes %d curr_cycle %d curr_block %d "
"tail_cycle %d tail_block %d", "tail_cycle %d tail_block %d",
MAJOR(__entry->dev), MINOR(__entry->dev), MAJOR(__entry->dev), MINOR(__entry->dev),
__print_symbolic(__entry->trans_type, XFS_TRANS_TYPES),
__entry->ocnt, __entry->ocnt,
__entry->cnt, __entry->cnt,
__entry->curr_res, __entry->curr_res,
@ -1053,19 +1049,21 @@ DECLARE_EVENT_CLASS(xfs_log_item_class,
) )
TRACE_EVENT(xfs_log_force, TRACE_EVENT(xfs_log_force,
TP_PROTO(struct xfs_mount *mp, xfs_lsn_t lsn), TP_PROTO(struct xfs_mount *mp, xfs_lsn_t lsn, unsigned long caller_ip),
TP_ARGS(mp, lsn), TP_ARGS(mp, lsn, caller_ip),
TP_STRUCT__entry( TP_STRUCT__entry(
__field(dev_t, dev) __field(dev_t, dev)
__field(xfs_lsn_t, lsn) __field(xfs_lsn_t, lsn)
__field(unsigned long, caller_ip)
), ),
TP_fast_assign( TP_fast_assign(
__entry->dev = mp->m_super->s_dev; __entry->dev = mp->m_super->s_dev;
__entry->lsn = lsn; __entry->lsn = lsn;
__entry->caller_ip = caller_ip;
), ),
TP_printk("dev %d:%d lsn 0x%llx", TP_printk("dev %d:%d lsn 0x%llx caller %ps",
MAJOR(__entry->dev), MINOR(__entry->dev), MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->lsn) __entry->lsn, (void *)__entry->caller_ip)
) )
#define DEFINE_LOG_ITEM_EVENT(name) \ #define DEFINE_LOG_ITEM_EVENT(name) \

View File

@ -46,47 +46,6 @@ xfs_trans_init(
xfs_trans_resv_calc(mp, M_RES(mp)); xfs_trans_resv_calc(mp, M_RES(mp));
} }
/*
 * Allocate a transaction of the given type (the types are enumerated in
 * xfs_trans.h).
 *
 * sb_start_intwrite() is called on the superblock first, then the
 * transaction is obtained from _xfs_trans_alloc() with KM_SLEEP and flagged
 * XFS_TRANS_FREEZE_PROT before being handed back to the caller.
 */
xfs_trans_t *
xfs_trans_alloc(
	xfs_mount_t	*mp,
	uint		type)
{
	xfs_trans_t	*trans;

	sb_start_intwrite(mp->m_super);

	trans = _xfs_trans_alloc(mp, type, KM_SLEEP);
	trans->t_flags |= XFS_TRANS_FREEZE_PROT;
	return trans;
}
/*
 * Allocate and initialise a transaction structure without calling
 * sb_start_intwrite().
 *
 * Warns if the superblock is already in the SB_FREEZE_COMPLETE state, bumps
 * the mount's active transaction count, then takes a zeroed structure from
 * xfs_trans_zone using @memflags and fills in the magic number, type, mount
 * pointer and the two (empty) list heads.
 */
xfs_trans_t *
_xfs_trans_alloc(
	xfs_mount_t	*mp,
	uint		type,
	xfs_km_flags_t	memflags)
{
	xfs_trans_t	*trans;

	WARN_ON(mp->m_super->s_writers.frozen == SB_FREEZE_COMPLETE);
	atomic_inc(&mp->m_active_trans);

	trans = kmem_zone_zalloc(xfs_trans_zone, memflags);
	trans->t_mountp = mp;
	trans->t_type = type;
	trans->t_magic = XFS_TRANS_HEADER_MAGIC;
	INIT_LIST_HEAD(&trans->t_items);
	INIT_LIST_HEAD(&trans->t_busy);
	return trans;
}
/* /*
* Free the transaction structure. If there is more clean up * Free the transaction structure. If there is more clean up
* to do when the structure is freed, add it here. * to do when the structure is freed, add it here.
@ -99,7 +58,7 @@ xfs_trans_free(
xfs_extent_busy_clear(tp->t_mountp, &tp->t_busy, false); xfs_extent_busy_clear(tp->t_mountp, &tp->t_busy, false);
atomic_dec(&tp->t_mountp->m_active_trans); atomic_dec(&tp->t_mountp->m_active_trans);
if (tp->t_flags & XFS_TRANS_FREEZE_PROT) if (!(tp->t_flags & XFS_TRANS_NO_WRITECOUNT))
sb_end_intwrite(tp->t_mountp->m_super); sb_end_intwrite(tp->t_mountp->m_super);
xfs_trans_free_dqinfo(tp); xfs_trans_free_dqinfo(tp);
kmem_zone_free(xfs_trans_zone, tp); kmem_zone_free(xfs_trans_zone, tp);
@ -125,7 +84,6 @@ xfs_trans_dup(
* Initialize the new transaction structure. * Initialize the new transaction structure.
*/ */
ntp->t_magic = XFS_TRANS_HEADER_MAGIC; ntp->t_magic = XFS_TRANS_HEADER_MAGIC;
ntp->t_type = tp->t_type;
ntp->t_mountp = tp->t_mountp; ntp->t_mountp = tp->t_mountp;
INIT_LIST_HEAD(&ntp->t_items); INIT_LIST_HEAD(&ntp->t_items);
INIT_LIST_HEAD(&ntp->t_busy); INIT_LIST_HEAD(&ntp->t_busy);
@ -135,9 +93,9 @@ xfs_trans_dup(
ntp->t_flags = XFS_TRANS_PERM_LOG_RES | ntp->t_flags = XFS_TRANS_PERM_LOG_RES |
(tp->t_flags & XFS_TRANS_RESERVE) | (tp->t_flags & XFS_TRANS_RESERVE) |
(tp->t_flags & XFS_TRANS_FREEZE_PROT); (tp->t_flags & XFS_TRANS_NO_WRITECOUNT);
/* We gave our writer reference to the new transaction */ /* We gave our writer reference to the new transaction */
tp->t_flags &= ~XFS_TRANS_FREEZE_PROT; tp->t_flags |= XFS_TRANS_NO_WRITECOUNT;
ntp->t_ticket = xfs_log_ticket_get(tp->t_ticket); ntp->t_ticket = xfs_log_ticket_get(tp->t_ticket);
ntp->t_blk_res = tp->t_blk_res - tp->t_blk_res_used; ntp->t_blk_res = tp->t_blk_res - tp->t_blk_res_used;
tp->t_blk_res = tp->t_blk_res_used; tp->t_blk_res = tp->t_blk_res_used;
@ -165,7 +123,7 @@ xfs_trans_dup(
* This does not do quota reservations. That typically is done by the * This does not do quota reservations. That typically is done by the
* caller afterwards. * caller afterwards.
*/ */
int static int
xfs_trans_reserve( xfs_trans_reserve(
struct xfs_trans *tp, struct xfs_trans *tp,
struct xfs_trans_res *resp, struct xfs_trans_res *resp,
@ -219,7 +177,7 @@ xfs_trans_reserve(
resp->tr_logres, resp->tr_logres,
resp->tr_logcount, resp->tr_logcount,
&tp->t_ticket, XFS_TRANSACTION, &tp->t_ticket, XFS_TRANSACTION,
permanent, tp->t_type); permanent);
} }
if (error) if (error)
@ -268,6 +226,42 @@ undo_blocks:
return error; return error;
} }
/*
 * Allocate a transaction and reserve its resources in one step.
 *
 * @mp:        mount the transaction belongs to
 * @resp:      reservation passed through to xfs_trans_reserve()
 * @blocks:    block count passed through to xfs_trans_reserve()
 * @rtextents: realtime extent count passed through to xfs_trans_reserve()
 * @flags:     stored as the initial t_flags. XFS_TRANS_NO_WRITECOUNT skips
 *             the sb_start_intwrite() call; XFS_TRANS_NOFS selects KM_NOFS
 *             instead of KM_SLEEP for the allocation.
 * @tpp:       receives the new transaction; only written on success
 *
 * Returns 0 on success. If the reservation fails the transaction is
 * cancelled and the reservation error is returned.
 */
int
xfs_trans_alloc(
	struct xfs_mount	*mp,
	struct xfs_trans_res	*resp,
	uint			blocks,
	uint			rtextents,
	uint			flags,
	struct xfs_trans	**tpp)
{
	struct xfs_trans	*trans;
	int			err;

	if (!(flags & XFS_TRANS_NO_WRITECOUNT))
		sb_start_intwrite(mp->m_super);

	WARN_ON(mp->m_super->s_writers.frozen == SB_FREEZE_COMPLETE);
	atomic_inc(&mp->m_active_trans);

	trans = kmem_zone_zalloc(xfs_trans_zone,
			(flags & XFS_TRANS_NOFS) ? KM_NOFS : KM_SLEEP);
	trans->t_mountp = mp;
	trans->t_flags = flags;
	trans->t_magic = XFS_TRANS_HEADER_MAGIC;
	INIT_LIST_HEAD(&trans->t_items);
	INIT_LIST_HEAD(&trans->t_busy);

	err = xfs_trans_reserve(trans, resp, blocks, rtextents);
	if (!err) {
		*tpp = trans;
		return 0;
	}

	xfs_trans_cancel(trans);
	return err;
}
/* /*
* Record the indicated change to the given field for application * Record the indicated change to the given field for application
* to the file system's superblock when the transaction commits. * to the file system's superblock when the transaction commits.

View File

@ -90,7 +90,6 @@ void xfs_log_item_init(struct xfs_mount *mp, struct xfs_log_item *item,
*/ */
typedef struct xfs_trans { typedef struct xfs_trans {
unsigned int t_magic; /* magic number */ unsigned int t_magic; /* magic number */
unsigned int t_type; /* transaction type */
unsigned int t_log_res; /* amt of log space resvd */ unsigned int t_log_res; /* amt of log space resvd */
unsigned int t_log_count; /* count for perm log res */ unsigned int t_log_count; /* count for perm log res */
unsigned int t_blk_res; /* # of blocks resvd */ unsigned int t_blk_res; /* # of blocks resvd */
@ -148,10 +147,9 @@ typedef struct xfs_trans {
/* /*
* XFS transaction mechanism exported interfaces. * XFS transaction mechanism exported interfaces.
*/ */
xfs_trans_t *xfs_trans_alloc(struct xfs_mount *, uint); int xfs_trans_alloc(struct xfs_mount *mp, struct xfs_trans_res *resp,
xfs_trans_t *_xfs_trans_alloc(struct xfs_mount *, uint, xfs_km_flags_t); uint blocks, uint rtextents, uint flags,
int xfs_trans_reserve(struct xfs_trans *, struct xfs_trans_res *, struct xfs_trans **tpp);
uint, uint);
void xfs_trans_mod_sb(xfs_trans_t *, uint, int64_t); void xfs_trans_mod_sb(xfs_trans_t *, uint, int64_t);
struct xfs_buf *xfs_trans_get_buf_map(struct xfs_trans *tp, struct xfs_buf *xfs_trans_get_buf_map(struct xfs_trans *tp,

View File

@ -146,7 +146,7 @@ __xfs_xattr_put_listent(
arraytop = context->count + prefix_len + namelen + 1; arraytop = context->count + prefix_len + namelen + 1;
if (arraytop > context->firstu) { if (arraytop > context->firstu) {
context->count = -1; /* insufficient space */ context->count = -1; /* insufficient space */
return 1; return 0;
} }
offset = (char *)context->alist + context->count; offset = (char *)context->alist + context->count;
strncpy(offset, prefix, prefix_len); strncpy(offset, prefix, prefix_len);
@ -166,8 +166,7 @@ xfs_xattr_put_listent(
int flags, int flags,
unsigned char *name, unsigned char *name,
int namelen, int namelen,
int valuelen, int valuelen)
unsigned char *value)
{ {
char *prefix; char *prefix;
int prefix_len; int prefix_len;
@ -221,11 +220,15 @@ xfs_xattr_put_listent(
} }
ssize_t ssize_t
xfs_vn_listxattr(struct dentry *dentry, char *data, size_t size) xfs_vn_listxattr(
struct dentry *dentry,
char *data,
size_t size)
{ {
struct xfs_attr_list_context context; struct xfs_attr_list_context context;
struct attrlist_cursor_kern cursor = { 0 }; struct attrlist_cursor_kern cursor = { 0 };
struct inode *inode = d_inode(dentry); struct inode *inode = d_inode(dentry);
int error;
/* /*
* First read the regular on-disk attributes. * First read the regular on-disk attributes.
@ -239,7 +242,9 @@ xfs_vn_listxattr(struct dentry *dentry, char *data, size_t size)
context.firstu = context.bufsize; context.firstu = context.bufsize;
context.put_listent = xfs_xattr_put_listent; context.put_listent = xfs_xattr_put_listent;
xfs_attr_list_int(&context); error = xfs_attr_list_int(&context);
if (error)
return error;
if (context.count < 0) if (context.count < 0)
return -ERANGE; return -ERANGE;