fs: record I_DIRTY_TIME even if inode already has I_DIRTY_INODE
Currently the I_DIRTY_TIME will never get set if the inode already has I_DIRTY_INODE with assumption that it supersedes I_DIRTY_TIME. That's true, however ext4 will only update the on-disk inode in ->dirty_inode(), not on actual writeback. As a result if the inode already has I_DIRTY_INODE state by the time we get to __mark_inode_dirty() only with I_DIRTY_TIME, the time was already filled into on-disk inode and will not get updated until the next I_DIRTY_INODE update, which might never come if we crash or get a power failure. The problem can be reproduced on ext4 by running xfstest generic/622 with -o iversion mount option. Fix it by allowing I_DIRTY_TIME to be set even if the inode already has I_DIRTY_INODE. Also make sure that the case is properly handled in writeback_single_inode() as well. Additionally changes in xfs_fs_dirty_inode() was made to accommodate for I_DIRTY_TIME in flag. Thanks Jan Kara for suggestions on how to make this work properly. Cc: Dave Chinner <david@fromorbit.com> Cc: Christoph Hellwig <hch@infradead.org> Cc: stable@kernel.org Signed-off-by: Lukas Czerner <lczerner@redhat.com> Suggested-by: Jan Kara <jack@suse.cz> Reviewed-by: Jan Kara <jack@suse.cz> Link: https://lore.kernel.org/r/20220825100657.44217-1-lczerner@redhat.com Signed-off-by: Theodore Ts'o <tytso@mit.edu>
This commit is contained in:
parent
50f094a558
commit
cbfecb927f
@ -274,6 +274,9 @@ or bottom half).
|
||||
This is specifically for the inode itself being marked dirty,
|
||||
not its data. If the update needs to be persisted by fdatasync(),
|
||||
then I_DIRTY_DATASYNC will be set in the flags argument.
|
||||
I_DIRTY_TIME will be set in the flags in case lazytime is enabled
|
||||
and struct inode has times updated since the last ->dirty_inode
|
||||
call.
|
||||
|
||||
``write_inode``
|
||||
this method is called when the VFS needs to write an inode to
|
||||
|
@ -1718,9 +1718,14 @@ static int writeback_single_inode(struct inode *inode,
|
||||
*/
|
||||
if (!(inode->i_state & I_DIRTY_ALL))
|
||||
inode_cgwb_move_to_attached(inode, wb);
|
||||
else if (!(inode->i_state & I_SYNC_QUEUED) &&
|
||||
(inode->i_state & I_DIRTY))
|
||||
redirty_tail_locked(inode, wb);
|
||||
else if (!(inode->i_state & I_SYNC_QUEUED)) {
|
||||
if ((inode->i_state & I_DIRTY))
|
||||
redirty_tail_locked(inode, wb);
|
||||
else if (inode->i_state & I_DIRTY_TIME) {
|
||||
inode->dirtied_when = jiffies;
|
||||
inode_io_list_move_locked(inode, wb, &wb->b_dirty_time);
|
||||
}
|
||||
}
|
||||
|
||||
spin_unlock(&wb->list_lock);
|
||||
inode_sync_complete(inode);
|
||||
@ -2369,6 +2374,20 @@ void __mark_inode_dirty(struct inode *inode, int flags)
|
||||
trace_writeback_mark_inode_dirty(inode, flags);
|
||||
|
||||
if (flags & I_DIRTY_INODE) {
|
||||
/*
|
||||
* Inode timestamp update will piggback on this dirtying.
|
||||
* We tell ->dirty_inode callback that timestamps need to
|
||||
* be updated by setting I_DIRTY_TIME in flags.
|
||||
*/
|
||||
if (inode->i_state & I_DIRTY_TIME) {
|
||||
spin_lock(&inode->i_lock);
|
||||
if (inode->i_state & I_DIRTY_TIME) {
|
||||
inode->i_state &= ~I_DIRTY_TIME;
|
||||
flags |= I_DIRTY_TIME;
|
||||
}
|
||||
spin_unlock(&inode->i_lock);
|
||||
}
|
||||
|
||||
/*
|
||||
* Notify the filesystem about the inode being dirtied, so that
|
||||
* (if needed) it can update on-disk fields and journal the
|
||||
@ -2378,7 +2397,8 @@ void __mark_inode_dirty(struct inode *inode, int flags)
|
||||
*/
|
||||
trace_writeback_dirty_inode_start(inode, flags);
|
||||
if (sb->s_op->dirty_inode)
|
||||
sb->s_op->dirty_inode(inode, flags & I_DIRTY_INODE);
|
||||
sb->s_op->dirty_inode(inode,
|
||||
flags & (I_DIRTY_INODE | I_DIRTY_TIME));
|
||||
trace_writeback_dirty_inode(inode, flags);
|
||||
|
||||
/* I_DIRTY_INODE supersedes I_DIRTY_TIME. */
|
||||
@ -2399,21 +2419,15 @@ void __mark_inode_dirty(struct inode *inode, int flags)
|
||||
*/
|
||||
smp_mb();
|
||||
|
||||
if (((inode->i_state & flags) == flags) ||
|
||||
(dirtytime && (inode->i_state & I_DIRTY_INODE)))
|
||||
if ((inode->i_state & flags) == flags)
|
||||
return;
|
||||
|
||||
spin_lock(&inode->i_lock);
|
||||
if (dirtytime && (inode->i_state & I_DIRTY_INODE))
|
||||
goto out_unlock_inode;
|
||||
if ((inode->i_state & flags) != flags) {
|
||||
const int was_dirty = inode->i_state & I_DIRTY;
|
||||
|
||||
inode_attach_wb(inode, NULL);
|
||||
|
||||
/* I_DIRTY_INODE supersedes I_DIRTY_TIME. */
|
||||
if (flags & I_DIRTY_INODE)
|
||||
inode->i_state &= ~I_DIRTY_TIME;
|
||||
inode->i_state |= flags;
|
||||
|
||||
/*
|
||||
@ -2486,7 +2500,6 @@ void __mark_inode_dirty(struct inode *inode, int flags)
|
||||
out_unlock:
|
||||
if (wb)
|
||||
spin_unlock(&wb->list_lock);
|
||||
out_unlock_inode:
|
||||
spin_unlock(&inode->i_lock);
|
||||
}
|
||||
EXPORT_SYMBOL(__mark_inode_dirty);
|
||||
|
@ -653,7 +653,7 @@ xfs_fs_destroy_inode(
|
||||
static void
|
||||
xfs_fs_dirty_inode(
|
||||
struct inode *inode,
|
||||
int flag)
|
||||
int flags)
|
||||
{
|
||||
struct xfs_inode *ip = XFS_I(inode);
|
||||
struct xfs_mount *mp = ip->i_mount;
|
||||
@ -661,7 +661,13 @@ xfs_fs_dirty_inode(
|
||||
|
||||
if (!(inode->i_sb->s_flags & SB_LAZYTIME))
|
||||
return;
|
||||
if (flag != I_DIRTY_SYNC || !(inode->i_state & I_DIRTY_TIME))
|
||||
|
||||
/*
|
||||
* Only do the timestamp update if the inode is dirty (I_DIRTY_SYNC)
|
||||
* and has dirty timestamp (I_DIRTY_TIME). I_DIRTY_TIME can be passed
|
||||
* in flags possibly together with I_DIRTY_SYNC.
|
||||
*/
|
||||
if ((flags & ~I_DIRTY_TIME) != I_DIRTY_SYNC || !(flags & I_DIRTY_TIME))
|
||||
return;
|
||||
|
||||
if (xfs_trans_alloc(mp, &M_RES(mp)->tr_fsyncts, 0, 0, 0, &tp))
|
||||
|
@ -2371,13 +2371,14 @@ static inline void kiocb_clone(struct kiocb *kiocb, struct kiocb *kiocb_src,
|
||||
* don't have to write inode on fdatasync() when only
|
||||
* e.g. the timestamps have changed.
|
||||
* I_DIRTY_PAGES Inode has dirty pages. Inode itself may be clean.
|
||||
* I_DIRTY_TIME The inode itself only has dirty timestamps, and the
|
||||
* I_DIRTY_TIME The inode itself has dirty timestamps, and the
|
||||
* lazytime mount option is enabled. We keep track of this
|
||||
* separately from I_DIRTY_SYNC in order to implement
|
||||
* lazytime. This gets cleared if I_DIRTY_INODE
|
||||
* (I_DIRTY_SYNC and/or I_DIRTY_DATASYNC) gets set. I.e.
|
||||
* either I_DIRTY_TIME *or* I_DIRTY_INODE can be set in
|
||||
* i_state, but not both. I_DIRTY_PAGES may still be set.
|
||||
* (I_DIRTY_SYNC and/or I_DIRTY_DATASYNC) gets set. But
|
||||
* I_DIRTY_TIME can still be set if I_DIRTY_SYNC is already
|
||||
* in place because writeback might already be in progress
|
||||
* and we don't want to lose the time update
|
||||
* I_NEW Serves as both a mutex and completion notification.
|
||||
* New inodes set I_NEW. If two processes both create
|
||||
* the same inode, one of them will release its inode and
|
||||
|
Loading…
Reference in New Issue
Block a user