cifs: move cifsFileInfo_put logic into a work-queue
This patch moves the final part of the cifsFileInfo_put() logic where we need a write lock on lock_sem to be processed in a separate thread that holds no other locks. This is to prevent deadlocks like the one below: > there are 6 processes looping to while trying to down_write > cinode->lock_sem, 5 of them from _cifsFileInfo_put, and one from > cifs_new_fileinfo > > and there are 5 other processes which are blocked, several of them > waiting on either PG_writeback or PG_locked (which are both set), all > for the same page of the file > > 2 inode_lock() (inode->i_rwsem) for the file > 1 wait_on_page_writeback() for the page > 1 down_read(inode->i_rwsem) for the inode of the directory > 1 inode_lock()(inode->i_rwsem) for the inode of the directory > 1 __lock_page > > > so processes are blocked waiting on: > page flags PG_locked and PG_writeback for one specific page > inode->i_rwsem for the directory > inode->i_rwsem for the file > cifsInodeInflock_sem > > > > here are the more gory details (let me know if I need to provide > anything more/better): > > [0 00:48:22.765] [UN] PID: 8863 TASK: ffff8c691547c5c0 CPU: 3 > COMMAND: "reopen_file" > #0 [ffff9965007e3ba8] __schedule at ffffffff9b6e6095 > #1 [ffff9965007e3c38] schedule at ffffffff9b6e64df > #2 [ffff9965007e3c48] rwsem_down_write_slowpath at ffffffff9af283d7 > #3 [ffff9965007e3cb8] legitimize_path at ffffffff9b0f975d > #4 [ffff9965007e3d08] path_openat at ffffffff9b0fe55d > #5 [ffff9965007e3dd8] do_filp_open at ffffffff9b100a33 > #6 [ffff9965007e3ee0] do_sys_open at ffffffff9b0eb2d6 > #7 [ffff9965007e3f38] do_syscall_64 at ffffffff9ae04315 > * (I think legitimize_path is bogus) > > in path_openat > } else { > const char *s = path_init(nd, flags); > while (!(error = link_path_walk(s, nd)) && > (error = do_last(nd, file, op)) > 0) { <<<< > > do_last: > if (open_flag & O_CREAT) > inode_lock(dir->d_inode); <<<< > else > so it's trying to take inode->i_rwsem for the directory > > DENTRY INODE SUPERBLK TYPE PATH > ffff8c68bb8e79c0 ffff8c691158ef20 ffff8c6915bf9000 DIR /mnt/vm1_smb/ > inode.i_rwsem is ffff8c691158efc0 > > <struct rw_semaphore 0xffff8c691158efc0>: > owner: <struct task_struct 0xffff8c6914275d00> (UN - 8856 - > reopen_file), counter: 0x0000000000000003 > waitlist: 2 > 0xffff9965007e3c90 8863 reopen_file UN 0 1:29:22.926 > RWSEM_WAITING_FOR_WRITE > 0xffff996500393e00 9802 ls UN 0 1:17:26.700 > RWSEM_WAITING_FOR_READ > > > the owner of the inode.i_rwsem of the directory is: > > [0 00:00:00.109] [UN] PID: 8856 TASK: ffff8c6914275d00 CPU: 3 > COMMAND: "reopen_file" > #0 [ffff99650065b828] __schedule at ffffffff9b6e6095 > #1 [ffff99650065b8b8] schedule at ffffffff9b6e64df > #2 [ffff99650065b8c8] schedule_timeout at ffffffff9b6e9f89 > #3 [ffff99650065b940] msleep at ffffffff9af573a9 > #4 [ffff99650065b948] _cifsFileInfo_put.cold.63 at ffffffffc0a42dd6 [cifs] > #5 [ffff99650065ba38] cifs_writepage_locked at ffffffffc0a0b8f3 [cifs] > #6 [ffff99650065bab0] cifs_launder_page at ffffffffc0a0bb72 [cifs] > #7 [ffff99650065bb30] invalidate_inode_pages2_range at ffffffff9b04d4bd > #8 [ffff99650065bcb8] cifs_invalidate_mapping at ffffffffc0a11339 [cifs] > #9 [ffff99650065bcd0] cifs_revalidate_mapping at ffffffffc0a1139a [cifs] > #10 [ffff99650065bcf0] cifs_d_revalidate at ffffffffc0a014f6 [cifs] > #11 [ffff99650065bd08] path_openat at ffffffff9b0fe7f7 > #12 [ffff99650065bdd8] do_filp_open at ffffffff9b100a33 > #13 [ffff99650065bee0] do_sys_open at ffffffff9b0eb2d6 > #14 [ffff99650065bf38] do_syscall_64 at ffffffff9ae04315 > > cifs_launder_page is for page 0xffffd1e2c07d2480 > > crash> page.index,mapping,flags 0xffffd1e2c07d2480 > index = 0x8 > mapping = 0xffff8c68f3cd0db0 > flags = 0xfffffc0008095 > > PAGE-FLAG BIT VALUE > PG_locked 0 0000001 > PG_uptodate 2 0000004 > PG_lru 4 0000010 > PG_waiters 7 0000080 > PG_writeback 15 0008000 > > > inode is ffff8c68f3cd0c40 > inode.i_rwsem is ffff8c68f3cd0ce0 > DENTRY INODE SUPERBLK TYPE PATH > ffff8c68a1f1b480 ffff8c68f3cd0c40 ffff8c6915bf9000 REG > /mnt/vm1_smb/testfile.8853 > > > this process holds the inode->i_rwsem for the parent directory, is > laundering a page attached to the inode of the file it's opening, and in > _cifsFileInfo_put is trying to down_write the cifsInodeInflock_sem > for the file itself. > > > <struct rw_semaphore 0xffff8c68f3cd0ce0>: > owner: <struct task_struct 0xffff8c6914272e80> (UN - 8854 - > reopen_file), counter: 0x0000000000000003 > waitlist: 1 > 0xffff9965005dfd80 8855 reopen_file UN 0 1:29:22.912 > RWSEM_WAITING_FOR_WRITE > > this is the inode.i_rwsem for the file > > the owner: > > [0 00:48:22.739] [UN] PID: 8854 TASK: ffff8c6914272e80 CPU: 2 > COMMAND: "reopen_file" > #0 [ffff99650054fb38] __schedule at ffffffff9b6e6095 > #1 [ffff99650054fbc8] schedule at ffffffff9b6e64df > #2 [ffff99650054fbd8] io_schedule at ffffffff9b6e68e2 > #3 [ffff99650054fbe8] __lock_page at ffffffff9b03c56f > #4 [ffff99650054fc80] pagecache_get_page at ffffffff9b03dcdf > #5 [ffff99650054fcc0] grab_cache_page_write_begin at ffffffff9b03ef4c > #6 [ffff99650054fcd0] cifs_write_begin at ffffffffc0a064ec [cifs] > #7 [ffff99650054fd30] generic_perform_write at ffffffff9b03bba4 > #8 [ffff99650054fda8] __generic_file_write_iter at ffffffff9b04060a > #9 [ffff99650054fdf0] cifs_strict_writev.cold.70 at ffffffffc0a4469b [cifs] > #10 [ffff99650054fe48] new_sync_write at ffffffff9b0ec1dd > #11 [ffff99650054fed0] vfs_write at ffffffff9b0eed35 > #12 [ffff99650054ff00] ksys_write at ffffffff9b0eefd9 > #13 [ffff99650054ff38] do_syscall_64 at ffffffff9ae04315 > > the process holds the inode->i_rwsem for the file to which it's writing, > and is trying to __lock_page for the same page as in the other processes > > > the other tasks: > [0 00:00:00.028] [UN] PID: 8859 TASK: ffff8c6915479740 CPU: 2 > COMMAND: "reopen_file" > #0 [ffff9965007b39d8] __schedule at ffffffff9b6e6095 > #1 [ffff9965007b3a68] schedule at ffffffff9b6e64df > #2 [ffff9965007b3a78] schedule_timeout at ffffffff9b6e9f89 > #3 [ffff9965007b3af0] msleep at ffffffff9af573a9 > #4 [ffff9965007b3af8] cifs_new_fileinfo.cold.61 at ffffffffc0a42a07 [cifs] > #5 [ffff9965007b3b78] cifs_open at ffffffffc0a0709d [cifs] > #6 [ffff9965007b3cd8] do_dentry_open at ffffffff9b0e9b7a > #7 [ffff9965007b3d08] path_openat at ffffffff9b0fe34f > #8 [ffff9965007b3dd8] do_filp_open at ffffffff9b100a33 > #9 [ffff9965007b3ee0] do_sys_open at ffffffff9b0eb2d6 > #10 [ffff9965007b3f38] do_syscall_64 at ffffffff9ae04315 > > this is opening the file, and is trying to down_write cinode->lock_sem > > > [0 00:00:00.041] [UN] PID: 8860 TASK: ffff8c691547ae80 CPU: 2 > COMMAND: "reopen_file" > [0 00:00:00.057] [UN] PID: 8861 TASK: ffff8c6915478000 CPU: 3 > COMMAND: "reopen_file" > [0 00:00:00.059] [UN] PID: 8858 TASK: ffff8c6914271740 CPU: 2 > COMMAND: "reopen_file" > [0 00:00:00.109] [UN] PID: 8862 TASK: ffff8c691547dd00 CPU: 6 > COMMAND: "reopen_file" > #0 [ffff9965007c3c78] __schedule at ffffffff9b6e6095 > #1 [ffff9965007c3d08] schedule at ffffffff9b6e64df > #2 [ffff9965007c3d18] schedule_timeout at ffffffff9b6e9f89 > #3 [ffff9965007c3d90] msleep at ffffffff9af573a9 > #4 [ffff9965007c3d98] _cifsFileInfo_put.cold.63 at ffffffffc0a42dd6 [cifs] > #5 [ffff9965007c3e88] cifs_close at ffffffffc0a07aaf [cifs] > #6 [ffff9965007c3ea0] __fput at ffffffff9b0efa6e > #7 [ffff9965007c3ee8] task_work_run at ffffffff9aef1614 > #8 [ffff9965007c3f20] exit_to_usermode_loop at ffffffff9ae03d6f > #9 [ffff9965007c3f38] do_syscall_64 at ffffffff9ae0444c > > closing the file, and trying to down_write cifsi->lock_sem > > > [0 00:48:22.839] [UN] PID: 8857 TASK: ffff8c6914270000 CPU: 7 > COMMAND: "reopen_file" > #0 [ffff9965006a7cc8] __schedule at ffffffff9b6e6095 > #1 [ffff9965006a7d58] schedule at ffffffff9b6e64df > #2 [ffff9965006a7d68] io_schedule at ffffffff9b6e68e2 > #3 [ffff9965006a7d78] wait_on_page_bit at ffffffff9b03cac6 > #4 [ffff9965006a7e10] __filemap_fdatawait_range at ffffffff9b03b028 > #5 [ffff9965006a7ed8] filemap_write_and_wait at ffffffff9b040165 > #6 [ffff9965006a7ef0] cifs_flush at ffffffffc0a0c2fa [cifs] > #7 [ffff9965006a7f10] filp_close at ffffffff9b0e93f1 > #8 [ffff9965006a7f30] __x64_sys_close at ffffffff9b0e9a0e > #9 [ffff9965006a7f38] do_syscall_64 at ffffffff9ae04315 > > in __filemap_fdatawait_range > wait_on_page_writeback(page); > for the same page of the file > > > > [0 00:48:22.718] [UN] PID: 8855 TASK: ffff8c69142745c0 CPU: 7 > COMMAND: "reopen_file" > #0 [ffff9965005dfc98] __schedule at ffffffff9b6e6095 > #1 [ffff9965005dfd28] schedule at ffffffff9b6e64df > #2 [ffff9965005dfd38] rwsem_down_write_slowpath at ffffffff9af283d7 > #3 [ffff9965005dfdf0] cifs_strict_writev at ffffffffc0a0c40a [cifs] > #4 [ffff9965005dfe48] new_sync_write at ffffffff9b0ec1dd > #5 [ffff9965005dfed0] vfs_write at ffffffff9b0eed35 > #6 [ffff9965005dff00] ksys_write at ffffffff9b0eefd9 > #7 [ffff9965005dff38] do_syscall_64 at ffffffff9ae04315 > > inode_lock(inode); > > > and one 'ls' later on, to see whether the rest of the mount is available > (the test file is in the root, so we get blocked up on the directory > ->i_rwsem), so the entire mount is unavailable > > [0 00:36:26.473] [UN] PID: 9802 TASK: ffff8c691436ae80 CPU: 4 > COMMAND: "ls" > #0 [ffff996500393d28] __schedule at ffffffff9b6e6095 > #1 [ffff996500393db8] schedule at ffffffff9b6e64df > #2 [ffff996500393dc8] rwsem_down_read_slowpath at ffffffff9b6e9421 > #3 [ffff996500393e78] down_read_killable at ffffffff9b6e95e2 > #4 [ffff996500393e88] iterate_dir at ffffffff9b103c56 > #5 [ffff996500393ec8] ksys_getdents64 at ffffffff9b104b0c > #6 [ffff996500393f30] __x64_sys_getdents64 at ffffffff9b104bb6 > #7 [ffff996500393f38] do_syscall_64 at ffffffff9ae04315 > > in iterate_dir: > if (shared) > res = down_read_killable(&inode->i_rwsem); <<<< > else > res = down_write_killable(&inode->i_rwsem); > Reported-by: Frank Sorenson <sorenson@redhat.com> Reviewed-by: Pavel Shilovsky <pshilov@microsoft.com> Signed-off-by: Ronnie Sahlberg <lsahlber@redhat.com> Signed-off-by: Steve French <stfrench@microsoft.com>
This commit is contained in:
parent
d70e9fa558
commit
32546a9586
@ -119,6 +119,7 @@ extern mempool_t *cifs_mid_poolp;
|
||||
|
||||
struct workqueue_struct *cifsiod_wq;
|
||||
struct workqueue_struct *decrypt_wq;
|
||||
struct workqueue_struct *fileinfo_put_wq;
|
||||
struct workqueue_struct *cifsoplockd_wq;
|
||||
__u32 cifs_lock_secret;
|
||||
|
||||
@ -1561,11 +1562,18 @@ init_cifs(void)
|
||||
goto out_destroy_cifsiod_wq;
|
||||
}
|
||||
|
||||
fileinfo_put_wq = alloc_workqueue("cifsfileinfoput",
|
||||
WQ_UNBOUND|WQ_FREEZABLE|WQ_MEM_RECLAIM, 0);
|
||||
if (!fileinfo_put_wq) {
|
||||
rc = -ENOMEM;
|
||||
goto out_destroy_decrypt_wq;
|
||||
}
|
||||
|
||||
cifsoplockd_wq = alloc_workqueue("cifsoplockd",
|
||||
WQ_FREEZABLE|WQ_MEM_RECLAIM, 0);
|
||||
if (!cifsoplockd_wq) {
|
||||
rc = -ENOMEM;
|
||||
goto out_destroy_decrypt_wq;
|
||||
goto out_destroy_fileinfo_put_wq;
|
||||
}
|
||||
|
||||
rc = cifs_fscache_register();
|
||||
@ -1631,6 +1639,8 @@ out_unreg_fscache:
|
||||
cifs_fscache_unregister();
|
||||
out_destroy_cifsoplockd_wq:
|
||||
destroy_workqueue(cifsoplockd_wq);
|
||||
out_destroy_fileinfo_put_wq:
|
||||
destroy_workqueue(fileinfo_put_wq);
|
||||
out_destroy_decrypt_wq:
|
||||
destroy_workqueue(decrypt_wq);
|
||||
out_destroy_cifsiod_wq:
|
||||
@ -1660,6 +1670,7 @@ exit_cifs(void)
|
||||
cifs_fscache_unregister();
|
||||
destroy_workqueue(cifsoplockd_wq);
|
||||
destroy_workqueue(decrypt_wq);
|
||||
destroy_workqueue(fileinfo_put_wq);
|
||||
destroy_workqueue(cifsiod_wq);
|
||||
cifs_proc_clean();
|
||||
}
|
||||
|
@ -1308,6 +1308,7 @@ struct cifsFileInfo {
|
||||
struct mutex fh_mutex; /* prevents reopen race after dead ses*/
|
||||
struct cifs_search_info srch_inf;
|
||||
struct work_struct oplock_break; /* work for oplock breaks */
|
||||
struct work_struct put; /* work for the final part of _put */
|
||||
};
|
||||
|
||||
struct cifs_io_parms {
|
||||
@ -1413,7 +1414,8 @@ cifsFileInfo_get_locked(struct cifsFileInfo *cifs_file)
|
||||
}
|
||||
|
||||
struct cifsFileInfo *cifsFileInfo_get(struct cifsFileInfo *cifs_file);
|
||||
void _cifsFileInfo_put(struct cifsFileInfo *cifs_file, bool wait_oplock_hdlr);
|
||||
void _cifsFileInfo_put(struct cifsFileInfo *cifs_file, bool wait_oplock_hdlr,
|
||||
bool offload);
|
||||
void cifsFileInfo_put(struct cifsFileInfo *cifs_file);
|
||||
|
||||
#define CIFS_CACHE_READ_FLG 1
|
||||
@ -1953,6 +1955,7 @@ void cifs_queue_oplock_break(struct cifsFileInfo *cfile);
|
||||
extern const struct slow_work_ops cifs_oplock_break_ops;
|
||||
extern struct workqueue_struct *cifsiod_wq;
|
||||
extern struct workqueue_struct *decrypt_wq;
|
||||
extern struct workqueue_struct *fileinfo_put_wq;
|
||||
extern struct workqueue_struct *cifsoplockd_wq;
|
||||
extern __u32 cifs_lock_secret;
|
||||
|
||||
|
@ -288,6 +288,8 @@ cifs_down_write(struct rw_semaphore *sem)
|
||||
msleep(10);
|
||||
}
|
||||
|
||||
static void cifsFileInfo_put_work(struct work_struct *work);
|
||||
|
||||
struct cifsFileInfo *
|
||||
cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
|
||||
struct tcon_link *tlink, __u32 oplock)
|
||||
@ -325,6 +327,7 @@ cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
|
||||
cfile->invalidHandle = false;
|
||||
cfile->tlink = cifs_get_tlink(tlink);
|
||||
INIT_WORK(&cfile->oplock_break, cifs_oplock_break);
|
||||
INIT_WORK(&cfile->put, cifsFileInfo_put_work);
|
||||
mutex_init(&cfile->fh_mutex);
|
||||
spin_lock_init(&cfile->file_info_lock);
|
||||
|
||||
@ -375,6 +378,41 @@ cifsFileInfo_get(struct cifsFileInfo *cifs_file)
|
||||
return cifs_file;
|
||||
}
|
||||
|
||||
static void cifsFileInfo_put_final(struct cifsFileInfo *cifs_file)
|
||||
{
|
||||
struct inode *inode = d_inode(cifs_file->dentry);
|
||||
struct cifsInodeInfo *cifsi = CIFS_I(inode);
|
||||
struct cifsLockInfo *li, *tmp;
|
||||
struct super_block *sb = inode->i_sb;
|
||||
|
||||
/*
|
||||
* Delete any outstanding lock records. We'll lose them when the file
|
||||
* is closed anyway.
|
||||
*/
|
||||
cifs_down_write(&cifsi->lock_sem);
|
||||
list_for_each_entry_safe(li, tmp, &cifs_file->llist->locks, llist) {
|
||||
list_del(&li->llist);
|
||||
cifs_del_lock_waiters(li);
|
||||
kfree(li);
|
||||
}
|
||||
list_del(&cifs_file->llist->llist);
|
||||
kfree(cifs_file->llist);
|
||||
up_write(&cifsi->lock_sem);
|
||||
|
||||
cifs_put_tlink(cifs_file->tlink);
|
||||
dput(cifs_file->dentry);
|
||||
cifs_sb_deactive(sb);
|
||||
kfree(cifs_file);
|
||||
}
|
||||
|
||||
static void cifsFileInfo_put_work(struct work_struct *work)
|
||||
{
|
||||
struct cifsFileInfo *cifs_file = container_of(work,
|
||||
struct cifsFileInfo, put);
|
||||
|
||||
cifsFileInfo_put_final(cifs_file);
|
||||
}
|
||||
|
||||
/**
|
||||
* cifsFileInfo_put - release a reference of file priv data
|
||||
*
|
||||
@ -382,15 +420,15 @@ cifsFileInfo_get(struct cifsFileInfo *cifs_file)
|
||||
*/
|
||||
void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
|
||||
{
|
||||
_cifsFileInfo_put(cifs_file, true);
|
||||
_cifsFileInfo_put(cifs_file, true, true);
|
||||
}
|
||||
|
||||
/**
|
||||
* _cifsFileInfo_put - release a reference of file priv data
|
||||
*
|
||||
* This may involve closing the filehandle @cifs_file out on the
|
||||
* server. Must be called without holding tcon->open_file_lock and
|
||||
* cifs_file->file_info_lock.
|
||||
* server. Must be called without holding tcon->open_file_lock,
|
||||
* cinode->open_file_lock and cifs_file->file_info_lock.
|
||||
*
|
||||
* If @wait_for_oplock_handler is true and we are releasing the last
|
||||
* reference, wait for any running oplock break handler of the file
|
||||
@ -398,7 +436,8 @@ void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
|
||||
* oplock break handler, you need to pass false.
|
||||
*
|
||||
*/
|
||||
void _cifsFileInfo_put(struct cifsFileInfo *cifs_file, bool wait_oplock_handler)
|
||||
void _cifsFileInfo_put(struct cifsFileInfo *cifs_file,
|
||||
bool wait_oplock_handler, bool offload)
|
||||
{
|
||||
struct inode *inode = d_inode(cifs_file->dentry);
|
||||
struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink);
|
||||
@ -406,7 +445,6 @@ void _cifsFileInfo_put(struct cifsFileInfo *cifs_file, bool wait_oplock_handler)
|
||||
struct cifsInodeInfo *cifsi = CIFS_I(inode);
|
||||
struct super_block *sb = inode->i_sb;
|
||||
struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
|
||||
struct cifsLockInfo *li, *tmp;
|
||||
struct cifs_fid fid;
|
||||
struct cifs_pending_open open;
|
||||
bool oplock_break_cancelled;
|
||||
@ -467,24 +505,10 @@ void _cifsFileInfo_put(struct cifsFileInfo *cifs_file, bool wait_oplock_handler)
|
||||
|
||||
cifs_del_pending_open(&open);
|
||||
|
||||
/*
|
||||
* Delete any outstanding lock records. We'll lose them when the file
|
||||
* is closed anyway.
|
||||
*/
|
||||
cifs_down_write(&cifsi->lock_sem);
|
||||
list_for_each_entry_safe(li, tmp, &cifs_file->llist->locks, llist) {
|
||||
list_del(&li->llist);
|
||||
cifs_del_lock_waiters(li);
|
||||
kfree(li);
|
||||
}
|
||||
list_del(&cifs_file->llist->llist);
|
||||
kfree(cifs_file->llist);
|
||||
up_write(&cifsi->lock_sem);
|
||||
|
||||
cifs_put_tlink(cifs_file->tlink);
|
||||
dput(cifs_file->dentry);
|
||||
cifs_sb_deactive(sb);
|
||||
kfree(cifs_file);
|
||||
if (offload)
|
||||
queue_work(fileinfo_put_wq, &cifs_file->put);
|
||||
else
|
||||
cifsFileInfo_put_final(cifs_file);
|
||||
}
|
||||
|
||||
int cifs_open(struct inode *inode, struct file *file)
|
||||
@ -815,7 +839,7 @@ reopen_error_exit:
|
||||
int cifs_close(struct inode *inode, struct file *file)
|
||||
{
|
||||
if (file->private_data != NULL) {
|
||||
cifsFileInfo_put(file->private_data);
|
||||
_cifsFileInfo_put(file->private_data, true, false);
|
||||
file->private_data = NULL;
|
||||
}
|
||||
|
||||
@ -4749,7 +4773,7 @@ void cifs_oplock_break(struct work_struct *work)
|
||||
cinode);
|
||||
cifs_dbg(FYI, "Oplock release rc = %d\n", rc);
|
||||
}
|
||||
_cifsFileInfo_put(cfile, false /* do not wait for ourself */);
|
||||
_cifsFileInfo_put(cfile, false /* do not wait for ourself */, false);
|
||||
cifs_done_oplock_break(cinode);
|
||||
}
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user