2005-04-17 02:20:36 +04:00
/*
* fs / cifs / cifsfs . c
*
2008-05-17 07:12:45 +04:00
* Copyright ( C ) International Business Machines Corp . , 2002 , 2008
2005-04-17 02:20:36 +04:00
* Author ( s ) : Steve French ( sfrench @ us . ibm . com )
*
* Common Internet FileSystem ( CIFS ) client
*
* This library is free software ; you can redistribute it and / or modify
* it under the terms of the GNU Lesser General Public License as published
* by the Free Software Foundation ; either version 2.1 of the License , or
* ( at your option ) any later version .
*
* This library is distributed in the hope that it will be useful ,
* but WITHOUT ANY WARRANTY ; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE . See
* the GNU Lesser General Public License for more details .
*
* You should have received a copy of the GNU Lesser General Public License
* along with this library ; if not , write to the Free Software
* Foundation , Inc . , 59 Temple Place , Suite 330 , Boston , MA 02111 - 1307 USA
*/
/* Note that BB means BUGBUG (ie something to fix eventually) */
# include <linux/module.h>
# include <linux/fs.h>
# include <linux/mount.h>
# include <linux/slab.h>
# include <linux/init.h>
# include <linux/list.h>
# include <linux/seq_file.h>
# include <linux/vfs.h>
# include <linux/mempool.h>
2005-11-30 07:55:11 +03:00
# include <linux/delay.h>
2006-04-22 02:52:25 +04:00
# include <linux/kthread.h>
2006-12-07 07:34:23 +03:00
# include <linux/freezer.h>
2011-07-18 21:50:40 +04:00
# include <linux/namei.h>
2012-09-19 17:22:44 +04:00
# include <linux/random.h>
2017-03-29 00:45:06 +03:00
# include <linux/uuid.h>
2016-04-22 13:11:38 +03:00
# include <linux/xattr.h>
2010-09-02 04:06:02 +04:00
# include <net/ipv6.h>
2005-04-17 02:20:36 +04:00
# include "cifsfs.h"
# include "cifspdu.h"
# define DECLARE_GLOBALS_HERE
# include "cifsglob.h"
# include "cifsproto.h"
# include "cifs_debug.h"
# include "cifs_fs_sb.h"
# include <linux/mm.h>
2007-11-03 08:02:24 +03:00
# include <linux/key-type.h>
2007-11-03 08:11:06 +03:00
# include "cifs_spnego.h"
2010-07-05 16:41:50 +04:00
# include "fscache.h"
2012-01-12 22:40:50 +04:00
# include "smb2pdu.h"
2018-11-14 21:24:03 +03:00
# ifdef CONFIG_CIFS_DFS_UPCALL
# include "dfs_cache.h"
# endif
2005-04-17 02:20:36 +04:00
2019-03-23 00:32:35 +03:00
/*
* DOS dates from 1980 / 1 / 1 through 2107 / 12 / 31
* Protocol specifications indicate the range should be to 119 , which
* limits maximum year to 2099. But this range has not been checked .
*/
/*
 * Packed DOS date bounds, laid out by the shifts below as
 * (year offset << 9 | month << 5 | day). Year offset 127 with 12/31 is the
 * 2107/12/31 upper bound, and offset 0 with 1/1 is the 1980/1/1 lower bound.
 */
# define SMB_DATE_MAX (127<<9 | 12<<5 | 31)
# define SMB_DATE_MIN (0<<9 | 1<<5 | 1)
/*
 * Packed DOS time upper bound: (23 << 11 | 59 << 5 | 29).
 * NOTE(review): the low field presumably counts 2-second units (29 -> 58s),
 * matching the 2 second granularity of old servers - confirm against
 * cnvrtDosUnixTm().
 */
# define SMB_TIME_MAX (23<<11 | 59<<5 | 29)
2005-04-17 02:20:36 +04:00
/*
 * Module-wide switches and tunables. Those registered with module_param()
 * further down carry a sysfs permission mode: 0444 entries are read-only
 * once the module is loaded, 0644 entries may also be changed at runtime.
 */
int cifsFYI = 0 ;
2016-03-18 00:22:54 +03:00
bool traceSMB ;
2011-10-13 02:47:03 +04:00
bool enable_oplocks = true ;
2016-03-18 00:22:54 +03:00
bool linuxExtEnabled = true ;
bool lookupCacheEnabled = true ;
2018-05-24 12:11:07 +03:00
bool disable_legacy_dialects ; /* false by default */
2010-04-24 15:57:45 +04:00
unsigned int global_secflags = CIFSSEC_DEF ;
2006-06-01 02:40:51 +04:00
/* unsigned int ntlmv2_support = 0; */
2005-04-17 02:20:36 +04:00
unsigned int sign_CIFS_PDUs = 1 ;
2007-02-12 11:55:41 +03:00
/* Forward declaration: cifs_read_super() installs this table as sb->s_op. */
static const struct super_operations cifs_super_ops ;
2005-04-17 02:20:36 +04:00
/* Buffer and request-pool sizing knobs; all read-only after load (0444). */
unsigned int CIFSMaxBufSize = CIFS_MAX_MSGSIZE ;
2016-10-01 05:25:24 +03:00
module_param ( CIFSMaxBufSize , uint , 0444 ) ;
2018-05-24 10:09:20 +03:00
MODULE_PARM_DESC ( CIFSMaxBufSize , " Network buffer size (not including header) "
" for CIFS requests. "
2007-07-17 21:34:02 +04:00
" Default: 16384 Range: 8192 to 130048 " ) ;
2005-04-17 02:20:36 +04:00
unsigned int cifs_min_rcv = CIFS_MIN_RCV_POOL ;
2016-10-01 05:25:24 +03:00
module_param ( cifs_min_rcv , uint , 0444 ) ;
2007-07-17 21:34:02 +04:00
MODULE_PARM_DESC ( cifs_min_rcv , " Network buffers in pool. Default: 4 Range: "
" 1 to 64 " ) ;
2005-04-17 02:20:36 +04:00
unsigned int cifs_min_small = 30 ;
2016-10-01 05:25:24 +03:00
module_param ( cifs_min_small , uint , 0444 ) ;
2007-07-17 21:34:02 +04:00
MODULE_PARM_DESC ( cifs_min_small , " Small network buffers in pool. Default: 30 "
" Range: 2 to 256 " ) ;
2005-04-17 02:20:36 +04:00
unsigned int cifs_max_pending = CIFS_MAX_REQ ;
2012-11-25 17:00:34 +04:00
module_param ( cifs_max_pending , uint , 0444 ) ;
2018-05-24 10:09:20 +03:00
MODULE_PARM_DESC ( cifs_max_pending , " Simultaneous requests to server for "
" CIFS/SMB1 dialect (N/A for SMB3) "
2012-03-20 13:55:09 +04:00
" Default: 32767 Range: 2 to 32767. " ) ;
2018-09-18 22:05:18 +03:00
/* Only built with CONFIG_CIFS_STATS2; runtime-writable (0644). */
# ifdef CONFIG_CIFS_STATS2
unsigned int slow_rsp_threshold = 1 ;
module_param ( slow_rsp_threshold , uint , 0644 ) ;
MODULE_PARM_DESC ( slow_rsp_threshold , " Amount of time (in seconds) to wait "
" before logging that a response is delayed. "
" Default: 1 (if set to 0 disables msg). " ) ;
# endif /* STATS2 */
2011-10-13 02:47:03 +04:00
/* Runtime-writable (0644) boolean switches declared near the top. */
module_param ( enable_oplocks , bool , 0644 ) ;
2012-11-25 17:00:34 +04:00
MODULE_PARM_DESC ( enable_oplocks , " Enable or disable oplocks. Default: y/Y/1 " ) ;
2011-10-13 02:47:03 +04:00
2018-05-24 12:11:07 +03:00
module_param ( disable_legacy_dialects , bool , 0644 ) ;
MODULE_PARM_DESC ( disable_legacy_dialects , " To improve security it may be "
" helpful to restrict the ability to "
" override the default dialects (SMB2.1, "
" SMB3 and SMB3.02) on mount with old "
" dialects (CIFS/SMB1 and SMB2) since "
" vers=1.0 (CIFS/SMB1) and vers=2.0 are weaker "
" and less secure. Default: n/N/0 " ) ;
2005-04-17 02:20:36 +04:00
/* Request/mid mempools; the definitions appear further down in this file. */
extern mempool_t * cifs_sm_req_poolp ;
extern mempool_t * cifs_req_poolp ;
extern mempool_t * cifs_mid_poolp ;
2012-03-23 22:40:53 +04:00
/*
 * Module-global workqueue handles.
 * NOTE(review): judging by the names these serve async I/O and decryption
 * work respectively - confirm at the allocation sites.
 */
struct workqueue_struct * cifsiod_wq ;
2019-09-07 09:09:49 +03:00
struct workqueue_struct * decrypt_wq ;
cifs: move cifsFileInfo_put logic into a work-queue
This patch moves the final part of the cifsFileInfo_put() logic where we
need a write lock on lock_sem to be processed in a separate thread that
holds no other locks.
This is to prevent deadlocks like the one below:
> there are 6 processes looping to while trying to down_write
> cinode->lock_sem, 5 of them from _cifsFileInfo_put, and one from
> cifs_new_fileinfo
>
> and there are 5 other processes which are blocked, several of them
> waiting on either PG_writeback or PG_locked (which are both set), all
> for the same page of the file
>
> 2 inode_lock() (inode->i_rwsem) for the file
> 1 wait_on_page_writeback() for the page
> 1 down_read(inode->i_rwsem) for the inode of the directory
> 1 inode_lock()(inode->i_rwsem) for the inode of the directory
> 1 __lock_page
>
>
> so processes are blocked waiting on:
> page flags PG_locked and PG_writeback for one specific page
> inode->i_rwsem for the directory
> inode->i_rwsem for the file
> cifsInodeInfo->lock_sem
>
>
>
> here are the more gory details (let me know if I need to provide
> anything more/better):
>
> [0 00:48:22.765] [UN] PID: 8863 TASK: ffff8c691547c5c0 CPU: 3
> COMMAND: "reopen_file"
> #0 [ffff9965007e3ba8] __schedule at ffffffff9b6e6095
> #1 [ffff9965007e3c38] schedule at ffffffff9b6e64df
> #2 [ffff9965007e3c48] rwsem_down_write_slowpath at ffffffff9af283d7
> #3 [ffff9965007e3cb8] legitimize_path at ffffffff9b0f975d
> #4 [ffff9965007e3d08] path_openat at ffffffff9b0fe55d
> #5 [ffff9965007e3dd8] do_filp_open at ffffffff9b100a33
> #6 [ffff9965007e3ee0] do_sys_open at ffffffff9b0eb2d6
> #7 [ffff9965007e3f38] do_syscall_64 at ffffffff9ae04315
> * (I think legitimize_path is bogus)
>
> in path_openat
> } else {
> const char *s = path_init(nd, flags);
> while (!(error = link_path_walk(s, nd)) &&
> (error = do_last(nd, file, op)) > 0) { <<<<
>
> do_last:
> if (open_flag & O_CREAT)
> inode_lock(dir->d_inode); <<<<
> else
> so it's trying to take inode->i_rwsem for the directory
>
> DENTRY INODE SUPERBLK TYPE PATH
> ffff8c68bb8e79c0 ffff8c691158ef20 ffff8c6915bf9000 DIR /mnt/vm1_smb/
> inode.i_rwsem is ffff8c691158efc0
>
> <struct rw_semaphore 0xffff8c691158efc0>:
> owner: <struct task_struct 0xffff8c6914275d00> (UN - 8856 -
> reopen_file), counter: 0x0000000000000003
> waitlist: 2
> 0xffff9965007e3c90 8863 reopen_file UN 0 1:29:22.926
> RWSEM_WAITING_FOR_WRITE
> 0xffff996500393e00 9802 ls UN 0 1:17:26.700
> RWSEM_WAITING_FOR_READ
>
>
> the owner of the inode.i_rwsem of the directory is:
>
> [0 00:00:00.109] [UN] PID: 8856 TASK: ffff8c6914275d00 CPU: 3
> COMMAND: "reopen_file"
> #0 [ffff99650065b828] __schedule at ffffffff9b6e6095
> #1 [ffff99650065b8b8] schedule at ffffffff9b6e64df
> #2 [ffff99650065b8c8] schedule_timeout at ffffffff9b6e9f89
> #3 [ffff99650065b940] msleep at ffffffff9af573a9
> #4 [ffff99650065b948] _cifsFileInfo_put.cold.63 at ffffffffc0a42dd6 [cifs]
> #5 [ffff99650065ba38] cifs_writepage_locked at ffffffffc0a0b8f3 [cifs]
> #6 [ffff99650065bab0] cifs_launder_page at ffffffffc0a0bb72 [cifs]
> #7 [ffff99650065bb30] invalidate_inode_pages2_range at ffffffff9b04d4bd
> #8 [ffff99650065bcb8] cifs_invalidate_mapping at ffffffffc0a11339 [cifs]
> #9 [ffff99650065bcd0] cifs_revalidate_mapping at ffffffffc0a1139a [cifs]
> #10 [ffff99650065bcf0] cifs_d_revalidate at ffffffffc0a014f6 [cifs]
> #11 [ffff99650065bd08] path_openat at ffffffff9b0fe7f7
> #12 [ffff99650065bdd8] do_filp_open at ffffffff9b100a33
> #13 [ffff99650065bee0] do_sys_open at ffffffff9b0eb2d6
> #14 [ffff99650065bf38] do_syscall_64 at ffffffff9ae04315
>
> cifs_launder_page is for page 0xffffd1e2c07d2480
>
> crash> page.index,mapping,flags 0xffffd1e2c07d2480
> index = 0x8
> mapping = 0xffff8c68f3cd0db0
> flags = 0xfffffc0008095
>
> PAGE-FLAG BIT VALUE
> PG_locked 0 0000001
> PG_uptodate 2 0000004
> PG_lru 4 0000010
> PG_waiters 7 0000080
> PG_writeback 15 0008000
>
>
> inode is ffff8c68f3cd0c40
> inode.i_rwsem is ffff8c68f3cd0ce0
> DENTRY INODE SUPERBLK TYPE PATH
> ffff8c68a1f1b480 ffff8c68f3cd0c40 ffff8c6915bf9000 REG
> /mnt/vm1_smb/testfile.8853
>
>
> this process holds the inode->i_rwsem for the parent directory, is
> laundering a page attached to the inode of the file it's opening, and in
> _cifsFileInfo_put is trying to down_write the cifsInodeInfo->lock_sem
> for the file itself.
>
>
> <struct rw_semaphore 0xffff8c68f3cd0ce0>:
> owner: <struct task_struct 0xffff8c6914272e80> (UN - 8854 -
> reopen_file), counter: 0x0000000000000003
> waitlist: 1
> 0xffff9965005dfd80 8855 reopen_file UN 0 1:29:22.912
> RWSEM_WAITING_FOR_WRITE
>
> this is the inode.i_rwsem for the file
>
> the owner:
>
> [0 00:48:22.739] [UN] PID: 8854 TASK: ffff8c6914272e80 CPU: 2
> COMMAND: "reopen_file"
> #0 [ffff99650054fb38] __schedule at ffffffff9b6e6095
> #1 [ffff99650054fbc8] schedule at ffffffff9b6e64df
> #2 [ffff99650054fbd8] io_schedule at ffffffff9b6e68e2
> #3 [ffff99650054fbe8] __lock_page at ffffffff9b03c56f
> #4 [ffff99650054fc80] pagecache_get_page at ffffffff9b03dcdf
> #5 [ffff99650054fcc0] grab_cache_page_write_begin at ffffffff9b03ef4c
> #6 [ffff99650054fcd0] cifs_write_begin at ffffffffc0a064ec [cifs]
> #7 [ffff99650054fd30] generic_perform_write at ffffffff9b03bba4
> #8 [ffff99650054fda8] __generic_file_write_iter at ffffffff9b04060a
> #9 [ffff99650054fdf0] cifs_strict_writev.cold.70 at ffffffffc0a4469b [cifs]
> #10 [ffff99650054fe48] new_sync_write at ffffffff9b0ec1dd
> #11 [ffff99650054fed0] vfs_write at ffffffff9b0eed35
> #12 [ffff99650054ff00] ksys_write at ffffffff9b0eefd9
> #13 [ffff99650054ff38] do_syscall_64 at ffffffff9ae04315
>
> the process holds the inode->i_rwsem for the file to which it's writing,
> and is trying to __lock_page for the same page as in the other processes
>
>
> the other tasks:
> [0 00:00:00.028] [UN] PID: 8859 TASK: ffff8c6915479740 CPU: 2
> COMMAND: "reopen_file"
> #0 [ffff9965007b39d8] __schedule at ffffffff9b6e6095
> #1 [ffff9965007b3a68] schedule at ffffffff9b6e64df
> #2 [ffff9965007b3a78] schedule_timeout at ffffffff9b6e9f89
> #3 [ffff9965007b3af0] msleep at ffffffff9af573a9
> #4 [ffff9965007b3af8] cifs_new_fileinfo.cold.61 at ffffffffc0a42a07 [cifs]
> #5 [ffff9965007b3b78] cifs_open at ffffffffc0a0709d [cifs]
> #6 [ffff9965007b3cd8] do_dentry_open at ffffffff9b0e9b7a
> #7 [ffff9965007b3d08] path_openat at ffffffff9b0fe34f
> #8 [ffff9965007b3dd8] do_filp_open at ffffffff9b100a33
> #9 [ffff9965007b3ee0] do_sys_open at ffffffff9b0eb2d6
> #10 [ffff9965007b3f38] do_syscall_64 at ffffffff9ae04315
>
> this is opening the file, and is trying to down_write cinode->lock_sem
>
>
> [0 00:00:00.041] [UN] PID: 8860 TASK: ffff8c691547ae80 CPU: 2
> COMMAND: "reopen_file"
> [0 00:00:00.057] [UN] PID: 8861 TASK: ffff8c6915478000 CPU: 3
> COMMAND: "reopen_file"
> [0 00:00:00.059] [UN] PID: 8858 TASK: ffff8c6914271740 CPU: 2
> COMMAND: "reopen_file"
> [0 00:00:00.109] [UN] PID: 8862 TASK: ffff8c691547dd00 CPU: 6
> COMMAND: "reopen_file"
> #0 [ffff9965007c3c78] __schedule at ffffffff9b6e6095
> #1 [ffff9965007c3d08] schedule at ffffffff9b6e64df
> #2 [ffff9965007c3d18] schedule_timeout at ffffffff9b6e9f89
> #3 [ffff9965007c3d90] msleep at ffffffff9af573a9
> #4 [ffff9965007c3d98] _cifsFileInfo_put.cold.63 at ffffffffc0a42dd6 [cifs]
> #5 [ffff9965007c3e88] cifs_close at ffffffffc0a07aaf [cifs]
> #6 [ffff9965007c3ea0] __fput at ffffffff9b0efa6e
> #7 [ffff9965007c3ee8] task_work_run at ffffffff9aef1614
> #8 [ffff9965007c3f20] exit_to_usermode_loop at ffffffff9ae03d6f
> #9 [ffff9965007c3f38] do_syscall_64 at ffffffff9ae0444c
>
> closing the file, and trying to down_write cifsi->lock_sem
>
>
> [0 00:48:22.839] [UN] PID: 8857 TASK: ffff8c6914270000 CPU: 7
> COMMAND: "reopen_file"
> #0 [ffff9965006a7cc8] __schedule at ffffffff9b6e6095
> #1 [ffff9965006a7d58] schedule at ffffffff9b6e64df
> #2 [ffff9965006a7d68] io_schedule at ffffffff9b6e68e2
> #3 [ffff9965006a7d78] wait_on_page_bit at ffffffff9b03cac6
> #4 [ffff9965006a7e10] __filemap_fdatawait_range at ffffffff9b03b028
> #5 [ffff9965006a7ed8] filemap_write_and_wait at ffffffff9b040165
> #6 [ffff9965006a7ef0] cifs_flush at ffffffffc0a0c2fa [cifs]
> #7 [ffff9965006a7f10] filp_close at ffffffff9b0e93f1
> #8 [ffff9965006a7f30] __x64_sys_close at ffffffff9b0e9a0e
> #9 [ffff9965006a7f38] do_syscall_64 at ffffffff9ae04315
>
> in __filemap_fdatawait_range
> wait_on_page_writeback(page);
> for the same page of the file
>
>
>
> [0 00:48:22.718] [UN] PID: 8855 TASK: ffff8c69142745c0 CPU: 7
> COMMAND: "reopen_file"
> #0 [ffff9965005dfc98] __schedule at ffffffff9b6e6095
> #1 [ffff9965005dfd28] schedule at ffffffff9b6e64df
> #2 [ffff9965005dfd38] rwsem_down_write_slowpath at ffffffff9af283d7
> #3 [ffff9965005dfdf0] cifs_strict_writev at ffffffffc0a0c40a [cifs]
> #4 [ffff9965005dfe48] new_sync_write at ffffffff9b0ec1dd
> #5 [ffff9965005dfed0] vfs_write at ffffffff9b0eed35
> #6 [ffff9965005dff00] ksys_write at ffffffff9b0eefd9
> #7 [ffff9965005dff38] do_syscall_64 at ffffffff9ae04315
>
> inode_lock(inode);
>
>
> and one 'ls' later on, to see whether the rest of the mount is available
> (the test file is in the root, so we get blocked up on the directory
> ->i_rwsem), so the entire mount is unavailable
>
> [0 00:36:26.473] [UN] PID: 9802 TASK: ffff8c691436ae80 CPU: 4
> COMMAND: "ls"
> #0 [ffff996500393d28] __schedule at ffffffff9b6e6095
> #1 [ffff996500393db8] schedule at ffffffff9b6e64df
> #2 [ffff996500393dc8] rwsem_down_read_slowpath at ffffffff9b6e9421
> #3 [ffff996500393e78] down_read_killable at ffffffff9b6e95e2
> #4 [ffff996500393e88] iterate_dir at ffffffff9b103c56
> #5 [ffff996500393ec8] ksys_getdents64 at ffffffff9b104b0c
> #6 [ffff996500393f30] __x64_sys_getdents64 at ffffffff9b104bb6
> #7 [ffff996500393f38] do_syscall_64 at ffffffff9ae04315
>
> in iterate_dir:
> if (shared)
> res = down_read_killable(&inode->i_rwsem); <<<<
> else
> res = down_write_killable(&inode->i_rwsem);
>
Reported-by: Frank Sorenson <sorenson@redhat.com>
Reviewed-by: Pavel Shilovsky <pshilov@microsoft.com>
Signed-off-by: Ronnie Sahlberg <lsahlber@redhat.com>
Signed-off-by: Steve French <stfrench@microsoft.com>
2019-11-03 06:06:37 +03:00
/*
 * Workqueue on which the final part of cifsFileInfo_put() (the part that
 * needs lock_sem for write) runs, so that it executes in a thread holding
 * no other locks.
 */
struct workqueue_struct * fileinfo_put_wq ;
2017-05-03 18:54:01 +03:00
/* NOTE(review): presumably the queue for oplock break handling - confirm. */
struct workqueue_struct * cifsoplockd_wq ;
2016-05-24 13:27:44 +03:00
/* NOTE(review): 32-bit module-global value; how it is seeded and consumed is not visible here. */
__u32 cifs_lock_secret ;
2012-03-23 22:40:53 +04:00
2013-03-08 19:30:03 +04:00
/*
* Bumps refcount for cifs super block .
* Note that it should be only called if a reference to VFS super block is
* already held , e . g . in open - type syscalls context . Otherwise it can race with
* atomic_dec_and_test in deactivate_locked_super .
*/
void
cifs_sb_active ( struct super_block * sb )
{
struct cifs_sb_info * server = CIFS_SB ( sb ) ;
if ( atomic_inc_return ( & server - > active ) = = 1 )
atomic_inc ( & sb - > s_active ) ;
}
void
cifs_sb_deactive ( struct super_block * sb )
{
struct cifs_sb_info * server = CIFS_SB ( sb ) ;
if ( atomic_dec_and_test ( & server - > active ) )
deactivate_super ( sb ) ;
}
2005-04-17 02:20:36 +04:00
static int
2011-06-17 17:29:57 +04:00
cifs_read_super ( struct super_block * sb )
2005-04-17 02:20:36 +04:00
{
struct inode * inode ;
2011-05-25 13:35:34 +04:00
struct cifs_sb_info * cifs_sb ;
2013-10-06 23:08:20 +04:00
struct cifs_tcon * tcon ;
2019-03-23 00:32:35 +03:00
struct timespec64 ts ;
2005-04-17 02:20:36 +04:00
int rc = 0 ;
2007-07-13 04:33:32 +04:00
2011-05-25 13:35:34 +04:00
cifs_sb = CIFS_SB ( sb ) ;
2013-10-06 23:08:20 +04:00
tcon = cifs_sb_master_tcon ( cifs_sb ) ;
2005-04-17 02:20:36 +04:00
2011-06-17 17:05:48 +04:00
if ( cifs_sb - > mnt_cifs_flags & CIFS_MOUNT_POSIXACL )
2017-11-28 00:05:09 +03:00
sb - > s_flags | = SB_POSIXACL ;
2011-06-17 17:05:48 +04:00
smb3: snapshot mounts are read-only and make sure info is displayable about the mount
snapshot mounts were not marked as read-only and did not display the snapshot
time (in /proc/mounts) specified on mount
With this patch - note that can not write to the snapshot mount (see "ro" in
/proc/mounts line) and also the missing snapshot timewarp token time is
dumped. Sample line from /proc/mounts with the patch:
//127.0.0.1/scratch /mnt2 smb3 ro,relatime,vers=default,cache=strict,username=testuser,domain=,uid=0,noforceuid,gid=0,noforcegid,addr=127.0.0.1,file_mode=0755,dir_mode=0755,soft,nounix,serverino,mapposix,noperm,rsize=1048576,wsize=1048576,echo_interval=60,snapshot=1234567,actimeo=1 0 0
Signed-off-by: Steve French <stfrench@microsoft.com>
Reviewed-by: Paulo Alcantara <palcantara@suse.de>
2018-06-30 00:06:15 +03:00
if ( tcon - > snapshot_time )
sb - > s_flags | = SB_RDONLY ;
2013-10-06 23:08:20 +04:00
if ( tcon - > ses - > capabilities & tcon - > ses - > server - > vals - > cap_large_files )
2011-06-17 17:05:48 +04:00
sb - > s_maxbytes = MAX_LFS_FILESIZE ;
else
sb - > s_maxbytes = MAX_NON_LFS ;
2019-10-12 03:36:13 +03:00
/*
* Some very old servers like DOS and OS / 2 used 2 second granularity
* ( while all current servers use 100 ns granularity - see MS - DTYP )
* but 1 second is the maximum allowed granularity for the VFS
* so for old servers set time granularity to 1 second while for
* everything else ( current servers ) set it to 100 ns .
*/
2019-10-08 08:27:14 +03:00
if ( ( tcon - > ses - > server - > vals - > protocol_id = = SMB10_PROT_ID ) & &
( ( tcon - > ses - > capabilities &
tcon - > ses - > server - > vals - > cap_nt_find ) = = 0 ) & &
! tcon - > unix_ext ) {
sb - > s_time_gran = 1000000000 ; /* 1 second is max allowed gran */
ts = cnvrtDosUnixTm ( cpu_to_le16 ( SMB_DATE_MIN ) , 0 , 0 ) ;
2019-03-23 00:32:35 +03:00
sb - > s_time_min = ts . tv_sec ;
2019-10-08 08:27:14 +03:00
ts = cnvrtDosUnixTm ( cpu_to_le16 ( SMB_DATE_MAX ) ,
cpu_to_le16 ( SMB_TIME_MAX ) , 0 ) ;
2019-03-23 00:32:35 +03:00
sb - > s_time_max = ts . tv_sec ;
} else {
2019-10-08 08:27:14 +03:00
/*
* Almost every server , including all SMB2 + , uses DCE TIME
* ie 100 nanosecond units , since 1601. See MS - DTYP and MS - FSCC
*/
sb - > s_time_gran = 100 ;
ts = cifs_NTtimeToUnix ( 0 ) ;
2019-03-23 00:32:35 +03:00
sb - > s_time_min = ts . tv_sec ;
2019-10-08 08:27:14 +03:00
ts = cifs_NTtimeToUnix ( cpu_to_le64 ( S64_MAX ) ) ;
2019-03-23 00:32:35 +03:00
sb - > s_time_max = ts . tv_sec ;
}
2005-04-17 02:20:36 +04:00
sb - > s_magic = CIFS_MAGIC_NUMBER ;
sb - > s_op = & cifs_super_ops ;
2016-04-22 13:11:38 +03:00
sb - > s_xattr = cifs_xattr_handlers ;
2017-04-12 13:24:34 +03:00
rc = super_setup_bdi ( sb ) ;
if ( rc )
goto out_no_root ;
/* tune readahead according to rsize */
sb - > s_bdi - > ra_pages = cifs_sb - > rsize / PAGE_SIZE ;
2005-04-17 02:20:36 +04:00
sb - > s_blocksize = CIFS_MAX_MSGSIZE ;
sb - > s_blocksize_bits = 14 ; /* default 2**14 = CIFS_MAX_MSGSIZE */
2011-02-22 08:56:59 +03:00
inode = cifs_root_iget ( sb ) ;
2005-04-17 02:20:36 +04:00
2008-02-07 11:15:33 +03:00
if ( IS_ERR ( inode ) ) {
rc = PTR_ERR ( inode ) ;
2005-04-17 02:20:36 +04:00
goto out_no_root ;
}
2013-10-06 23:08:20 +04:00
if ( tcon - > nocase )
2013-07-30 19:38:44 +04:00
sb - > s_d_op = & cifs_ci_dentry_ops ;
else
sb - > s_d_op = & cifs_dentry_ops ;
2012-01-09 07:15:13 +04:00
sb - > s_root = d_make_root ( inode ) ;
2005-04-17 02:20:36 +04:00
if ( ! sb - > s_root ) {
rc = - ENOMEM ;
goto out_no_root ;
}
2007-07-13 04:33:32 +04:00
2011-10-12 16:14:04 +04:00
# ifdef CONFIG_CIFS_NFSD_EXPORT
2007-07-11 22:30:34 +04:00
if ( cifs_sb - > mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM ) {
2013-05-05 07:12:25 +04:00
cifs_dbg ( FYI , " export ops supported \n " ) ;
2007-07-11 22:30:34 +04:00
sb - > s_export_op = & cifs_export_ops ;
}
2011-10-12 16:14:04 +04:00
# endif /* CONFIG_CIFS_NFSD_EXPORT */
2005-04-17 02:20:36 +04:00
return 0 ;
out_no_root :
2013-05-05 07:12:25 +04:00
cifs_dbg ( VFS , " %s: get root inode failed \n " , __func__ ) ;
2005-04-17 02:20:36 +04:00
return rc ;
}
2011-06-17 16:34:57 +04:00
static void cifs_kill_sb(struct super_block *sb)
{
	struct cifs_sb_info *sbi;

	/* Look up the CIFS-private info first; it is used after the generic teardown. */
	sbi = CIFS_SB(sb);

	kill_anon_super(sb);
	cifs_umount(sbi);
}
static int
2006-06-23 13:02:58 +04:00
cifs_statfs ( struct dentry * dentry , struct kstatfs * buf )
2005-04-17 02:20:36 +04:00
{
2006-06-23 13:02:58 +04:00
struct super_block * sb = dentry - > d_sb ;
2008-04-28 08:04:34 +04:00
struct cifs_sb_info * cifs_sb = CIFS_SB ( sb ) ;
2011-05-27 08:34:02 +04:00
struct cifs_tcon * tcon = cifs_sb_master_tcon ( cifs_sb ) ;
2012-09-19 03:20:33 +04:00
struct TCP_Server_Info * server = tcon - > ses - > server ;
2012-06-20 11:21:16 +04:00
unsigned int xid ;
2012-09-19 03:20:33 +04:00
int rc = 0 ;
2005-04-17 02:20:36 +04:00
2012-06-20 11:21:16 +04:00
xid = get_xid ( ) ;
2005-04-17 02:20:36 +04:00
2018-06-25 07:18:52 +03:00
if ( le32_to_cpu ( tcon - > fsAttrInfo . MaxPathNameComponentLength ) > 0 )
buf - > f_namelen =
le32_to_cpu ( tcon - > fsAttrInfo . MaxPathNameComponentLength ) ;
else
buf - > f_namelen = PATH_MAX ;
buf - > f_fsid . val [ 0 ] = tcon - > vol_serial_number ;
/* are using part of create time for more randomness, see man statfs */
buf - > f_fsid . val [ 1 ] = ( int ) le64_to_cpu ( tcon - > vol_create_time ) ;
2005-04-17 02:20:36 +04:00
buf - > f_files = 0 ; /* undefined */
buf - > f_ffree = 0 ; /* unlimited */
2012-09-19 03:20:33 +04:00
if ( server - > ops - > queryfs )
2020-02-03 22:46:43 +03:00
rc = server - > ops - > queryfs ( xid , tcon , cifs_sb , buf ) ;
2008-04-28 08:04:34 +04:00
2012-06-20 11:21:16 +04:00
free_xid ( xid ) ;
2008-04-28 08:04:34 +04:00
return 0 ;
2005-04-17 02:20:36 +04:00
}
2014-08-17 17:38:47 +04:00
static long cifs_fallocate(struct file *file, int mode, loff_t off, loff_t len)
{
	struct cifs_tcon *tcon = cifs_sb_master_tcon(CIFS_FILE_SB(file));
	struct TCP_Server_Info *srv = tcon->ses->server;

	/* Dialects without a fallocate op cannot service the request. */
	if (!srv->ops->fallocate)
		return -EOPNOTSUPP;

	return srv->ops->fallocate(file, tcon, mode, off, len);
}
2011-06-21 03:28:19 +04:00
static int cifs_permission ( struct inode * inode , int mask )
2005-04-17 02:20:36 +04:00
{
struct cifs_sb_info * cifs_sb ;
cifs_sb = CIFS_SB ( inode - > i_sb ) ;
2008-07-31 15:41:58 +04:00
if ( cifs_sb - > mnt_cifs_flags & CIFS_MOUNT_NO_PERM ) {
if ( ( mask & MAY_EXEC ) & & ! execute_ok ( inode ) )
return - EACCES ;
else
return 0 ;
} else /* file mode might have been restricted at mount time
2007-07-13 04:33:32 +04:00
on the client ( above and beyond ACL on servers ) for
2005-04-17 02:20:36 +04:00
servers which do not support setting and viewing mode bits ,
2007-07-13 04:33:32 +04:00
so allowing client to check permissions is useful */
2011-06-21 03:16:29 +04:00
return generic_permission ( inode , mask ) ;
2005-04-17 02:20:36 +04:00
}
2006-12-07 07:33:20 +03:00
/*
 * Slab caches private to this file. cifs_inode_cachep backs
 * cifs_alloc_inode() and cifs_free_inode().
 * NOTE(review): the req/mid/sm_req caches presumably back the mempools
 * below - confirm where the pools are created.
 */
static struct kmem_cache * cifs_inode_cachep ;
static struct kmem_cache * cifs_req_cachep ;
static struct kmem_cache * cifs_mid_cachep ;
static struct kmem_cache * cifs_sm_req_cachep ;
2005-04-17 02:20:36 +04:00
/* Non-static mempool handles; these are the definitions of the pools declared extern earlier in this file. */
mempool_t * cifs_sm_req_poolp ;
mempool_t * cifs_req_poolp ;
mempool_t * cifs_mid_poolp ;
static struct inode *
cifs_alloc_inode ( struct super_block * sb )
{
struct cifsInodeInfo * cifs_inode ;
2006-12-07 07:33:17 +03:00
cifs_inode = kmem_cache_alloc ( cifs_inode_cachep , GFP_KERNEL ) ;
2005-04-17 02:20:36 +04:00
if ( ! cifs_inode )
return NULL ;
cifs_inode - > cifsAttrs = 0x20 ; /* default */
cifs_inode - > time = 0 ;
2012-09-19 17:22:44 +04:00
/*
* Until the file is open and we have gotten oplock info back from the
* server , can not assume caching of file data or metadata .
*/
2010-11-03 10:58:57 +03:00
cifs_set_oplock_level ( cifs_inode , 0 ) ;
2014-04-30 17:31:45 +04:00
cifs_inode - > flags = 0 ;
2014-03-11 20:11:47 +04:00
spin_lock_init ( & cifs_inode - > writers_lock ) ;
cifs_inode - > writers = 0 ;
2005-04-17 02:20:36 +04:00
cifs_inode - > vfs_inode . i_blkbits = 14 ; /* 2**14 = CIFS_MAX_MSGSIZE */
2009-04-03 21:44:00 +04:00
cifs_inode - > server_eof = 0 ;
2011-01-07 19:30:27 +03:00
cifs_inode - > uniqueid = 0 ;
cifs_inode - > createtime = 0 ;
2013-09-05 21:30:16 +04:00
cifs_inode - > epoch = 0 ;
2019-06-05 03:38:38 +03:00
spin_lock_init ( & cifs_inode - > open_file_lock ) ;
2016-09-22 08:39:34 +03:00
generate_random_uuid ( cifs_inode - > lease_key ) ;
2017-07-09 02:48:15 +03:00
2012-09-19 17:22:44 +04:00
/*
* Can not set i_flags here - they get immediately overwritten to zero
* by the VFS .
*/
/* cifs_inode->vfs_inode.i_flags = S_NOATIME | S_NOCMTIME; */
2005-04-17 02:20:36 +04:00
INIT_LIST_HEAD ( & cifs_inode - > openFileList ) ;
2012-09-19 17:22:43 +04:00
INIT_LIST_HEAD ( & cifs_inode - > llist ) ;
2005-04-17 02:20:36 +04:00
return & cifs_inode - > vfs_inode ;
}
static void
2019-04-15 06:18:35 +03:00
cifs_free_inode ( struct inode * inode )
2005-04-17 02:20:36 +04:00
{
2019-04-15 06:18:35 +03:00
kmem_cache_free ( cifs_inode_cachep , CIFS_I ( inode ) ) ;
2005-04-17 02:20:36 +04:00
}
2010-07-05 16:42:45 +04:00
static void
2010-06-07 22:34:48 +04:00
cifs_evict_inode ( struct inode * inode )
2010-07-05 16:42:45 +04:00
{
2014-04-04 01:47:49 +04:00
truncate_inode_pages_final ( & inode - > i_data ) ;
2012-05-03 16:48:02 +04:00
clear_inode ( inode ) ;
2010-07-05 16:42:45 +04:00
cifs_fscache_release_inode_cookie ( inode ) ;
}
2009-06-11 18:27:32 +04:00
static void
cifs_show_address ( struct seq_file * s , struct TCP_Server_Info * server )
{
2010-12-13 19:08:35 +03:00
struct sockaddr_in * sa = ( struct sockaddr_in * ) & server - > dstaddr ;
struct sockaddr_in6 * sa6 = ( struct sockaddr_in6 * ) & server - > dstaddr ;
2014-05-13 20:04:17 +04:00
seq_puts ( s , " ,addr= " ) ;
2009-06-11 18:27:32 +04:00
2010-12-13 19:08:35 +03:00
switch ( server - > dstaddr . ss_family ) {
2009-06-11 18:27:32 +04:00
case AF_INET :
2010-12-13 19:08:35 +03:00
seq_printf ( s , " %pI4 " , & sa - > sin_addr . s_addr ) ;
2009-06-11 18:27:32 +04:00
break ;
case AF_INET6 :
2010-12-13 19:08:35 +03:00
seq_printf ( s , " %pI6 " , & sa6 - > sin6_addr . s6_addr ) ;
if ( sa6 - > sin6_scope_id )
seq_printf ( s , " %%%u " , sa6 - > sin6_scope_id ) ;
2009-06-11 18:27:32 +04:00
break ;
default :
2014-05-13 20:04:17 +04:00
seq_puts ( s , " (unknown) " ) ;
2009-06-11 18:27:32 +04:00
}
2017-11-07 11:54:55 +03:00
if ( server - > rdma )
seq_puts ( s , " ,rdma " ) ;
2009-06-11 18:27:32 +04:00
}
2011-06-13 19:50:41 +04:00
static void
2013-05-26 15:01:00 +04:00
cifs_show_security ( struct seq_file * s , struct cifs_ses * ses )
2011-06-13 19:50:41 +04:00
{
2015-09-12 03:24:19 +03:00
if ( ses - > sectype = = Unspecified ) {
if ( ses - > user_name = = NULL )
seq_puts ( s , " ,sec=none " ) ;
2013-05-26 15:01:00 +04:00
return ;
2015-09-12 03:24:19 +03:00
}
2013-05-26 15:01:00 +04:00
2014-05-13 20:04:17 +04:00
seq_puts ( s , " ,sec= " ) ;
2011-06-13 19:50:41 +04:00
2013-05-26 15:01:00 +04:00
switch ( ses - > sectype ) {
2011-06-13 19:50:41 +04:00
case LANMAN :
2014-05-13 20:04:17 +04:00
seq_puts ( s , " lanman " ) ;
2011-06-13 19:50:41 +04:00
break ;
case NTLMv2 :
2014-05-13 20:04:17 +04:00
seq_puts ( s , " ntlmv2 " ) ;
2011-06-13 19:50:41 +04:00
break ;
case NTLM :
2014-05-13 20:04:17 +04:00
seq_puts ( s , " ntlm " ) ;
2011-06-13 19:50:41 +04:00
break ;
case Kerberos :
2020-02-10 12:38:14 +03:00
seq_puts ( s , " krb5 " ) ;
2011-06-13 19:50:41 +04:00
break ;
case RawNTLMSSP :
2014-05-13 20:04:17 +04:00
seq_puts ( s , " ntlmssp " ) ;
2011-06-13 19:50:41 +04:00
break ;
default :
/* shouldn't ever happen */
2014-05-13 20:04:17 +04:00
seq_puts ( s , " unknown " ) ;
2011-06-13 19:50:41 +04:00
break ;
}
2013-05-26 15:01:00 +04:00
if ( ses - > sign )
2014-05-13 20:04:17 +04:00
seq_puts ( s , " i " ) ;
2020-02-10 12:38:14 +03:00
if ( ses - > sectype = = Kerberos )
seq_printf ( s , " ,cruid=%u " ,
from_kuid_munged ( & init_user_ns , ses - > cred_uid ) ) ;
2011-06-13 19:50:41 +04:00
}
2012-05-16 15:53:01 +04:00
static void
cifs_show_cache_flavor ( struct seq_file * s , struct cifs_sb_info * cifs_sb )
{
2014-05-13 20:04:17 +04:00
seq_puts ( s , " ,cache= " ) ;
2012-05-16 15:53:01 +04:00
if ( cifs_sb - > mnt_cifs_flags & CIFS_MOUNT_STRICT_IO )
2014-05-13 20:04:17 +04:00
seq_puts ( s , " strict " ) ;
2012-05-16 15:53:01 +04:00
else if ( cifs_sb - > mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO )
2014-05-13 20:04:17 +04:00
seq_puts ( s , " none " ) ;
2019-08-30 10:12:41 +03:00
else if ( cifs_sb - > mnt_cifs_flags & CIFS_MOUNT_RW_CACHE )
seq_puts ( s , " singleclient " ) ; /* assume only one client access */
2019-08-28 07:58:54 +03:00
else if ( cifs_sb - > mnt_cifs_flags & CIFS_MOUNT_RO_CACHE )
seq_puts ( s , " ro " ) ; /* read only caching assumed */
2012-05-16 15:53:01 +04:00
else
2014-05-13 20:04:17 +04:00
seq_puts ( s , " loose " ) ;
2012-05-16 15:53:01 +04:00
}
2013-07-30 21:34:40 +04:00
static void
cifs_show_nls ( struct seq_file * s , struct nls_table * cur )
{
struct nls_table * def ;
/* Display iocharset= option if it's not default charset */
def = load_nls_default ( ) ;
if ( def ! = cur )
seq_printf ( s , " ,iocharset=%s " , cur - > charset ) ;
unload_nls ( def ) ;
}
2005-04-17 02:20:36 +04:00
/*
* cifs_show_options ( ) is for displaying mount options in / proc / mounts .
* Not all settable options are displayed but most of the important
* ones are .
*/
static int
2011-12-09 06:32:45 +04:00
cifs_show_options ( struct seq_file * s , struct dentry * root )
2005-04-17 02:20:36 +04:00
{
2011-12-09 06:32:45 +04:00
struct cifs_sb_info * cifs_sb = CIFS_SB ( root - > d_sb ) ;
2011-05-27 08:34:02 +04:00
struct cifs_tcon * tcon = cifs_sb_master_tcon ( cifs_sb ) ;
2010-09-02 04:06:02 +04:00
struct sockaddr * srcaddr ;
srcaddr = ( struct sockaddr * ) & tcon - > ses - > server - > srcaddr ;
2009-06-11 18:27:28 +04:00
fs: create and use seq_show_option for escaping
Many file systems that implement the show_options hook fail to correctly
escape their output which could lead to unescaped characters (e.g. new
lines) leaking into /proc/mounts and /proc/[pid]/mountinfo files. This
could lead to confusion, spoofed entries (resulting in things like
systemd issuing false d-bus "mount" notifications), and who knows what
else. This looks like it would only be the root user stepping on
themselves, but it's possible weird things could happen in containers or
in other situations with delegated mount privileges.
Here's an example using overlay with setuid fusermount trusting the
contents of /proc/mounts (via the /etc/mtab symlink). Imagine the use
of "sudo" is something more sneaky:
$ BASE="ovl"
$ MNT="$BASE/mnt"
$ LOW="$BASE/lower"
$ UP="$BASE/upper"
$ WORK="$BASE/work/ 0 0
none /proc fuse.pwn user_id=1000"
$ mkdir -p "$LOW" "$UP" "$WORK"
$ sudo mount -t overlay -o "lowerdir=$LOW,upperdir=$UP,workdir=$WORK" none /mnt
$ cat /proc/mounts
none /root/ovl/mnt overlay rw,relatime,lowerdir=ovl/lower,upperdir=ovl/upper,workdir=ovl/work/ 0 0
none /proc fuse.pwn user_id=1000 0 0
$ fusermount -u /proc
$ cat /proc/mounts
cat: /proc/mounts: No such file or directory
This fixes the problem by adding new seq_show_option and
seq_show_option_n helpers, and updating the vulnerable show_option
handlers to use them as needed. Some, like SELinux, need to be open
coded due to unusual existing escape mechanisms.
[akpm@linux-foundation.org: add lost chunk, per Kees]
[keescook@chromium.org: seq_show_option should be using const parameters]
Signed-off-by: Kees Cook <keescook@chromium.org>
Acked-by: Serge Hallyn <serge.hallyn@canonical.com>
Acked-by: Jan Kara <jack@suse.com>
Acked-by: Paul Moore <paul@paul-moore.com>
Cc: J. R. Okajima <hooanon05g@gmail.com>
Signed-off-by: Kees Cook <keescook@chromium.org>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2015-09-05 01:44:57 +03:00
seq_show_option ( s , " vers " , tcon - > ses - > server - > vals - > version_string ) ;
2013-05-26 15:01:00 +04:00
cifs_show_security ( s , tcon - > ses ) ;
2012-05-16 15:53:01 +04:00
cifs_show_cache_flavor ( s , cifs_sb ) ;
2011-06-13 19:50:41 +04:00
2019-09-12 05:46:20 +03:00
if ( tcon - > no_lease )
seq_puts ( s , " ,nolease " ) ;
2010-09-30 03:51:12 +04:00
if ( cifs_sb - > mnt_cifs_flags & CIFS_MOUNT_MULTIUSER )
2014-05-13 20:04:17 +04:00
seq_puts ( s , " ,multiuser " ) ;
2011-02-25 10:11:56 +03:00
else if ( tcon - > ses - > user_name )
fs: create and use seq_show_option for escaping
Many file systems that implement the show_options hook fail to correctly
escape their output which could lead to unescaped characters (e.g. new
lines) leaking into /proc/mounts and /proc/[pid]/mountinfo files. This
could lead to confusion, spoofed entries (resulting in things like
systemd issuing false d-bus "mount" notifications), and who knows what
else. This looks like it would only be the root user stepping on
themselves, but it's possible weird things could happen in containers or
in other situations with delegated mount privileges.
Here's an example using overlay with setuid fusermount trusting the
contents of /proc/mounts (via the /etc/mtab symlink). Imagine the use
of "sudo" is something more sneaky:
$ BASE="ovl"
$ MNT="$BASE/mnt"
$ LOW="$BASE/lower"
$ UP="$BASE/upper"
$ WORK="$BASE/work/ 0 0
none /proc fuse.pwn user_id=1000"
$ mkdir -p "$LOW" "$UP" "$WORK"
$ sudo mount -t overlay -o "lowerdir=$LOW,upperdir=$UP,workdir=$WORK" none /mnt
$ cat /proc/mounts
none /root/ovl/mnt overlay rw,relatime,lowerdir=ovl/lower,upperdir=ovl/upper,workdir=ovl/work/ 0 0
none /proc fuse.pwn user_id=1000 0 0
$ fusermount -u /proc
$ cat /proc/mounts
cat: /proc/mounts: No such file or directory
This fixes the problem by adding new seq_show_option and
seq_show_option_n helpers, and updating the vulnerable show_option
handlers to use them as needed. Some, like SELinux, need to be open
coded due to unusual existing escape mechanisms.
[akpm@linux-foundation.org: add lost chunk, per Kees]
[keescook@chromium.org: seq_show_option should be using const parameters]
Signed-off-by: Kees Cook <keescook@chromium.org>
Acked-by: Serge Hallyn <serge.hallyn@canonical.com>
Acked-by: Jan Kara <jack@suse.com>
Acked-by: Paul Moore <paul@paul-moore.com>
Cc: J. R. Okajima <hooanon05g@gmail.com>
Signed-off-by: Kees Cook <keescook@chromium.org>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2015-09-05 01:44:57 +03:00
seq_show_option ( s , " username " , tcon - > ses - > user_name ) ;
2010-09-30 03:51:12 +04:00
2018-08-10 04:31:10 +03:00
if ( tcon - > ses - > domainName & & tcon - > ses - > domainName [ 0 ] ! = 0 )
fs: create and use seq_show_option for escaping
Many file systems that implement the show_options hook fail to correctly
escape their output which could lead to unescaped characters (e.g. new
lines) leaking into /proc/mounts and /proc/[pid]/mountinfo files. This
could lead to confusion, spoofed entries (resulting in things like
systemd issuing false d-bus "mount" notifications), and who knows what
else. This looks like it would only be the root user stepping on
themselves, but it's possible weird things could happen in containers or
in other situations with delegated mount privileges.
Here's an example using overlay with setuid fusermount trusting the
contents of /proc/mounts (via the /etc/mtab symlink). Imagine the use
of "sudo" is something more sneaky:
$ BASE="ovl"
$ MNT="$BASE/mnt"
$ LOW="$BASE/lower"
$ UP="$BASE/upper"
$ WORK="$BASE/work/ 0 0
none /proc fuse.pwn user_id=1000"
$ mkdir -p "$LOW" "$UP" "$WORK"
$ sudo mount -t overlay -o "lowerdir=$LOW,upperdir=$UP,workdir=$WORK" none /mnt
$ cat /proc/mounts
none /root/ovl/mnt overlay rw,relatime,lowerdir=ovl/lower,upperdir=ovl/upper,workdir=ovl/work/ 0 0
none /proc fuse.pwn user_id=1000 0 0
$ fusermount -u /proc
$ cat /proc/mounts
cat: /proc/mounts: No such file or directory
This fixes the problem by adding new seq_show_option and
seq_show_option_n helpers, and updating the vulnerable show_option
handlers to use them as needed. Some, like SELinux, need to be open
coded due to unusual existing escape mechanisms.
[akpm@linux-foundation.org: add lost chunk, per Kees]
[keescook@chromium.org: seq_show_option should be using const parameters]
Signed-off-by: Kees Cook <keescook@chromium.org>
Acked-by: Serge Hallyn <serge.hallyn@canonical.com>
Acked-by: Jan Kara <jack@suse.com>
Acked-by: Paul Moore <paul@paul-moore.com>
Cc: J. R. Okajima <hooanon05g@gmail.com>
Signed-off-by: Kees Cook <keescook@chromium.org>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2015-09-05 01:44:57 +03:00
seq_show_option ( s , " domain " , tcon - > ses - > domainName ) ;
2009-06-11 18:27:28 +04:00
2010-09-02 04:06:02 +04:00
if ( srcaddr - > sa_family ! = AF_UNSPEC ) {
struct sockaddr_in * saddr4 ;
struct sockaddr_in6 * saddr6 ;
saddr4 = ( struct sockaddr_in * ) srcaddr ;
saddr6 = ( struct sockaddr_in6 * ) srcaddr ;
if ( srcaddr - > sa_family = = AF_INET6 )
seq_printf ( s , " ,srcaddr=%pI6c " ,
& saddr6 - > sin6_addr ) ;
else if ( srcaddr - > sa_family = = AF_INET )
seq_printf ( s , " ,srcaddr=%pI4 " ,
& saddr4 - > sin_addr . s_addr ) ;
else
seq_printf ( s , " ,srcaddr=BAD-AF:%i " ,
( int ) ( srcaddr - > sa_family ) ) ;
}
2013-02-06 13:20:20 +04:00
seq_printf ( s , " ,uid=%u " ,
from_kuid_munged ( & init_user_ns , cifs_sb - > mnt_uid ) ) ;
2009-06-11 18:27:29 +04:00
if ( cifs_sb - > mnt_cifs_flags & CIFS_MOUNT_OVERR_UID )
2014-05-13 20:04:17 +04:00
seq_puts ( s , " ,forceuid " ) ;
2009-08-03 20:45:10 +04:00
else
2014-05-13 20:04:17 +04:00
seq_puts ( s , " ,noforceuid " ) ;
2009-06-11 18:27:29 +04:00
2013-02-06 13:20:20 +04:00
seq_printf ( s , " ,gid=%u " ,
from_kgid_munged ( & init_user_ns , cifs_sb - > mnt_gid ) ) ;
2009-06-11 18:27:29 +04:00
if ( cifs_sb - > mnt_cifs_flags & CIFS_MOUNT_OVERR_GID )
2014-05-13 20:04:17 +04:00
seq_puts ( s , " ,forcegid " ) ;
2009-08-03 20:45:10 +04:00
else
2014-05-13 20:04:17 +04:00
seq_puts ( s , " ,noforcegid " ) ;
2009-06-11 18:27:28 +04:00
2009-06-11 18:27:32 +04:00
cifs_show_address ( s , tcon - > ses - > server ) ;
2005-04-17 02:20:36 +04:00
2009-06-11 18:27:28 +04:00
if ( ! tcon - > unix_ext )
2011-07-26 11:22:14 +04:00
seq_printf ( s , " ,file_mode=0%ho,dir_mode=0%ho " ,
2008-05-17 07:12:45 +04:00
cifs_sb - > mnt_file_mode ,
cifs_sb - > mnt_dir_mode ) ;
2013-07-30 21:34:40 +04:00
cifs_show_nls ( s , cifs_sb - > local_nls ) ;
2009-06-11 18:27:28 +04:00
if ( tcon - > seal )
2014-05-13 20:04:17 +04:00
seq_puts ( s , " ,seal " ) ;
2020-02-20 08:59:32 +03:00
else if ( tcon - > ses - > server - > ignore_signature )
seq_puts ( s , " ,signloosely " ) ;
2009-06-11 18:27:28 +04:00
if ( tcon - > nocase )
2014-05-13 20:04:17 +04:00
seq_puts ( s , " ,nocase " ) ;
2020-05-19 11:06:57 +03:00
if ( tcon - > nodelete )
seq_puts ( s , " ,nodelete " ) ;
2019-01-21 04:51:59 +03:00
if ( tcon - > local_lease )
seq_puts ( s , " ,locallease " ) ;
2009-06-11 18:27:28 +04:00
if ( tcon - > retry )
2014-05-13 20:04:17 +04:00
seq_puts ( s , " ,hard " ) ;
2017-09-20 06:09:23 +03:00
else
seq_puts ( s , " ,soft " ) ;
2015-10-01 05:07:59 +03:00
if ( tcon - > use_persistent )
seq_puts ( s , " ,persistenthandles " ) ;
2015-11-03 19:08:53 +03:00
else if ( tcon - > use_resilient )
seq_puts ( s , " ,resilienthandles " ) ;
2018-05-21 07:41:10 +03:00
if ( tcon - > posix_extensions )
seq_puts ( s , " ,posix " ) ;
else if ( tcon - > unix_ext )
seq_puts ( s , " ,unix " ) ;
else
seq_puts ( s , " ,nounix " ) ;
2018-09-21 04:10:25 +03:00
if ( cifs_sb - > mnt_cifs_flags & CIFS_MOUNT_NO_DFS )
seq_puts ( s , " ,nodfs " ) ;
2009-06-11 18:27:28 +04:00
if ( cifs_sb - > mnt_cifs_flags & CIFS_MOUNT_POSIX_PATHS )
2014-05-13 20:04:17 +04:00
seq_puts ( s , " ,posixpaths " ) ;
2009-06-11 18:27:28 +04:00
if ( cifs_sb - > mnt_cifs_flags & CIFS_MOUNT_SET_UID )
2014-05-13 20:04:17 +04:00
seq_puts ( s , " ,setuids " ) ;
2016-09-23 09:36:34 +03:00
if ( cifs_sb - > mnt_cifs_flags & CIFS_MOUNT_UID_FROM_ACL )
seq_puts ( s , " ,idsfromsid " ) ;
2009-06-11 18:27:28 +04:00
if ( cifs_sb - > mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM )
2014-05-13 20:04:17 +04:00
seq_puts ( s , " ,serverino " ) ;
2011-05-26 10:02:00 +04:00
if ( cifs_sb - > mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD )
2014-05-13 20:04:17 +04:00
seq_puts ( s , " ,rwpidforward " ) ;
2011-05-26 10:02:00 +04:00
if ( cifs_sb - > mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL )
2014-05-13 20:04:17 +04:00
seq_puts ( s , " ,forcemand " ) ;
2009-06-11 18:27:28 +04:00
if ( cifs_sb - > mnt_cifs_flags & CIFS_MOUNT_NO_XATTR )
2014-05-13 20:04:17 +04:00
seq_puts ( s , " ,nouser_xattr " ) ;
2009-06-11 18:27:28 +04:00
if ( cifs_sb - > mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR )
2014-05-13 20:04:17 +04:00
seq_puts ( s , " ,mapchars " ) ;
2015-02-13 09:35:58 +03:00
if ( cifs_sb - > mnt_cifs_flags & CIFS_MOUNT_MAP_SFM_CHR )
seq_puts ( s , " ,mapposix " ) ;
2009-06-11 18:27:28 +04:00
if ( cifs_sb - > mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL )
2014-05-13 20:04:17 +04:00
seq_puts ( s , " ,sfu " ) ;
2009-06-11 18:27:28 +04:00
if ( cifs_sb - > mnt_cifs_flags & CIFS_MOUNT_NO_BRL )
2014-05-13 20:04:17 +04:00
seq_puts ( s , " ,nobrl " ) ;
2018-04-26 06:19:09 +03:00
if ( cifs_sb - > mnt_cifs_flags & CIFS_MOUNT_NO_HANDLE_CACHE )
seq_puts ( s , " ,nohandlecache " ) ;
2019-06-24 10:01:42 +03:00
if ( cifs_sb - > mnt_cifs_flags & CIFS_MOUNT_MODE_FROM_SID )
seq_puts ( s , " ,modefromsid " ) ;
2009-06-11 18:27:28 +04:00
if ( cifs_sb - > mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL )
2014-05-13 20:04:17 +04:00
seq_puts ( s , " ,cifsacl " ) ;
2009-06-11 18:27:28 +04:00
if ( cifs_sb - > mnt_cifs_flags & CIFS_MOUNT_DYNPERM )
2014-05-13 20:04:17 +04:00
seq_puts ( s , " ,dynperm " ) ;
2017-11-28 00:05:09 +03:00
if ( root - > d_sb - > s_flags & SB_POSIXACL )
2014-05-13 20:04:17 +04:00
seq_puts ( s , " ,acl " ) ;
2010-07-30 16:56:00 +04:00
if ( cifs_sb - > mnt_cifs_flags & CIFS_MOUNT_MF_SYMLINKS )
2014-05-13 20:04:17 +04:00
seq_puts ( s , " ,mfsymlinks " ) ;
2010-11-24 15:19:07 +03:00
if ( cifs_sb - > mnt_cifs_flags & CIFS_MOUNT_FSCACHE )
2014-05-13 20:04:17 +04:00
seq_puts ( s , " ,fsc " ) ;
2011-10-20 05:44:48 +04:00
if ( cifs_sb - > mnt_cifs_flags & CIFS_MOUNT_NOSSYNC )
2014-05-13 20:04:17 +04:00
seq_puts ( s , " ,nostrictsync " ) ;
2011-10-20 05:44:48 +04:00
if ( cifs_sb - > mnt_cifs_flags & CIFS_MOUNT_NO_PERM )
2014-05-13 20:04:17 +04:00
seq_puts ( s , " ,noperm " ) ;
2012-04-24 18:28:14 +04:00
if ( cifs_sb - > mnt_cifs_flags & CIFS_MOUNT_CIFS_BACKUPUID )
2013-02-06 13:20:20 +04:00
seq_printf ( s , " ,backupuid=%u " ,
from_kuid_munged ( & init_user_ns ,
cifs_sb - > mnt_backupuid ) ) ;
2012-04-24 18:28:14 +04:00
if ( cifs_sb - > mnt_cifs_flags & CIFS_MOUNT_CIFS_BACKUPGID )
2013-02-06 13:20:20 +04:00
seq_printf ( s , " ,backupgid=%u " ,
from_kgid_munged ( & init_user_ns ,
cifs_sb - > mnt_backupgid ) ) ;
2009-06-11 18:27:28 +04:00
2012-04-24 18:28:30 +04:00
seq_printf ( s , " ,rsize=%u " , cifs_sb - > rsize ) ;
seq_printf ( s , " ,wsize=%u " , cifs_sb - > wsize ) ;
2019-03-01 06:32:15 +03:00
seq_printf ( s , " ,bsize=%u " , cifs_sb - > bsize ) ;
2019-09-09 07:22:02 +03:00
if ( tcon - > ses - > server - > min_offload )
seq_printf ( s , " ,esize=%u " , tcon - > ses - > server - > min_offload ) ;
2015-12-18 21:31:36 +03:00
seq_printf ( s , " ,echo_interval=%lu " ,
tcon - > ses - > server - > echo_interval / HZ ) ;
2019-06-18 01:34:57 +03:00
/* Only display max_credits if it was overridden on mount */
if ( tcon - > ses - > server - > max_credits ! = SMB2_MAX_CREDITS_AVAILABLE )
seq_printf ( s , " ,max_credits=%u " , tcon - > ses - > server - > max_credits ) ;
smb3: snapshot mounts are read-only and make sure info is displayable about the mount
snapshot mounts were not marked as read-only and did not display the snapshot
time (in /proc/mounts) specified on mount
With this patch - note that can not write to the snapshot mount (see "ro" in
/proc/mounts line) and also the missing snapshot timewarp token time is
dumped. Sample line from /proc/mounts with the patch:
//127.0.0.1/scratch /mnt2 smb3 ro,relatime,vers=default,cache=strict,username=testuser,domain=,uid=0,noforceuid,gid=0,noforcegid,addr=127.0.0.1,file_mode=0755,dir_mode=0755,soft,nounix,serverino,mapposix,noperm,rsize=1048576,wsize=1048576,echo_interval=60,snapshot=1234567,actimeo=1 0 0
Signed-off-by: Steve French <stfrench@microsoft.com>
Reviewed-by: Paulo Alcantara <palcantara@suse.de>
2018-06-30 00:06:15 +03:00
if ( tcon - > snapshot_time )
seq_printf ( s , " ,snapshot=%llu " , tcon - > snapshot_time ) ;
2019-03-30 00:31:07 +03:00
if ( tcon - > handle_timeout )
seq_printf ( s , " ,handletimeout=%u " , tcon - > handle_timeout ) ;
2010-12-01 12:12:28 +03:00
/* convert actimeo and display it in seconds */
2012-04-25 15:10:14 +04:00
seq_printf ( s , " ,actimeo=%lu " , cifs_sb - > actimeo / HZ ) ;
2009-06-11 18:27:28 +04:00
2019-09-20 05:32:20 +03:00
if ( tcon - > ses - > chan_max > 1 )
2020-06-10 03:50:40 +03:00
seq_printf ( s , " ,multichannel,max_channels=%zu " ,
2019-09-20 05:32:20 +03:00
tcon - > ses - > chan_max ) ;
2005-04-17 02:20:36 +04:00
return 0 ;
}
2008-04-24 15:21:56 +04:00
static void cifs_umount_begin ( struct super_block * sb )
2005-10-10 21:34:22 +04:00
{
2008-04-24 15:21:56 +04:00
struct cifs_sb_info * cifs_sb = CIFS_SB ( sb ) ;
2011-05-27 08:34:02 +04:00
struct cifs_tcon * tcon ;
2005-10-10 21:34:22 +04:00
2007-05-01 00:13:06 +04:00
if ( cifs_sb = = NULL )
2005-10-11 01:28:38 +04:00
return ;
2010-09-21 03:01:35 +04:00
tcon = cifs_sb_master_tcon ( cifs_sb ) ;
2008-11-15 19:12:47 +03:00
2010-10-18 21:59:37 +04:00
spin_lock ( & cifs_tcp_ses_lock ) ;
2009-06-26 07:25:49 +04:00
if ( ( tcon - > tc_count > 1 ) | | ( tcon - > tidStatus = = CifsExiting ) ) {
/* we have other mounts to same share or we have
already tried to force umount this and woken up
all waiting network requests , nothing to do */
2010-10-18 21:59:37 +04:00
spin_unlock ( & cifs_tcp_ses_lock ) ;
2009-06-26 07:25:49 +04:00
return ;
} else if ( tcon - > tc_count = = 1 )
2005-10-11 01:06:37 +04:00
tcon - > tidStatus = CifsExiting ;
2010-10-18 21:59:37 +04:00
spin_unlock ( & cifs_tcp_ses_lock ) ;
2005-10-11 01:06:37 +04:00
2006-07-15 02:37:11 +04:00
/* cancel_brl_requests(tcon); */ /* BB mark all brl mids as exiting */
2005-11-10 02:21:09 +03:00
/* cancel_notify_requests(tcon); */
2007-07-13 04:33:32 +04:00
if ( tcon - > ses & & tcon - > ses - > server ) {
2013-05-05 07:12:25 +04:00
cifs_dbg ( FYI , " wake up tasks now - umount begin not complete \n " ) ;
2005-10-11 01:28:38 +04:00
wake_up_all ( & tcon - > ses - > server - > request_q ) ;
2005-11-30 07:55:11 +03:00
wake_up_all ( & tcon - > ses - > server - > response_q ) ;
msleep ( 1 ) ; /* yield */
/* we have to kick the requests once more */
wake_up_all ( & tcon - > ses - > server - > response_q ) ;
msleep ( 1 ) ;
2005-10-11 01:06:37 +04:00
}
2005-10-10 21:34:22 +04:00
return ;
}
2006-09-29 01:34:06 +04:00
#ifdef CONFIG_CIFS_STATS2
/*
 * show_stats handler, only built with CONFIG_CIFS_STATS2.
 * Placeholder: writes nothing to @s and reports success (BB FIXME).
 */
static int cifs_show_stats(struct seq_file *s, struct dentry *root)
{
	return 0;
}
#endif
2005-04-17 02:20:36 +04:00
static int cifs_remount ( struct super_block * sb , int * flags , char * data )
{
2014-03-13 18:14:33 +04:00
sync_filesystem ( sb ) ;
2017-11-28 00:05:09 +03:00
* flags | = SB_NODIRATIME ;
2005-04-17 02:20:36 +04:00
return 0 ;
}
2010-06-07 21:43:19 +04:00
static int cifs_drop_inode ( struct inode * inode )
2010-06-01 22:47:40 +04:00
{
struct cifs_sb_info * cifs_sb = CIFS_SB ( inode - > i_sb ) ;
2010-06-07 21:43:19 +04:00
/* no serverino => unconditional eviction */
return ! ( cifs_sb - > mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM ) | |
generic_drop_inode ( inode ) ;
2010-06-01 22:47:40 +04:00
}
2007-02-12 11:55:41 +03:00
static const struct super_operations cifs_super_ops = {
2005-04-17 02:20:36 +04:00
. statfs = cifs_statfs ,
. alloc_inode = cifs_alloc_inode ,
2019-04-15 06:18:35 +03:00
. free_inode = cifs_free_inode ,
2010-06-01 22:47:40 +04:00
. drop_inode = cifs_drop_inode ,
2010-06-07 22:34:48 +04:00
. evict_inode = cifs_evict_inode ,
2010-06-01 22:47:40 +04:00
/* .delete_inode = cifs_delete_inode, */ /* Do not need above
function unless later we add lazy close of inodes or unless the
2007-07-13 04:33:32 +04:00
kernel forgets to call us with the same number of releases ( closes )
as opens */
2005-04-17 02:20:36 +04:00
. show_options = cifs_show_options ,
2005-11-10 02:21:09 +03:00
. umount_begin = cifs_umount_begin ,
2005-04-17 02:20:36 +04:00
. remount_fs = cifs_remount ,
2006-09-29 01:34:06 +04:00
# ifdef CONFIG_CIFS_STATS2
2006-09-30 05:08:55 +04:00
. show_stats = cifs_show_stats ,
2006-09-29 01:34:06 +04:00
# endif
2005-04-17 02:20:36 +04:00
} ;
2011-05-27 07:50:55 +04:00
/*
 * Get root dentry from superblock according to prefix path mount option.
 *
 * On success the returned dentry carries a reference taken for the caller
 * (dget() or the result of the lookup). On failure an ERR_PTR() value is
 * returned, never NULL: -ENOMEM when the path cannot be built, -ENOTDIR
 * when an intermediate component is not a directory, or whatever error the
 * component lookup produced.
 */
static struct dentry *
cifs_get_root ( struct smb_vol * vol , struct super_block * sb )
{
2011-07-18 21:50:40 +04:00
struct dentry * dentry ;
2011-05-27 07:50:55 +04:00
struct cifs_sb_info * cifs_sb = CIFS_SB ( sb ) ;
2011-07-18 21:50:40 +04:00
char * full_path = NULL ;
char * s , * p ;
2011-05-27 07:50:55 +04:00
char sep ;
2016-07-30 00:38:21 +03:00
/* with CIFS_MOUNT_USE_PREFIX_PATH set no walk is done: s_root is the answer */
if ( cifs_sb - > mnt_cifs_flags & CIFS_MOUNT_USE_PREFIX_PATH )
return dget ( sb - > s_root ) ;
2012-11-29 08:34:41 +04:00
full_path = cifs_build_path_to_root ( vol , cifs_sb ,
2016-12-15 10:01:19 +03:00
cifs_sb_master_tcon ( cifs_sb ) , 0 ) ;
2011-05-27 07:50:55 +04:00
if ( full_path = = NULL )
2011-06-17 18:02:59 +04:00
return ERR_PTR ( - ENOMEM ) ;
2011-05-27 07:50:55 +04:00
2013-05-05 07:12:25 +04:00
cifs_dbg ( FYI , " Get root dentry for %s \n " , full_path ) ;
2011-05-27 07:50:55 +04:00
sep = CIFS_DIR_SEP ( cifs_sb ) ;
2011-07-18 21:50:40 +04:00
/* start at the superblock root and descend one path component per iteration */
dentry = dget ( sb - > s_root ) ;
p = s = full_path ;
do {
2015-03-18 01:25:59 +03:00
struct inode * dir = d_inode ( dentry ) ;
2011-07-18 21:50:40 +04:00
struct dentry * child ;
2013-02-02 00:11:01 +04:00
/* the current dentry must be a directory before anything is looked up in it */
if ( ! S_ISDIR ( dir - > i_mode ) ) {
dput ( dentry ) ;
dentry = ERR_PTR ( - ENOTDIR ) ;
break ;
}
2011-08-21 19:30:15 +04:00
2011-07-18 21:50:40 +04:00
/* skip separators */
while ( * s = = sep )
s + + ;
/* end of path reached: the current dentry is the result */
if ( ! * s )
break ;
/* p marks the start of the component, s scans to its end */
p = s + + ;
/* next separator */
while ( * s & & * s ! = sep )
s + + ;
2019-10-31 08:21:58 +03:00
child = lookup_positive_unlocked ( p , dentry , s - p ) ;
2011-07-18 21:50:40 +04:00
/* drop the reference on the parent and continue from the child (or its error) */
dput ( dentry ) ;
dentry = child ;
} while ( ! IS_ERR ( dentry ) ) ;
2011-05-27 07:50:55 +04:00
kfree ( full_path ) ;
2011-07-18 21:50:40 +04:00
return dentry ;
2011-05-27 07:50:55 +04:00
}
2011-06-17 17:47:23 +04:00
static int cifs_set_super ( struct super_block * sb , void * data )
{
struct cifs_mnt_data * mnt_data = data ;
sb - > s_fs_info = mnt_data - > cifs_sb ;
return set_anon_super ( sb , NULL ) ;
}
2010-07-26 12:52:33 +04:00
/*
 * Common mount entry point behind cifs_do_mount() and smb3_do_mount().
 *
 * @fs_type:  filesystem type passed through to sget()
 * @flags:    mount flags; SB_NODIRATIME and SB_NOATIME are OR-ed in before
 *            sget() is called
 * @dev_name: device name string of the mount request
 * @data:     mount option string; parsed by cifs_get_volume_info() and a
 *            copy (at most PAGE_SIZE bytes) is kept in cifs_sb->mountdata
 * @is_smb3:  passed through to cifs_get_volume_info()
 *
 * Returns the root dentry obtained from cifs_get_root() on success, or an
 * ERR_PTR() value on failure.
 */
static struct dentry *
2018-06-07 01:59:29 +03:00
cifs_smb3_do_mount ( struct file_system_type * fs_type ,
int flags , const char * dev_name , void * data , bool is_smb3 )
2005-04-17 02:20:36 +04:00
{
int rc ;
2010-08-16 00:51:10 +04:00
struct super_block * sb ;
2011-05-05 13:55:12 +04:00
struct cifs_sb_info * cifs_sb ;
struct smb_vol * volume_info ;
2011-05-26 23:35:47 +04:00
struct cifs_mnt_data mnt_data ;
2011-05-05 13:55:12 +04:00
struct dentry * root ;
2005-04-17 02:20:36 +04:00
2018-10-07 21:52:18 +03:00
/*
 * Log the attempted mount operation: as an FYI debug message (device
 * name and flags) when cifsFYI is set, otherwise as an info message
 * (device name only).
 */
2018-10-07 18:21:26 +03:00
if ( cifsFYI )
cifs_dbg ( FYI , " Devname: %s flags: %d \n " , dev_name , flags ) ;
else
cifs_info ( " Attempting to mount %s \n " , dev_name ) ;
2005-04-17 02:20:36 +04:00
2018-06-07 01:59:29 +03:00
volume_info = cifs_get_volume_info ( ( char * ) data , dev_name , is_smb3 ) ;
2011-07-06 16:10:38 +04:00
/* nothing has been allocated here yet, so the error is returned directly */
if ( IS_ERR ( volume_info ) )
return ERR_CAST ( volume_info ) ;
2011-05-05 13:55:12 +04:00
cifs_sb = kzalloc ( sizeof ( struct cifs_sb_info ) , GFP_KERNEL ) ;
if ( cifs_sb = = NULL ) {
root = ERR_PTR ( - ENOMEM ) ;
2011-06-17 17:56:55 +04:00
goto out_nls ;
2011-05-05 13:55:12 +04:00
}
2011-06-17 17:17:28 +04:00
cifs_sb - > mountdata = kstrndup ( data , PAGE_SIZE , GFP_KERNEL ) ;
if ( cifs_sb - > mountdata = = NULL ) {
root = ERR_PTR ( - ENOMEM ) ;
2016-07-30 00:38:19 +03:00
goto out_free ;
2011-06-17 17:17:28 +04:00
}
2016-07-30 00:38:19 +03:00
rc = cifs_setup_cifs_sb ( volume_info , cifs_sb ) ;
if ( rc ) {
root = ERR_PTR ( rc ) ;
goto out_free ;
2016-05-25 20:59:09 +03:00
}
2011-06-17 17:29:57 +04:00
rc = cifs_mount ( cifs_sb , volume_info ) ;
if ( rc ) {
2017-11-28 00:05:09 +03:00
/* the failure message is suppressed for SB_SILENT mounts */
if ( ! ( flags & SB_SILENT ) )
2013-05-05 07:12:25 +04:00
cifs_dbg ( VFS , " cifs_mount failed w/return code = %d \n " ,
rc ) ;
2011-06-17 17:29:57 +04:00
root = ERR_PTR ( rc ) ;
2016-07-30 00:38:19 +03:00
goto out_free ;
2011-06-17 17:29:57 +04:00
}
2011-05-26 23:35:47 +04:00
/* mnt_data is the cookie sget() passes to cifs_match_super / cifs_set_super */
mnt_data . vol = volume_info ;
mnt_data . cifs_sb = cifs_sb ;
/* note: records the flags as passed in, before the atime bits are added below */
mnt_data . flags = flags ;
2012-06-25 15:55:37 +04:00
/* BB should we make this contingent on mount parm? */
2017-11-28 00:05:09 +03:00
flags | = SB_NODIRATIME | SB_NOATIME ;
2012-06-25 15:55:37 +04:00
sb = sget ( fs_type , cifs_match_super , cifs_set_super , flags , & mnt_data ) ;
2011-05-05 13:55:12 +04:00
/* from here on cifs_sb is released with cifs_umount() rather than the out_free path */
if ( IS_ERR ( sb ) ) {
root = ERR_CAST ( sb ) ;
2011-06-17 17:29:57 +04:00
cifs_umount ( cifs_sb ) ;
2011-06-17 17:42:43 +04:00
goto out ;
2011-05-05 13:55:12 +04:00
}
2005-04-17 02:20:36 +04:00
2011-06-17 17:47:23 +04:00
if ( sb - > s_root ) {
2013-05-05 07:12:25 +04:00
/* sget() returned a superblock that already has a root: the new cifs_sb is not needed */
cifs_dbg ( FYI , " Use existing superblock \n " ) ;
2011-06-17 17:29:57 +04:00
cifs_umount ( cifs_sb ) ;
2011-06-17 17:56:55 +04:00
} else {
rc = cifs_read_super ( sb ) ;
if ( rc ) {
root = ERR_PTR ( rc ) ;
goto out_super ;
}
2011-05-25 13:35:34 +04:00
2017-11-28 00:05:09 +03:00
sb - > s_flags | = SB_ACTIVE ;
2005-04-17 02:20:36 +04:00
}
2011-05-05 13:55:12 +04:00
2016-07-30 00:38:21 +03:00
root = cifs_get_root ( volume_info , sb ) ;
2011-06-17 18:02:59 +04:00
if ( IS_ERR ( root ) )
2011-05-27 07:50:55 +04:00
goto out_super ;
2011-05-26 23:35:47 +04:00
2013-05-05 07:12:25 +04:00
cifs_dbg ( FYI , " dentry root is: %p \n " , root ) ;
2011-05-26 00:02:16 +04:00
goto out ;
2011-05-05 13:55:12 +04:00
2011-05-26 00:02:16 +04:00
/* failure after sget() succeeded: give the superblock back, then fall into the common exit */
out_super :
deactivate_locked_super ( sb ) ;
/* common exit for success and failure: volume_info is always cleaned up here */
out :
2011-07-06 16:10:37 +04:00
cifs_cleanup_volume_info ( volume_info ) ;
2011-05-05 13:55:12 +04:00
return root ;
2011-06-17 17:56:55 +04:00
2016-07-30 00:38:19 +03:00
/* failure before sget(): free cifs_sb and what hangs off it, then fall into out_nls */
out_free :
kfree ( cifs_sb - > prepath ) ;
2011-06-17 17:56:55 +04:00
kfree ( cifs_sb - > mountdata ) ;
kfree ( cifs_sb ) ;
/* unload the nls table held by volume_info, then finish through the common exit */
out_nls :
unload_nls ( volume_info - > local_nls ) ;
goto out ;
2005-04-17 02:20:36 +04:00
}
2018-06-07 01:59:29 +03:00
static struct dentry *
smb3_do_mount ( struct file_system_type * fs_type ,
int flags , const char * dev_name , void * data )
{
return cifs_smb3_do_mount ( fs_type , flags , dev_name , data , true ) ;
}
static struct dentry *
cifs_do_mount ( struct file_system_type * fs_type ,
int flags , const char * dev_name , void * data )
{
return cifs_smb3_do_mount ( fs_type , flags , dev_name , data , false ) ;
}
2014-05-23 14:50:21 +04:00
static ssize_t
cifs_loose_read_iter ( struct kiocb * iocb , struct iov_iter * iter )
{
ssize_t rc ;
struct inode * inode = file_inode ( iocb - > ki_filp ) ;
2015-12-02 17:46:07 +03:00
if ( iocb - > ki_filp - > f_flags & O_DIRECT )
return cifs_user_readv ( iocb , iter ) ;
2014-05-23 14:50:21 +04:00
rc = cifs_revalidate_mapping ( inode ) ;
if ( rc )
return rc ;
return generic_file_read_iter ( iocb , iter ) ;
}
2014-04-03 20:05:17 +04:00
static ssize_t cifs_file_write_iter ( struct kiocb * iocb , struct iov_iter * from )
2005-04-17 02:20:36 +04:00
{
2013-01-24 02:07:38 +04:00
struct inode * inode = file_inode ( iocb - > ki_filp ) ;
2014-03-11 20:11:47 +04:00
struct cifsInodeInfo * cinode = CIFS_I ( inode ) ;
2005-04-17 02:20:36 +04:00
ssize_t written ;
2011-01-24 22:16:35 +03:00
int rc ;
2005-04-17 02:20:36 +04:00
2015-12-02 17:46:07 +03:00
if ( iocb - > ki_filp - > f_flags & O_DIRECT ) {
written = cifs_user_writev ( iocb , from ) ;
if ( written > 0 & & CIFS_CACHE_READ ( cinode ) ) {
cifs_zap_mapping ( inode ) ;
cifs_dbg ( FYI ,
" Set no oplock for inode=%p after a write operation \n " ,
inode ) ;
cinode - > oplock = 0 ;
}
return written ;
}
2014-03-11 20:11:47 +04:00
written = cifs_get_writer ( cinode ) ;
if ( written )
return written ;
2014-04-03 20:05:17 +04:00
written = generic_file_write_iter ( iocb , from ) ;
2011-01-24 22:16:35 +03:00
2013-09-05 13:01:06 +04:00
if ( CIFS_CACHE_WRITE ( CIFS_I ( inode ) ) )
2014-03-11 20:11:47 +04:00
goto out ;
2011-01-24 22:16:35 +03:00
rc = filemap_fdatawrite ( inode - > i_mapping ) ;
if ( rc )
2014-04-03 20:05:17 +04:00
cifs_dbg ( FYI , " cifs_file_write_iter: %d rc on %p inode \n " ,
2013-05-05 07:12:25 +04:00
rc , inode ) ;
2011-01-24 22:16:35 +03:00
2014-03-11 20:11:47 +04:00
out :
cifs_put_writer ( cinode ) ;
2005-04-17 02:20:36 +04:00
return written ;
}
2012-12-18 03:59:39 +04:00
static loff_t cifs_llseek ( struct file * file , loff_t offset , int whence )
2006-01-13 01:41:28 +03:00
{
2019-05-15 00:17:02 +03:00
struct cifsFileInfo * cfile = file - > private_data ;
struct cifs_tcon * tcon ;
2011-07-18 21:21:38 +04:00
/*
2012-12-18 03:59:39 +04:00
* whence = = SEEK_END | | SEEK_DATA | | SEEK_HOLE = > we must revalidate
2011-07-18 21:21:38 +04:00
* the cached file length
*/
2012-12-18 03:59:39 +04:00
if ( whence ! = SEEK_SET & & whence ! = SEEK_CUR ) {
2011-04-07 18:18:11 +04:00
int rc ;
2013-01-24 02:07:38 +04:00
struct inode * inode = file_inode ( file ) ;
2011-04-07 18:18:11 +04:00
/*
* We need to be sure that all dirty pages are written and the
* server has the newest file length .
*/
2013-09-05 13:01:06 +04:00
if ( ! CIFS_CACHE_READ ( CIFS_I ( inode ) ) & & inode - > i_mapping & &
2011-04-07 18:18:11 +04:00
inode - > i_mapping - > nrpages ! = 0 ) {
rc = filemap_fdatawait ( inode - > i_mapping ) ;
2011-05-20 21:00:01 +04:00
if ( rc ) {
mapping_set_error ( inode - > i_mapping , rc ) ;
return rc ;
}
2011-04-07 18:18:11 +04:00
}
/*
* Some applications poll for the file length in this strange
* way so we must seek to end on non - oplocked files by
* setting the revalidate time to zero .
*/
CIFS_I ( inode ) - > time = 0 ;
rc = cifs_revalidate_file_attr ( file ) ;
if ( rc < 0 )
return ( loff_t ) rc ;
2006-01-13 01:41:28 +03:00
}
2019-05-15 00:17:02 +03:00
if ( cfile & & cfile - > tlink ) {
tcon = tlink_tcon ( cfile - > tlink ) ;
if ( tcon - > ses - > server - > ops - > llseek )
return tcon - > ses - > server - > ops - > llseek ( file , tcon ,
offset , whence ) ;
}
2012-12-18 03:59:39 +04:00
return generic_file_llseek ( file , offset , whence ) ;
2006-01-13 01:41:28 +03:00
}
2014-08-22 18:40:25 +04:00
static int
cifs_setlease ( struct file * file , long arg , struct file_lock * * lease , void * * priv )
2008-10-23 08:42:37 +04:00
{
2013-09-05 13:01:06 +04:00
/*
* Note that this is called by vfs setlease with i_lock held to
* protect * lease from going away .
*/
2013-01-24 02:07:38 +04:00
struct inode * inode = file_inode ( file ) ;
2010-09-21 03:01:31 +04:00
struct cifsFileInfo * cfile = file - > private_data ;
2008-10-23 08:42:37 +04:00
if ( ! ( S_ISREG ( inode - > i_mode ) ) )
return - EINVAL ;
2014-08-09 18:16:44 +04:00
/* Check if file is oplocked if this is request for new lease */
if ( arg = = F_UNLCK | |
( ( arg = = F_RDLCK ) & & CIFS_CACHE_READ ( CIFS_I ( inode ) ) ) | |
2013-09-05 13:01:06 +04:00
( ( arg = = F_WRLCK ) & & CIFS_CACHE_WRITE ( CIFS_I ( inode ) ) ) )
2014-08-22 18:40:25 +04:00
return generic_setlease ( file , arg , lease , priv ) ;
2010-09-30 03:51:11 +04:00
else if ( tlink_tcon ( cfile - > tlink ) - > local_lease & &
2013-09-05 13:01:06 +04:00
! CIFS_CACHE_READ ( CIFS_I ( inode ) ) )
/*
* If the server claims to support oplock on this file , then we
* still need to check oplock even if the local_lease mount
* option is set , but there are servers which do not support
* oplock for which this mount option may be useful if the user
* knows that the file won ' t be changed on the server by anyone
* else .
*/
2014-08-22 18:40:25 +04:00
return generic_setlease ( file , arg , lease , priv ) ;
2010-10-31 15:35:10 +03:00
else
2008-10-23 08:42:37 +04:00
return - EAGAIN ;
}
2008-01-11 04:49:48 +03:00
struct file_system_type cifs_fs_type = {
2005-04-17 02:20:36 +04:00
. owner = THIS_MODULE ,
. name = " cifs " ,
2010-07-26 12:52:33 +04:00
. mount = cifs_do_mount ,
2011-06-17 16:34:57 +04:00
. kill_sb = cifs_kill_sb ,
2020-02-26 03:08:54 +03:00
. fs_flags = FS_RENAME_DOES_D_MOVE ,
2005-04-17 02:20:36 +04:00
} ;
2013-03-11 18:05:42 +04:00
MODULE_ALIAS_FS ( " cifs " ) ;
2018-05-24 05:44:53 +03:00
static struct file_system_type smb3_fs_type = {
. owner = THIS_MODULE ,
. name = " smb3 " ,
2018-06-07 01:59:29 +03:00
. mount = smb3_do_mount ,
2018-05-24 05:44:53 +03:00
. kill_sb = cifs_kill_sb ,
2020-02-26 03:08:54 +03:00
. fs_flags = FS_RENAME_DOES_D_MOVE ,
2018-05-24 05:44:53 +03:00
} ;
MODULE_ALIAS_FS ( " smb3 " ) ;
MODULE_ALIAS ( " smb3 " ) ;
2007-02-12 11:55:38 +03:00
const struct inode_operations cifs_dir_inode_ops = {
2005-04-17 02:20:36 +04:00
. create = cifs_create ,
2012-06-05 17:10:23 +04:00
. atomic_open = cifs_atomic_open ,
2005-04-17 02:20:36 +04:00
. lookup = cifs_lookup ,
. getattr = cifs_getattr ,
. unlink = cifs_unlink ,
. link = cifs_hardlink ,
. mkdir = cifs_mkdir ,
. rmdir = cifs_rmdir ,
2016-09-27 12:03:58 +03:00
. rename = cifs_rename2 ,
2005-04-17 02:20:36 +04:00
. permission = cifs_permission ,
. setattr = cifs_setattr ,
. symlink = cifs_symlink ,
. mknod = cifs_mknod ,
. listxattr = cifs_listxattr ,
} ;
2007-02-12 11:55:38 +03:00
const struct inode_operations cifs_file_inode_ops = {
2005-04-17 02:20:36 +04:00
. setattr = cifs_setattr ,
2016-05-19 04:48:32 +03:00
. getattr = cifs_getattr ,
2005-04-17 02:20:36 +04:00
. permission = cifs_permission ,
. listxattr = cifs_listxattr ,
2019-04-25 09:45:29 +03:00
. fiemap = cifs_fiemap ,
2005-04-17 02:20:36 +04:00
} ;
2007-02-12 11:55:38 +03:00
const struct inode_operations cifs_symlink_inode_ops = {
2015-11-17 18:20:54 +03:00
. get_link = cifs_get_link ,
2005-04-17 02:20:36 +04:00
. permission = cifs_permission ,
. listxattr = cifs_listxattr ,
} ;
2018-10-30 02:41:49 +03:00
/*
 * cifs_remap_file_range - ->remap_file_range handler: clone a byte range
 * @src_file:    file to clone from
 * @off:         starting offset in @src_file
 * @dst_file:    file to clone into
 * @destoff:     starting offset in @dst_file
 * @len:         number of bytes; 0 means "from @off to the end of the source"
 * @remap_flags: REMAP_FILE_* flags; only REMAP_FILE_ADVISORY is accepted
 *
 * The range is cloned by the server through the ->duplicate_extents server
 * operation.  That operation does not compare the two ranges, so a
 * REMAP_FILE_DEDUP request cannot be honoured safely: treating it as a clone
 * would overwrite the destination even when the contents differ.  Dedupe is
 * therefore rejected with -EOPNOTSUPP instead of being served as a clone.
 *
 * Returns the number of bytes remapped (the effective @len) on success, or a
 * negative errno: -EOPNOTSUPP for dedupe requests or when the server ops have
 * no duplicate_extents method, -EINVAL for unknown flags, -EBADF when either
 * file has no private_data, or whatever ->duplicate_extents returned.
 */
static loff_t cifs_remap_file_range(struct file *src_file, loff_t off,
		struct file *dst_file, loff_t destoff, loff_t len,
		unsigned int remap_flags)
{
	struct inode *src_inode = file_inode(src_file);
	struct inode *target_inode = file_inode(dst_file);
	struct cifsFileInfo *smb_file_src = src_file->private_data;
	struct cifsFileInfo *smb_file_target;
	struct cifs_tcon *target_tcon;
	unsigned int xid;
	int rc;

	/* No content comparison is done here, so dedupe cannot be supported */
	if (remap_flags & REMAP_FILE_DEDUP)
		return -EOPNOTSUPP;
	if (remap_flags & ~REMAP_FILE_ADVISORY)
		return -EINVAL;

	cifs_dbg(FYI, "clone range\n");

	xid = get_xid();

	if (!src_file->private_data || !dst_file->private_data) {
		rc = -EBADF;
		cifs_dbg(VFS, "missing cifsFileInfo on copy range src file\n");
		goto out;
	}

	smb_file_target = dst_file->private_data;
	target_tcon = tlink_tcon(smb_file_target->tlink);

	/*
	 * Note: cifs case is easier than btrfs since server responsible for
	 * checks for proper open modes and file type and if it wants
	 * server could even support copy of range where source = target
	 */
	lock_two_nondirectories(target_inode, src_inode);

	/* A zero length means "everything from @off to the end of the source" */
	if (len == 0)
		len = src_inode->i_size - off;

	cifs_dbg(FYI, "about to flush pages\n");
	/* should we flush first and last page first */
	truncate_inode_pages_range(&target_inode->i_data, destoff,
				   PAGE_ALIGN(destoff + len) - 1);

	if (target_tcon->ses->server->ops->duplicate_extents)
		rc = target_tcon->ses->server->ops->duplicate_extents(xid,
			smb_file_src, smb_file_target, off, len, destoff);
	else
		rc = -EOPNOTSUPP;

	/*
	 * force revalidate of size and timestamps of target file now
	 * that target is updated on the server
	 */
	CIFS_I(target_inode)->time = 0;
	/*
	 * although unlocking in the reverse order from locking is not
	 * strictly necessary here it is a little cleaner to be consistent
	 */
	unlock_two_nondirectories(src_inode, target_inode);
out:
	free_xid(xid);
	return rc < 0 ? rc : len;
}
2017-02-10 13:33:51 +03:00
/*
 * cifs_file_copychunk_range - copy a byte range between two files via the
 * server's ->copychunk_range operation
 * @xid:      transaction id obtained by the caller
 * @src_file: file to copy from
 * @off:      starting offset in @src_file
 * @dst_file: file to copy into
 * @destoff:  starting offset in @dst_file
 * @len:      number of bytes to copy
 * @flags:    not examined by this function
 *
 * Returns whatever ->copychunk_range returns on the normal path, or a
 * negative errno: -EBADF when either file has no private_data, -EXDEV when
 * the two files are not on the same session, -EOPNOTSUPP when the server ops
 * have no copychunk_range method, or the error from file_modified().
 */
ssize_t cifs_file_copychunk_range(unsigned int xid,
				struct file *src_file, loff_t off,
				struct file *dst_file, loff_t destoff,
				size_t len, unsigned int flags)
{
	struct inode *src_inode = file_inode(src_file);
	struct inode *target_inode = file_inode(dst_file);
	struct cifsFileInfo *smb_file_src;
	struct cifsFileInfo *smb_file_target;
	struct cifs_tcon *src_tcon;
	struct cifs_tcon *target_tcon;
	ssize_t rc;

	cifs_dbg(FYI, "copychunk range\n");

	if (!src_file->private_data || !dst_file->private_data) {
		cifs_dbg(VFS, "missing cifsFileInfo on copy range src file\n");
		return -EBADF;
	}

	smb_file_src = src_file->private_data;
	smb_file_target = dst_file->private_data;
	src_tcon = tlink_tcon(smb_file_src->tlink);
	target_tcon = tlink_tcon(smb_file_target->tlink);

	/* Both ends must be on the same session */
	if (src_tcon->ses != target_tcon->ses) {
		cifs_dbg(VFS, "source and target of copy not on same server\n");
		return -EXDEV;
	}

	if (!target_tcon->ses->server->ops->copychunk_range)
		return -EOPNOTSUPP;

	/*
	 * Note: cifs case is easier than btrfs since server responsible for
	 * checks for proper open modes and file type and if it wants
	 * server could even support copy of range where source = target
	 */
	lock_two_nondirectories(target_inode, src_inode);

	cifs_dbg(FYI, "about to flush pages\n");
	/* should we flush first and last page first */
	truncate_inode_pages(&target_inode->i_data, 0);

	/* Only issue the copy when file_modified() on the target succeeded */
	rc = file_modified(dst_file);
	if (rc == 0)
		rc = target_tcon->ses->server->ops->copychunk_range(xid,
			smb_file_src, smb_file_target, off, len, destoff);

	file_accessed(src_file);

	/*
	 * force revalidate of size and timestamps of target file now
	 * that target is updated on the server
	 */
	CIFS_I(target_inode)->time = 0;

	/*
	 * although unlocking in the reverse order from locking is not
	 * strictly necessary here it is a little cleaner to be consistent
	 */
	unlock_two_nondirectories(src_inode, target_inode);

	return rc;
}
2018-05-10 18:59:37 +03:00
/*
 * fsync() on a directory.  Directory operations under CIFS/SMB2/SMB3 are
 * synchronous, so there is nothing to write back: log the request at FYI
 * level and report success.
 */
static int cifs_dir_fsync(struct file *file, loff_t start, loff_t end,
			  int datasync)
{
	cifs_dbg(FYI, "Sync directory - name: %pD datasync: 0x%x\n",
		 file, datasync);

	return 0;
}
2017-02-10 13:33:51 +03:00
/*
 * cifs_copy_file_range - ->copy_file_range handler
 * @src_file: file to copy from
 * @off:      starting offset in @src_file
 * @dst_file: file to copy into
 * @destoff:  starting offset in @dst_file
 * @len:      number of bytes to copy
 * @flags:    passed through to the helpers
 *
 * Tries a server-side copy through cifs_file_copychunk_range() and falls
 * back to generic_copy_file_range() when that reports -EOPNOTSUPP or -EXDEV.
 * A destination marked as a swapfile is refused with -EOPNOTSUPP, with no
 * fallback.
 *
 * Returns the result of whichever copy path ran, or a negative errno.
 */
static ssize_t cifs_copy_file_range(struct file *src_file, loff_t off,
				struct file *dst_file, loff_t destoff,
				size_t len, unsigned int flags)
{
	struct cifsFileInfo *cfile = dst_file->private_data;
	unsigned int xid;
	ssize_t rc;

	/*
	 * Reject swapfile targets before taking an xid, so the early return
	 * has no xid to release.
	 */
	if (cfile->swapfile)
		return -EOPNOTSUPP;

	xid = get_xid();
	rc = cifs_file_copychunk_range(xid, src_file, off, dst_file, destoff,
					len, flags);
	free_xid(xid);

	if (rc == -EOPNOTSUPP || rc == -EXDEV)
		rc = generic_copy_file_range(src_file, off, dst_file,
					     destoff, len, flags);
	return rc;
}
2006-03-28 13:56:42 +04:00
const struct file_operations cifs_file_ops = {
2014-05-23 14:50:21 +04:00
. read_iter = cifs_loose_read_iter ,
2014-04-03 20:05:17 +04:00
. write_iter = cifs_file_write_iter ,
2005-04-17 02:20:36 +04:00
. open = cifs_open ,
. release = cifs_close ,
. lock = cifs_lock ,
2019-07-17 02:55:38 +03:00
. flock = cifs_flock ,
2005-04-17 02:20:36 +04:00
. fsync = cifs_fsync ,
. flush = cifs_flush ,
. mmap = cifs_file_mmap ,
2007-06-01 13:49:19 +04:00
. splice_read = generic_file_splice_read ,
2017-12-28 16:23:08 +03:00
. splice_write = iter_file_splice_write ,
2006-01-13 01:41:28 +03:00
. llseek = cifs_llseek ,
2008-05-15 09:51:55 +04:00
. unlocked_ioctl = cifs_ioctl ,
2017-02-10 13:33:51 +03:00
. copy_file_range = cifs_copy_file_range ,
2018-10-30 02:41:21 +03:00
. remap_file_range = cifs_remap_file_range ,
2008-10-23 08:42:37 +04:00
. setlease = cifs_setlease ,
2014-08-17 17:38:47 +04:00
. fallocate = cifs_fallocate ,
2005-04-17 02:20:36 +04:00
} ;
2010-12-12 13:11:13 +03:00
const struct file_operations cifs_file_strict_ops = {
2014-04-03 03:53:36 +04:00
. read_iter = cifs_strict_readv ,
2014-04-03 20:05:17 +04:00
. write_iter = cifs_strict_writev ,
2010-12-12 13:11:13 +03:00
. open = cifs_open ,
. release = cifs_close ,
. lock = cifs_lock ,
2019-07-17 02:55:38 +03:00
. flock = cifs_flock ,
2010-12-12 13:11:13 +03:00
. fsync = cifs_strict_fsync ,
. flush = cifs_flush ,
2010-12-14 11:29:51 +03:00
. mmap = cifs_file_strict_mmap ,
2010-12-12 13:11:13 +03:00
. splice_read = generic_file_splice_read ,
2017-12-28 16:23:08 +03:00
. splice_write = iter_file_splice_write ,
2010-12-12 13:11:13 +03:00
. llseek = cifs_llseek ,
. unlocked_ioctl = cifs_ioctl ,
2017-02-10 13:33:51 +03:00
. copy_file_range = cifs_copy_file_range ,
2018-10-30 02:41:21 +03:00
. remap_file_range = cifs_remap_file_range ,
2010-12-12 13:11:13 +03:00
. setlease = cifs_setlease ,
2014-08-17 17:38:47 +04:00
. fallocate = cifs_fallocate ,
2010-12-12 13:11:13 +03:00
} ;
2006-03-28 13:56:42 +04:00
const struct file_operations cifs_file_direct_ops = {
2018-11-01 01:13:11 +03:00
. read_iter = cifs_direct_readv ,
. write_iter = cifs_direct_writev ,
2005-04-17 02:20:36 +04:00
. open = cifs_open ,
. release = cifs_close ,
. lock = cifs_lock ,
2019-07-17 02:55:38 +03:00
. flock = cifs_flock ,
2005-04-17 02:20:36 +04:00
. fsync = cifs_fsync ,
. flush = cifs_flush ,
2009-12-07 08:44:46 +03:00
. mmap = cifs_file_mmap ,
2007-06-01 13:49:19 +04:00
. splice_read = generic_file_splice_read ,
2017-12-28 16:23:08 +03:00
. splice_write = iter_file_splice_write ,
2008-05-15 09:51:55 +04:00
. unlocked_ioctl = cifs_ioctl ,
2017-02-10 13:33:51 +03:00
. copy_file_range = cifs_copy_file_range ,
2018-10-30 02:41:21 +03:00
. remap_file_range = cifs_remap_file_range ,
2006-01-13 01:41:28 +03:00
. llseek = cifs_llseek ,
2008-10-23 08:42:37 +04:00
. setlease = cifs_setlease ,
2014-08-17 17:38:47 +04:00
. fallocate = cifs_fallocate ,
2005-04-17 02:20:36 +04:00
} ;
2010-12-12 13:11:13 +03:00
2006-03-28 13:56:42 +04:00
const struct file_operations cifs_file_nobrl_ops = {
2014-05-23 14:50:21 +04:00
. read_iter = cifs_loose_read_iter ,
2014-04-03 20:05:17 +04:00
. write_iter = cifs_file_write_iter ,
2005-11-18 04:03:00 +03:00
. open = cifs_open ,
. release = cifs_close ,
. fsync = cifs_fsync ,
. flush = cifs_flush ,
. mmap = cifs_file_mmap ,
2007-06-01 13:49:19 +04:00
. splice_read = generic_file_splice_read ,
2017-12-28 16:23:08 +03:00
. splice_write = iter_file_splice_write ,
2006-01-13 01:41:28 +03:00
. llseek = cifs_llseek ,
2008-05-15 09:51:55 +04:00
. unlocked_ioctl = cifs_ioctl ,
2017-02-10 13:33:51 +03:00
. copy_file_range = cifs_copy_file_range ,
2018-10-30 02:41:21 +03:00
. remap_file_range = cifs_remap_file_range ,
2008-10-23 08:42:37 +04:00
. setlease = cifs_setlease ,
2014-08-17 17:38:47 +04:00
. fallocate = cifs_fallocate ,
2005-11-11 22:41:00 +03:00
} ;
2010-12-12 13:11:13 +03:00
const struct file_operations cifs_file_strict_nobrl_ops = {
2014-04-03 03:53:36 +04:00
. read_iter = cifs_strict_readv ,
2014-04-03 20:05:17 +04:00
. write_iter = cifs_strict_writev ,
2010-12-12 13:11:13 +03:00
. open = cifs_open ,
. release = cifs_close ,
. fsync = cifs_strict_fsync ,
. flush = cifs_flush ,
2010-12-14 11:29:51 +03:00
. mmap = cifs_file_strict_mmap ,
2010-12-12 13:11:13 +03:00
. splice_read = generic_file_splice_read ,
2017-12-28 16:23:08 +03:00
. splice_write = iter_file_splice_write ,
2010-12-12 13:11:13 +03:00
. llseek = cifs_llseek ,
. unlocked_ioctl = cifs_ioctl ,
2017-02-10 13:33:51 +03:00
. copy_file_range = cifs_copy_file_range ,
2018-10-30 02:41:21 +03:00
. remap_file_range = cifs_remap_file_range ,
2010-12-12 13:11:13 +03:00
. setlease = cifs_setlease ,
2014-08-17 17:38:47 +04:00
. fallocate = cifs_fallocate ,
2010-12-12 13:11:13 +03:00
} ;
2006-03-28 13:56:42 +04:00
const struct file_operations cifs_file_direct_nobrl_ops = {
2018-11-01 01:13:11 +03:00
. read_iter = cifs_direct_readv ,
. write_iter = cifs_direct_writev ,
2005-11-18 04:03:00 +03:00
. open = cifs_open ,
. release = cifs_close ,
. fsync = cifs_fsync ,
. flush = cifs_flush ,
2010-03-27 05:00:49 +03:00
. mmap = cifs_file_mmap ,
2007-06-01 13:49:19 +04:00
. splice_read = generic_file_splice_read ,
2017-12-28 16:23:08 +03:00
. splice_write = iter_file_splice_write ,
2008-05-15 09:51:55 +04:00
. unlocked_ioctl = cifs_ioctl ,
2017-02-10 13:33:51 +03:00
. copy_file_range = cifs_copy_file_range ,
2018-10-30 02:41:21 +03:00
. remap_file_range = cifs_remap_file_range ,
2006-01-13 01:41:28 +03:00
. llseek = cifs_llseek ,
2008-10-23 08:42:37 +04:00
. setlease = cifs_setlease ,
2014-08-17 17:38:47 +04:00
. fallocate = cifs_fallocate ,
2005-11-11 22:41:00 +03:00
} ;
2005-04-17 02:20:36 +04:00
2006-03-28 13:56:42 +04:00
const struct file_operations cifs_dir_ops = {
2016-04-21 00:40:47 +03:00
. iterate_shared = cifs_readdir ,
2005-04-17 02:20:36 +04:00
. release = cifs_closedir ,
. read = generic_read_dir ,
2008-05-15 09:51:55 +04:00
. unlocked_ioctl = cifs_ioctl ,
2017-02-10 13:33:51 +03:00
. copy_file_range = cifs_copy_file_range ,
2018-10-30 02:41:21 +03:00
. remap_file_range = cifs_remap_file_range ,
2008-09-03 23:53:01 +04:00
. llseek = generic_file_llseek ,
2018-05-10 18:59:37 +03:00
. fsync = cifs_dir_fsync ,
2005-04-17 02:20:36 +04:00
} ;
static void
2008-07-26 06:45:34 +04:00
cifs_init_once ( void * inode )
2005-04-17 02:20:36 +04:00
{
struct cifsInodeInfo * cifsi = inode ;
2007-05-17 09:10:57 +04:00
inode_init_once ( & cifsi - > vfs_inode ) ;
2012-09-19 17:22:44 +04:00
init_rwsem ( & cifsi - > lock_sem ) ;
2005-04-17 02:20:36 +04:00
}
2014-04-04 01:46:30 +04:00
static int __init
2005-04-17 02:20:36 +04:00
cifs_init_inodecache ( void )
{
cifs_inode_cachep = kmem_cache_create ( " cifs_inode_cache " ,
2007-08-31 02:09:15 +04:00
sizeof ( struct cifsInodeInfo ) ,
2006-03-24 14:16:06 +03:00
0 , ( SLAB_RECLAIM_ACCOUNT |
2016-01-15 02:18:21 +03:00
SLAB_MEM_SPREAD | SLAB_ACCOUNT ) ,
2007-07-20 05:11:58 +04:00
cifs_init_once ) ;
2005-04-17 02:20:36 +04:00
if ( cifs_inode_cachep = = NULL )
return - ENOMEM ;
return 0 ;
}
static void
cifs_destroy_inodecache ( void )
{
2012-09-26 05:33:07 +04:00
/*
* Make sure all delayed rcu free inodes are flushed before we
* destroy cache .
*/
rcu_barrier ( ) ;
2006-09-27 12:49:40 +04:00
kmem_cache_destroy ( cifs_inode_cachep ) ;
2005-04-17 02:20:36 +04:00
}
static int
cifs_init_request_bufs ( void )
{
2012-01-12 22:40:50 +04:00
/*
* SMB2 maximum header size is bigger than CIFS one - no problems to
* allocate some more bytes for CIFS .
*/
2017-07-09 02:48:15 +03:00
size_t max_hdr_size = MAX_SMB2_HDR_SIZE ;
2007-05-01 00:13:06 +04:00
if ( CIFSMaxBufSize < 8192 ) {
2005-04-17 02:20:36 +04:00
/* Buffer size can not be smaller than 2 * PATH_MAX since maximum
Unicode path name has to fit in any SMB / CIFS path based frames */
CIFSMaxBufSize = 8192 ;
} else if ( CIFSMaxBufSize > 1024 * 127 ) {
CIFSMaxBufSize = 1024 * 127 ;
} else {
CIFSMaxBufSize & = 0x1FE00 ; /* Round size to even 512 byte mult*/
}
2013-05-05 07:12:25 +04:00
/*
cifs_dbg ( VFS , " CIFSMaxBufSize %d 0x%x \n " ,
CIFSMaxBufSize , CIFSMaxBufSize ) ;
*/
2017-06-11 05:50:33 +03:00
cifs_req_cachep = kmem_cache_create_usercopy ( " cifs_request " ,
2012-01-12 22:40:50 +04:00
CIFSMaxBufSize + max_hdr_size , 0 ,
2017-06-11 05:50:33 +03:00
SLAB_HWCACHE_ALIGN , 0 ,
CIFSMaxBufSize + max_hdr_size ,
NULL ) ;
2005-04-17 02:20:36 +04:00
if ( cifs_req_cachep = = NULL )
return - ENOMEM ;
2007-05-01 00:13:06 +04:00
if ( cifs_min_rcv < 1 )
2005-04-17 02:20:36 +04:00
cifs_min_rcv = 1 ;
else if ( cifs_min_rcv > 64 ) {
cifs_min_rcv = 64 ;
2013-05-05 07:12:25 +04:00
cifs_dbg ( VFS , " cifs_min_rcv set to maximum (64) \n " ) ;
2005-04-17 02:20:36 +04:00
}
2006-03-26 13:37:50 +04:00
cifs_req_poolp = mempool_create_slab_pool ( cifs_min_rcv ,
cifs_req_cachep ) ;
2005-04-17 02:20:36 +04:00
2007-05-01 00:13:06 +04:00
if ( cifs_req_poolp = = NULL ) {
2005-04-17 02:20:36 +04:00
kmem_cache_destroy ( cifs_req_cachep ) ;
return - ENOMEM ;
}
2005-12-13 07:53:18 +03:00
/* MAX_CIFS_SMALL_BUFFER_SIZE bytes is enough for most SMB responses and
2005-04-17 02:20:36 +04:00
almost all handle based requests ( but not write response , nor is it
sufficient for path based requests ) . A smaller size would have
2007-07-13 04:33:32 +04:00
been more efficient ( compacting multiple slab items on one 4 k page )
2005-04-17 02:20:36 +04:00
for the case in which debug was on , but this larger size allows
more SMBs to use small buffer alloc and is still much more
2007-07-07 03:13:06 +04:00
efficient to alloc 1 per page off the slab compared to 17 K ( 5 page )
2005-04-17 02:20:36 +04:00
alloc of large cifs buffers even when page debugging is on */
2017-06-11 05:50:33 +03:00
cifs_sm_req_cachep = kmem_cache_create_usercopy ( " cifs_small_rq " ,
2007-07-07 03:13:06 +04:00
MAX_CIFS_SMALL_BUFFER_SIZE , 0 , SLAB_HWCACHE_ALIGN ,
2017-06-11 05:50:33 +03:00
0 , MAX_CIFS_SMALL_BUFFER_SIZE , NULL ) ;
2005-04-17 02:20:36 +04:00
if ( cifs_sm_req_cachep = = NULL ) {
mempool_destroy ( cifs_req_poolp ) ;
kmem_cache_destroy ( cifs_req_cachep ) ;
2007-07-07 03:13:06 +04:00
return - ENOMEM ;
2005-04-17 02:20:36 +04:00
}
2007-05-01 00:13:06 +04:00
if ( cifs_min_small < 2 )
2005-04-17 02:20:36 +04:00
cifs_min_small = 2 ;
else if ( cifs_min_small > 256 ) {
cifs_min_small = 256 ;
2013-05-05 07:12:25 +04:00
cifs_dbg ( FYI , " cifs_min_small set to maximum (256) \n " ) ;
2005-04-17 02:20:36 +04:00
}
2006-03-26 13:37:50 +04:00
cifs_sm_req_poolp = mempool_create_slab_pool ( cifs_min_small ,
cifs_sm_req_cachep ) ;
2005-04-17 02:20:36 +04:00
2007-05-01 00:13:06 +04:00
if ( cifs_sm_req_poolp = = NULL ) {
2005-04-17 02:20:36 +04:00
mempool_destroy ( cifs_req_poolp ) ;
kmem_cache_destroy ( cifs_req_cachep ) ;
kmem_cache_destroy ( cifs_sm_req_cachep ) ;
return - ENOMEM ;
}
return 0 ;
}
static void
cifs_destroy_request_bufs ( void )
{
mempool_destroy ( cifs_req_poolp ) ;
2006-09-27 12:49:40 +04:00
kmem_cache_destroy ( cifs_req_cachep ) ;
2005-04-17 02:20:36 +04:00
mempool_destroy ( cifs_sm_req_poolp ) ;
2006-09-27 12:49:40 +04:00
kmem_cache_destroy ( cifs_sm_req_cachep ) ;
2005-04-17 02:20:36 +04:00
}
static int
cifs_init_mids ( void )
{
cifs_mid_cachep = kmem_cache_create ( " cifs_mpx_ids " ,
2007-08-31 02:09:15 +04:00
sizeof ( struct mid_q_entry ) , 0 ,
SLAB_HWCACHE_ALIGN , NULL ) ;
2005-04-17 02:20:36 +04:00
if ( cifs_mid_cachep = = NULL )
return - ENOMEM ;
2006-03-26 13:37:50 +04:00
/* 3 is a reasonable minimum number of simultaneous operations */
cifs_mid_poolp = mempool_create_slab_pool ( 3 , cifs_mid_cachep ) ;
2007-05-01 00:13:06 +04:00
if ( cifs_mid_poolp = = NULL ) {
2005-04-17 02:20:36 +04:00
kmem_cache_destroy ( cifs_mid_cachep ) ;
return - ENOMEM ;
}
return 0 ;
}
static void
cifs_destroy_mids ( void )
{
mempool_destroy ( cifs_mid_poolp ) ;
2006-09-27 12:49:40 +04:00
kmem_cache_destroy ( cifs_mid_cachep ) ;
2005-04-17 02:20:36 +04:00
}
static int __init
init_cifs ( void )
{
int rc = 0 ;
cifs_proc_init ( ) ;
2008-11-14 21:44:38 +03:00
INIT_LIST_HEAD ( & cifs_tcp_ses_list ) ;
2011-02-24 08:39:23 +03:00
# ifdef CONFIG_CIFS_DNOTIFY_EXPERIMENTAL /* unused temporarily */
2005-10-11 06:52:13 +04:00
INIT_LIST_HEAD ( & GlobalDnotifyReqList ) ;
INIT_LIST_HEAD ( & GlobalDnotifyRsp_Q ) ;
2011-02-24 08:39:23 +03:00
# endif /* was needed for dnotify, and will be needed for inotify when VFS fix */
2005-04-17 02:20:36 +04:00
/*
* Initialize Global counters
*/
atomic_set ( & sesInfoAllocCount , 0 ) ;
atomic_set ( & tconInfoAllocCount , 0 ) ;
2007-07-07 03:13:06 +04:00
atomic_set ( & tcpSesAllocCount , 0 ) ;
2005-04-17 02:20:36 +04:00
atomic_set ( & tcpSesReconnectCount , 0 ) ;
atomic_set ( & tconInfoReconnectCount , 0 ) ;
atomic_set ( & bufAllocCount , 0 ) ;
2005-12-04 00:58:57 +03:00
atomic_set ( & smBufAllocCount , 0 ) ;
# ifdef CONFIG_CIFS_STATS2
atomic_set ( & totBufAllocCount , 0 ) ;
atomic_set ( & totSmBufAllocCount , 0 ) ;
2018-09-18 22:05:18 +03:00
if ( slow_rsp_threshold < 1 )
cifs_dbg ( FYI , " slow_response_threshold msgs disabled \n " ) ;
else if ( slow_rsp_threshold > 32767 )
cifs_dbg ( VFS ,
" slow response threshold set higher than recommended (0 to 32767) \n " ) ;
2005-12-04 00:58:57 +03:00
# endif /* CONFIG_CIFS_STATS2 */
2005-04-17 02:20:36 +04:00
atomic_set ( & midCount , 0 ) ;
GlobalCurrentXid = 0 ;
GlobalTotalActiveXid = 0 ;
GlobalMaxActiveXid = 0 ;
2010-10-18 21:59:37 +04:00
spin_lock_init ( & cifs_tcp_ses_lock ) ;
2005-04-17 02:20:36 +04:00
spin_lock_init ( & GlobalMid_Lock ) ;
2017-06-08 03:42:50 +03:00
cifs_lock_secret = get_random_u32 ( ) ;
2016-05-24 13:27:44 +03:00
2007-05-01 00:13:06 +04:00
if ( cifs_max_pending < 2 ) {
2005-04-17 02:20:36 +04:00
cifs_max_pending = 2 ;
2013-05-05 07:12:25 +04:00
cifs_dbg ( FYI , " cifs_max_pending set to min of 2 \n " ) ;
2012-03-20 13:55:09 +04:00
} else if ( cifs_max_pending > CIFS_MAX_REQ ) {
cifs_max_pending = CIFS_MAX_REQ ;
2013-05-05 07:12:25 +04:00
cifs_dbg ( FYI , " cifs_max_pending set to max of %u \n " ,
CIFS_MAX_REQ ) ;
2005-04-17 02:20:36 +04:00
}
2012-03-23 22:40:53 +04:00
cifsiod_wq = alloc_workqueue ( " cifsiod " , WQ_FREEZABLE | WQ_MEM_RECLAIM , 0 ) ;
if ( ! cifsiod_wq ) {
rc = - ENOMEM ;
goto out_clean_proc ;
}
2019-09-07 09:09:49 +03:00
/*
2019-09-09 21:30:15 +03:00
* Consider in future setting limit ! = 0 maybe to min ( num_of_cores - 1 , 3 )
* so that we don ' t launch too many worker threads but
2019-09-24 16:01:28 +03:00
* Documentation / core - api / workqueue . rst recommends setting it to 0
2019-09-07 09:09:49 +03:00
*/
2019-09-09 21:30:15 +03:00
/* WQ_UNBOUND allows decrypt tasks to run on any CPU */
2019-09-07 09:09:49 +03:00
decrypt_wq = alloc_workqueue ( " smb3decryptd " ,
2019-09-09 21:30:15 +03:00
WQ_UNBOUND | WQ_FREEZABLE | WQ_MEM_RECLAIM , 0 ) ;
2019-09-07 09:09:49 +03:00
if ( ! decrypt_wq ) {
rc = - ENOMEM ;
goto out_destroy_cifsiod_wq ;
}
cifs: move cifsFileInfo_put logic into a work-queue
This patch moves the final part of the cifsFileInfo_put() logic where we
need a write lock on lock_sem to be processed in a separate thread that
holds no other locks.
This is to prevent deadlocks like the one below:
> there are 6 processes looping to while trying to down_write
> cinode->lock_sem, 5 of them from _cifsFileInfo_put, and one from
> cifs_new_fileinfo
>
> and there are 5 other processes which are blocked, several of them
> waiting on either PG_writeback or PG_locked (which are both set), all
> for the same page of the file
>
> 2 inode_lock() (inode->i_rwsem) for the file
> 1 wait_on_page_writeback() for the page
> 1 down_read(inode->i_rwsem) for the inode of the directory
> 1 inode_lock()(inode->i_rwsem) for the inode of the directory
> 1 __lock_page
>
>
> so processes are blocked waiting on:
> page flags PG_locked and PG_writeback for one specific page
> inode->i_rwsem for the directory
> inode->i_rwsem for the file
> cifsInodeInflock_sem
>
>
>
> here are the more gory details (let me know if I need to provide
> anything more/better):
>
> [0 00:48:22.765] [UN] PID: 8863 TASK: ffff8c691547c5c0 CPU: 3
> COMMAND: "reopen_file"
> #0 [ffff9965007e3ba8] __schedule at ffffffff9b6e6095
> #1 [ffff9965007e3c38] schedule at ffffffff9b6e64df
> #2 [ffff9965007e3c48] rwsem_down_write_slowpath at ffffffff9af283d7
> #3 [ffff9965007e3cb8] legitimize_path at ffffffff9b0f975d
> #4 [ffff9965007e3d08] path_openat at ffffffff9b0fe55d
> #5 [ffff9965007e3dd8] do_filp_open at ffffffff9b100a33
> #6 [ffff9965007e3ee0] do_sys_open at ffffffff9b0eb2d6
> #7 [ffff9965007e3f38] do_syscall_64 at ffffffff9ae04315
> * (I think legitimize_path is bogus)
>
> in path_openat
> } else {
> const char *s = path_init(nd, flags);
> while (!(error = link_path_walk(s, nd)) &&
> (error = do_last(nd, file, op)) > 0) { <<<<
>
> do_last:
> if (open_flag & O_CREAT)
> inode_lock(dir->d_inode); <<<<
> else
> so it's trying to take inode->i_rwsem for the directory
>
> DENTRY INODE SUPERBLK TYPE PATH
> ffff8c68bb8e79c0 ffff8c691158ef20 ffff8c6915bf9000 DIR /mnt/vm1_smb/
> inode.i_rwsem is ffff8c691158efc0
>
> <struct rw_semaphore 0xffff8c691158efc0>:
> owner: <struct task_struct 0xffff8c6914275d00> (UN - 8856 -
> reopen_file), counter: 0x0000000000000003
> waitlist: 2
> 0xffff9965007e3c90 8863 reopen_file UN 0 1:29:22.926
> RWSEM_WAITING_FOR_WRITE
> 0xffff996500393e00 9802 ls UN 0 1:17:26.700
> RWSEM_WAITING_FOR_READ
>
>
> the owner of the inode.i_rwsem of the directory is:
>
> [0 00:00:00.109] [UN] PID: 8856 TASK: ffff8c6914275d00 CPU: 3
> COMMAND: "reopen_file"
> #0 [ffff99650065b828] __schedule at ffffffff9b6e6095
> #1 [ffff99650065b8b8] schedule at ffffffff9b6e64df
> #2 [ffff99650065b8c8] schedule_timeout at ffffffff9b6e9f89
> #3 [ffff99650065b940] msleep at ffffffff9af573a9
> #4 [ffff99650065b948] _cifsFileInfo_put.cold.63 at ffffffffc0a42dd6 [cifs]
> #5 [ffff99650065ba38] cifs_writepage_locked at ffffffffc0a0b8f3 [cifs]
> #6 [ffff99650065bab0] cifs_launder_page at ffffffffc0a0bb72 [cifs]
> #7 [ffff99650065bb30] invalidate_inode_pages2_range at ffffffff9b04d4bd
> #8 [ffff99650065bcb8] cifs_invalidate_mapping at ffffffffc0a11339 [cifs]
> #9 [ffff99650065bcd0] cifs_revalidate_mapping at ffffffffc0a1139a [cifs]
> #10 [ffff99650065bcf0] cifs_d_revalidate at ffffffffc0a014f6 [cifs]
> #11 [ffff99650065bd08] path_openat at ffffffff9b0fe7f7
> #12 [ffff99650065bdd8] do_filp_open at ffffffff9b100a33
> #13 [ffff99650065bee0] do_sys_open at ffffffff9b0eb2d6
> #14 [ffff99650065bf38] do_syscall_64 at ffffffff9ae04315
>
> cifs_launder_page is for page 0xffffd1e2c07d2480
>
> crash> page.index,mapping,flags 0xffffd1e2c07d2480
> index = 0x8
> mapping = 0xffff8c68f3cd0db0
> flags = 0xfffffc0008095
>
> PAGE-FLAG BIT VALUE
> PG_locked 0 0000001
> PG_uptodate 2 0000004
> PG_lru 4 0000010
> PG_waiters 7 0000080
> PG_writeback 15 0008000
>
>
> inode is ffff8c68f3cd0c40
> inode.i_rwsem is ffff8c68f3cd0ce0
> DENTRY INODE SUPERBLK TYPE PATH
> ffff8c68a1f1b480 ffff8c68f3cd0c40 ffff8c6915bf9000 REG
> /mnt/vm1_smb/testfile.8853
>
>
> this process holds the inode->i_rwsem for the parent directory, is
> laundering a page attached to the inode of the file it's opening, and in
> _cifsFileInfo_put is trying to down_write the cifsInodeInflock_sem
> for the file itself.
>
>
> <struct rw_semaphore 0xffff8c68f3cd0ce0>:
> owner: <struct task_struct 0xffff8c6914272e80> (UN - 8854 -
> reopen_file), counter: 0x0000000000000003
> waitlist: 1
> 0xffff9965005dfd80 8855 reopen_file UN 0 1:29:22.912
> RWSEM_WAITING_FOR_WRITE
>
> this is the inode.i_rwsem for the file
>
> the owner:
>
> [0 00:48:22.739] [UN] PID: 8854 TASK: ffff8c6914272e80 CPU: 2
> COMMAND: "reopen_file"
> #0 [ffff99650054fb38] __schedule at ffffffff9b6e6095
> #1 [ffff99650054fbc8] schedule at ffffffff9b6e64df
> #2 [ffff99650054fbd8] io_schedule at ffffffff9b6e68e2
> #3 [ffff99650054fbe8] __lock_page at ffffffff9b03c56f
> #4 [ffff99650054fc80] pagecache_get_page at ffffffff9b03dcdf
> #5 [ffff99650054fcc0] grab_cache_page_write_begin at ffffffff9b03ef4c
> #6 [ffff99650054fcd0] cifs_write_begin at ffffffffc0a064ec [cifs]
> #7 [ffff99650054fd30] generic_perform_write at ffffffff9b03bba4
> #8 [ffff99650054fda8] __generic_file_write_iter at ffffffff9b04060a
> #9 [ffff99650054fdf0] cifs_strict_writev.cold.70 at ffffffffc0a4469b [cifs]
> #10 [ffff99650054fe48] new_sync_write at ffffffff9b0ec1dd
> #11 [ffff99650054fed0] vfs_write at ffffffff9b0eed35
> #12 [ffff99650054ff00] ksys_write at ffffffff9b0eefd9
> #13 [ffff99650054ff38] do_syscall_64 at ffffffff9ae04315
>
> the process holds the inode->i_rwsem for the file to which it's writing,
> and is trying to __lock_page for the same page as in the other processes
>
>
> the other tasks:
> [0 00:00:00.028] [UN] PID: 8859 TASK: ffff8c6915479740 CPU: 2
> COMMAND: "reopen_file"
> #0 [ffff9965007b39d8] __schedule at ffffffff9b6e6095
> #1 [ffff9965007b3a68] schedule at ffffffff9b6e64df
> #2 [ffff9965007b3a78] schedule_timeout at ffffffff9b6e9f89
> #3 [ffff9965007b3af0] msleep at ffffffff9af573a9
> #4 [ffff9965007b3af8] cifs_new_fileinfo.cold.61 at ffffffffc0a42a07 [cifs]
> #5 [ffff9965007b3b78] cifs_open at ffffffffc0a0709d [cifs]
> #6 [ffff9965007b3cd8] do_dentry_open at ffffffff9b0e9b7a
> #7 [ffff9965007b3d08] path_openat at ffffffff9b0fe34f
> #8 [ffff9965007b3dd8] do_filp_open at ffffffff9b100a33
> #9 [ffff9965007b3ee0] do_sys_open at ffffffff9b0eb2d6
> #10 [ffff9965007b3f38] do_syscall_64 at ffffffff9ae04315
>
> this is opening the file, and is trying to down_write cinode->lock_sem
>
>
> [0 00:00:00.041] [UN] PID: 8860 TASK: ffff8c691547ae80 CPU: 2
> COMMAND: "reopen_file"
> [0 00:00:00.057] [UN] PID: 8861 TASK: ffff8c6915478000 CPU: 3
> COMMAND: "reopen_file"
> [0 00:00:00.059] [UN] PID: 8858 TASK: ffff8c6914271740 CPU: 2
> COMMAND: "reopen_file"
> [0 00:00:00.109] [UN] PID: 8862 TASK: ffff8c691547dd00 CPU: 6
> COMMAND: "reopen_file"
> #0 [ffff9965007c3c78] __schedule at ffffffff9b6e6095
> #1 [ffff9965007c3d08] schedule at ffffffff9b6e64df
> #2 [ffff9965007c3d18] schedule_timeout at ffffffff9b6e9f89
> #3 [ffff9965007c3d90] msleep at ffffffff9af573a9
> #4 [ffff9965007c3d98] _cifsFileInfo_put.cold.63 at ffffffffc0a42dd6 [cifs]
> #5 [ffff9965007c3e88] cifs_close at ffffffffc0a07aaf [cifs]
> #6 [ffff9965007c3ea0] __fput at ffffffff9b0efa6e
> #7 [ffff9965007c3ee8] task_work_run at ffffffff9aef1614
> #8 [ffff9965007c3f20] exit_to_usermode_loop at ffffffff9ae03d6f
> #9 [ffff9965007c3f38] do_syscall_64 at ffffffff9ae0444c
>
> closing the file, and trying to down_write cifsi->lock_sem
>
>
> [0 00:48:22.839] [UN] PID: 8857 TASK: ffff8c6914270000 CPU: 7
> COMMAND: "reopen_file"
> #0 [ffff9965006a7cc8] __schedule at ffffffff9b6e6095
> #1 [ffff9965006a7d58] schedule at ffffffff9b6e64df
> #2 [ffff9965006a7d68] io_schedule at ffffffff9b6e68e2
> #3 [ffff9965006a7d78] wait_on_page_bit at ffffffff9b03cac6
> #4 [ffff9965006a7e10] __filemap_fdatawait_range at ffffffff9b03b028
> #5 [ffff9965006a7ed8] filemap_write_and_wait at ffffffff9b040165
> #6 [ffff9965006a7ef0] cifs_flush at ffffffffc0a0c2fa [cifs]
> #7 [ffff9965006a7f10] filp_close at ffffffff9b0e93f1
> #8 [ffff9965006a7f30] __x64_sys_close at ffffffff9b0e9a0e
> #9 [ffff9965006a7f38] do_syscall_64 at ffffffff9ae04315
>
> in __filemap_fdatawait_range
> wait_on_page_writeback(page);
> for the same page of the file
>
>
>
> [0 00:48:22.718] [UN] PID: 8855 TASK: ffff8c69142745c0 CPU: 7
> COMMAND: "reopen_file"
> #0 [ffff9965005dfc98] __schedule at ffffffff9b6e6095
> #1 [ffff9965005dfd28] schedule at ffffffff9b6e64df
> #2 [ffff9965005dfd38] rwsem_down_write_slowpath at ffffffff9af283d7
> #3 [ffff9965005dfdf0] cifs_strict_writev at ffffffffc0a0c40a [cifs]
> #4 [ffff9965005dfe48] new_sync_write at ffffffff9b0ec1dd
> #5 [ffff9965005dfed0] vfs_write at ffffffff9b0eed35
> #6 [ffff9965005dff00] ksys_write at ffffffff9b0eefd9
> #7 [ffff9965005dff38] do_syscall_64 at ffffffff9ae04315
>
> inode_lock(inode);
>
>
> and one 'ls' later on, to see whether the rest of the mount is available
> (the test file is in the root, so we get blocked up on the directory
> ->i_rwsem), so the entire mount is unavailable
>
> [0 00:36:26.473] [UN] PID: 9802 TASK: ffff8c691436ae80 CPU: 4
> COMMAND: "ls"
> #0 [ffff996500393d28] __schedule at ffffffff9b6e6095
> #1 [ffff996500393db8] schedule at ffffffff9b6e64df
> #2 [ffff996500393dc8] rwsem_down_read_slowpath at ffffffff9b6e9421
> #3 [ffff996500393e78] down_read_killable at ffffffff9b6e95e2
> #4 [ffff996500393e88] iterate_dir at ffffffff9b103c56
> #5 [ffff996500393ec8] ksys_getdents64 at ffffffff9b104b0c
> #6 [ffff996500393f30] __x64_sys_getdents64 at ffffffff9b104bb6
> #7 [ffff996500393f38] do_syscall_64 at ffffffff9ae04315
>
> in iterate_dir:
> if (shared)
> res = down_read_killable(&inode->i_rwsem); <<<<
> else
> res = down_write_killable(&inode->i_rwsem);
>
Reported-by: Frank Sorenson <sorenson@redhat.com>
Reviewed-by: Pavel Shilovsky <pshilov@microsoft.com>
Signed-off-by: Ronnie Sahlberg <lsahlber@redhat.com>
Signed-off-by: Steve French <stfrench@microsoft.com>
2019-11-03 06:06:37 +03:00
fileinfo_put_wq = alloc_workqueue ( " cifsfileinfoput " ,
WQ_UNBOUND | WQ_FREEZABLE | WQ_MEM_RECLAIM , 0 ) ;
if ( ! fileinfo_put_wq ) {
rc = - ENOMEM ;
goto out_destroy_decrypt_wq ;
}
2017-05-03 18:54:01 +03:00
cifsoplockd_wq = alloc_workqueue ( " cifsoplockd " ,
WQ_FREEZABLE | WQ_MEM_RECLAIM , 0 ) ;
if ( ! cifsoplockd_wq ) {
rc = - ENOMEM ;
cifs: move cifsFileInfo_put logic into a work-queue
This patch moves the final part of the cifsFileInfo_put() logic where we
need a write lock on lock_sem to be processed in a separate thread that
holds no other locks.
This is to prevent deadlocks like the one below:
> there are 6 processes looping while trying to down_write
> cinode->lock_sem, 5 of them from _cifsFileInfo_put, and one from
> cifs_new_fileinfo
>
> and there are 5 other processes which are blocked, several of them
> waiting on either PG_writeback or PG_locked (which are both set), all
> for the same page of the file
>
> 2 inode_lock() (inode->i_rwsem) for the file
> 1 wait_on_page_writeback() for the page
> 1 down_read(inode->i_rwsem) for the inode of the directory
> 1 inode_lock()(inode->i_rwsem) for the inode of the directory
> 1 __lock_page
>
>
> so processes are blocked waiting on:
> page flags PG_locked and PG_writeback for one specific page
> inode->i_rwsem for the directory
> inode->i_rwsem for the file
> cifsInodeInfo->lock_sem
>
>
>
> here are the more gory details (let me know if I need to provide
> anything more/better):
>
> [0 00:48:22.765] [UN] PID: 8863 TASK: ffff8c691547c5c0 CPU: 3
> COMMAND: "reopen_file"
> #0 [ffff9965007e3ba8] __schedule at ffffffff9b6e6095
> #1 [ffff9965007e3c38] schedule at ffffffff9b6e64df
> #2 [ffff9965007e3c48] rwsem_down_write_slowpath at ffffffff9af283d7
> #3 [ffff9965007e3cb8] legitimize_path at ffffffff9b0f975d
> #4 [ffff9965007e3d08] path_openat at ffffffff9b0fe55d
> #5 [ffff9965007e3dd8] do_filp_open at ffffffff9b100a33
> #6 [ffff9965007e3ee0] do_sys_open at ffffffff9b0eb2d6
> #7 [ffff9965007e3f38] do_syscall_64 at ffffffff9ae04315
> * (I think legitimize_path is bogus)
>
> in path_openat
> } else {
> const char *s = path_init(nd, flags);
> while (!(error = link_path_walk(s, nd)) &&
> (error = do_last(nd, file, op)) > 0) { <<<<
>
> do_last:
> if (open_flag & O_CREAT)
> inode_lock(dir->d_inode); <<<<
> else
> so it's trying to take inode->i_rwsem for the directory
>
> DENTRY INODE SUPERBLK TYPE PATH
> ffff8c68bb8e79c0 ffff8c691158ef20 ffff8c6915bf9000 DIR /mnt/vm1_smb/
> inode.i_rwsem is ffff8c691158efc0
>
> <struct rw_semaphore 0xffff8c691158efc0>:
> owner: <struct task_struct 0xffff8c6914275d00> (UN - 8856 -
> reopen_file), counter: 0x0000000000000003
> waitlist: 2
> 0xffff9965007e3c90 8863 reopen_file UN 0 1:29:22.926
> RWSEM_WAITING_FOR_WRITE
> 0xffff996500393e00 9802 ls UN 0 1:17:26.700
> RWSEM_WAITING_FOR_READ
>
>
> the owner of the inode.i_rwsem of the directory is:
>
> [0 00:00:00.109] [UN] PID: 8856 TASK: ffff8c6914275d00 CPU: 3
> COMMAND: "reopen_file"
> #0 [ffff99650065b828] __schedule at ffffffff9b6e6095
> #1 [ffff99650065b8b8] schedule at ffffffff9b6e64df
> #2 [ffff99650065b8c8] schedule_timeout at ffffffff9b6e9f89
> #3 [ffff99650065b940] msleep at ffffffff9af573a9
> #4 [ffff99650065b948] _cifsFileInfo_put.cold.63 at ffffffffc0a42dd6 [cifs]
> #5 [ffff99650065ba38] cifs_writepage_locked at ffffffffc0a0b8f3 [cifs]
> #6 [ffff99650065bab0] cifs_launder_page at ffffffffc0a0bb72 [cifs]
> #7 [ffff99650065bb30] invalidate_inode_pages2_range at ffffffff9b04d4bd
> #8 [ffff99650065bcb8] cifs_invalidate_mapping at ffffffffc0a11339 [cifs]
> #9 [ffff99650065bcd0] cifs_revalidate_mapping at ffffffffc0a1139a [cifs]
> #10 [ffff99650065bcf0] cifs_d_revalidate at ffffffffc0a014f6 [cifs]
> #11 [ffff99650065bd08] path_openat at ffffffff9b0fe7f7
> #12 [ffff99650065bdd8] do_filp_open at ffffffff9b100a33
> #13 [ffff99650065bee0] do_sys_open at ffffffff9b0eb2d6
> #14 [ffff99650065bf38] do_syscall_64 at ffffffff9ae04315
>
> cifs_launder_page is for page 0xffffd1e2c07d2480
>
> crash> page.index,mapping,flags 0xffffd1e2c07d2480
> index = 0x8
> mapping = 0xffff8c68f3cd0db0
> flags = 0xfffffc0008095
>
> PAGE-FLAG BIT VALUE
> PG_locked 0 0000001
> PG_uptodate 2 0000004
> PG_lru 4 0000010
> PG_waiters 7 0000080
> PG_writeback 15 0008000
>
>
> inode is ffff8c68f3cd0c40
> inode.i_rwsem is ffff8c68f3cd0ce0
> DENTRY INODE SUPERBLK TYPE PATH
> ffff8c68a1f1b480 ffff8c68f3cd0c40 ffff8c6915bf9000 REG
> /mnt/vm1_smb/testfile.8853
>
>
> this process holds the inode->i_rwsem for the parent directory, is
> laundering a page attached to the inode of the file it's opening, and in
> _cifsFileInfo_put is trying to down_write the cifsInodeInfo->lock_sem
> for the file itself.
>
>
> <struct rw_semaphore 0xffff8c68f3cd0ce0>:
> owner: <struct task_struct 0xffff8c6914272e80> (UN - 8854 -
> reopen_file), counter: 0x0000000000000003
> waitlist: 1
> 0xffff9965005dfd80 8855 reopen_file UN 0 1:29:22.912
> RWSEM_WAITING_FOR_WRITE
>
> this is the inode.i_rwsem for the file
>
> the owner:
>
> [0 00:48:22.739] [UN] PID: 8854 TASK: ffff8c6914272e80 CPU: 2
> COMMAND: "reopen_file"
> #0 [ffff99650054fb38] __schedule at ffffffff9b6e6095
> #1 [ffff99650054fbc8] schedule at ffffffff9b6e64df
> #2 [ffff99650054fbd8] io_schedule at ffffffff9b6e68e2
> #3 [ffff99650054fbe8] __lock_page at ffffffff9b03c56f
> #4 [ffff99650054fc80] pagecache_get_page at ffffffff9b03dcdf
> #5 [ffff99650054fcc0] grab_cache_page_write_begin at ffffffff9b03ef4c
> #6 [ffff99650054fcd0] cifs_write_begin at ffffffffc0a064ec [cifs]
> #7 [ffff99650054fd30] generic_perform_write at ffffffff9b03bba4
> #8 [ffff99650054fda8] __generic_file_write_iter at ffffffff9b04060a
> #9 [ffff99650054fdf0] cifs_strict_writev.cold.70 at ffffffffc0a4469b [cifs]
> #10 [ffff99650054fe48] new_sync_write at ffffffff9b0ec1dd
> #11 [ffff99650054fed0] vfs_write at ffffffff9b0eed35
> #12 [ffff99650054ff00] ksys_write at ffffffff9b0eefd9
> #13 [ffff99650054ff38] do_syscall_64 at ffffffff9ae04315
>
> the process holds the inode->i_rwsem for the file to which it's writing,
> and is trying to __lock_page for the same page as in the other processes
>
>
> the other tasks:
> [0 00:00:00.028] [UN] PID: 8859 TASK: ffff8c6915479740 CPU: 2
> COMMAND: "reopen_file"
> #0 [ffff9965007b39d8] __schedule at ffffffff9b6e6095
> #1 [ffff9965007b3a68] schedule at ffffffff9b6e64df
> #2 [ffff9965007b3a78] schedule_timeout at ffffffff9b6e9f89
> #3 [ffff9965007b3af0] msleep at ffffffff9af573a9
> #4 [ffff9965007b3af8] cifs_new_fileinfo.cold.61 at ffffffffc0a42a07 [cifs]
> #5 [ffff9965007b3b78] cifs_open at ffffffffc0a0709d [cifs]
> #6 [ffff9965007b3cd8] do_dentry_open at ffffffff9b0e9b7a
> #7 [ffff9965007b3d08] path_openat at ffffffff9b0fe34f
> #8 [ffff9965007b3dd8] do_filp_open at ffffffff9b100a33
> #9 [ffff9965007b3ee0] do_sys_open at ffffffff9b0eb2d6
> #10 [ffff9965007b3f38] do_syscall_64 at ffffffff9ae04315
>
> this is opening the file, and is trying to down_write cinode->lock_sem
>
>
> [0 00:00:00.041] [UN] PID: 8860 TASK: ffff8c691547ae80 CPU: 2
> COMMAND: "reopen_file"
> [0 00:00:00.057] [UN] PID: 8861 TASK: ffff8c6915478000 CPU: 3
> COMMAND: "reopen_file"
> [0 00:00:00.059] [UN] PID: 8858 TASK: ffff8c6914271740 CPU: 2
> COMMAND: "reopen_file"
> [0 00:00:00.109] [UN] PID: 8862 TASK: ffff8c691547dd00 CPU: 6
> COMMAND: "reopen_file"
> #0 [ffff9965007c3c78] __schedule at ffffffff9b6e6095
> #1 [ffff9965007c3d08] schedule at ffffffff9b6e64df
> #2 [ffff9965007c3d18] schedule_timeout at ffffffff9b6e9f89
> #3 [ffff9965007c3d90] msleep at ffffffff9af573a9
> #4 [ffff9965007c3d98] _cifsFileInfo_put.cold.63 at ffffffffc0a42dd6 [cifs]
> #5 [ffff9965007c3e88] cifs_close at ffffffffc0a07aaf [cifs]
> #6 [ffff9965007c3ea0] __fput at ffffffff9b0efa6e
> #7 [ffff9965007c3ee8] task_work_run at ffffffff9aef1614
> #8 [ffff9965007c3f20] exit_to_usermode_loop at ffffffff9ae03d6f
> #9 [ffff9965007c3f38] do_syscall_64 at ffffffff9ae0444c
>
> closing the file, and trying to down_write cifsi->lock_sem
>
>
> [0 00:48:22.839] [UN] PID: 8857 TASK: ffff8c6914270000 CPU: 7
> COMMAND: "reopen_file"
> #0 [ffff9965006a7cc8] __schedule at ffffffff9b6e6095
> #1 [ffff9965006a7d58] schedule at ffffffff9b6e64df
> #2 [ffff9965006a7d68] io_schedule at ffffffff9b6e68e2
> #3 [ffff9965006a7d78] wait_on_page_bit at ffffffff9b03cac6
> #4 [ffff9965006a7e10] __filemap_fdatawait_range at ffffffff9b03b028
> #5 [ffff9965006a7ed8] filemap_write_and_wait at ffffffff9b040165
> #6 [ffff9965006a7ef0] cifs_flush at ffffffffc0a0c2fa [cifs]
> #7 [ffff9965006a7f10] filp_close at ffffffff9b0e93f1
> #8 [ffff9965006a7f30] __x64_sys_close at ffffffff9b0e9a0e
> #9 [ffff9965006a7f38] do_syscall_64 at ffffffff9ae04315
>
> in __filemap_fdatawait_range
> wait_on_page_writeback(page);
> for the same page of the file
>
>
>
> [0 00:48:22.718] [UN] PID: 8855 TASK: ffff8c69142745c0 CPU: 7
> COMMAND: "reopen_file"
> #0 [ffff9965005dfc98] __schedule at ffffffff9b6e6095
> #1 [ffff9965005dfd28] schedule at ffffffff9b6e64df
> #2 [ffff9965005dfd38] rwsem_down_write_slowpath at ffffffff9af283d7
> #3 [ffff9965005dfdf0] cifs_strict_writev at ffffffffc0a0c40a [cifs]
> #4 [ffff9965005dfe48] new_sync_write at ffffffff9b0ec1dd
> #5 [ffff9965005dfed0] vfs_write at ffffffff9b0eed35
> #6 [ffff9965005dff00] ksys_write at ffffffff9b0eefd9
> #7 [ffff9965005dff38] do_syscall_64 at ffffffff9ae04315
>
> inode_lock(inode);
>
>
> and one 'ls' later on, to see whether the rest of the mount is available
> (the test file is in the root, so we get blocked up on the directory
> ->i_rwsem), so the entire mount is unavailable
>
> [0 00:36:26.473] [UN] PID: 9802 TASK: ffff8c691436ae80 CPU: 4
> COMMAND: "ls"
> #0 [ffff996500393d28] __schedule at ffffffff9b6e6095
> #1 [ffff996500393db8] schedule at ffffffff9b6e64df
> #2 [ffff996500393dc8] rwsem_down_read_slowpath at ffffffff9b6e9421
> #3 [ffff996500393e78] down_read_killable at ffffffff9b6e95e2
> #4 [ffff996500393e88] iterate_dir at ffffffff9b103c56
> #5 [ffff996500393ec8] ksys_getdents64 at ffffffff9b104b0c
> #6 [ffff996500393f30] __x64_sys_getdents64 at ffffffff9b104bb6
> #7 [ffff996500393f38] do_syscall_64 at ffffffff9ae04315
>
> in iterate_dir:
> if (shared)
> res = down_read_killable(&inode->i_rwsem); <<<<
> else
> res = down_write_killable(&inode->i_rwsem);
>
Reported-by: Frank Sorenson <sorenson@redhat.com>
Reviewed-by: Pavel Shilovsky <pshilov@microsoft.com>
Signed-off-by: Ronnie Sahlberg <lsahlber@redhat.com>
Signed-off-by: Steve French <stfrench@microsoft.com>
2019-11-03 06:06:37 +03:00
goto out_destroy_fileinfo_put_wq ;
2017-05-03 18:54:01 +03:00
}
2010-07-05 16:41:50 +04:00
rc = cifs_fscache_register ( ) ;
if ( rc )
2017-05-03 18:54:01 +03:00
goto out_destroy_cifsoplockd_wq ;
2010-07-05 16:41:50 +04:00
2005-04-17 02:20:36 +04:00
rc = cifs_init_inodecache ( ) ;
2006-04-22 02:52:25 +04:00
if ( rc )
2010-09-22 23:15:36 +04:00
goto out_unreg_fscache ;
2006-04-22 02:52:25 +04:00
rc = cifs_init_mids ( ) ;
if ( rc )
goto out_destroy_inodecache ;
rc = cifs_init_request_bufs ( ) ;
if ( rc )
goto out_destroy_mids ;
2018-11-14 21:24:03 +03:00
# ifdef CONFIG_CIFS_DFS_UPCALL
rc = dfs_cache_init ( ) ;
if ( rc )
goto out_destroy_request_bufs ;
# endif /* CONFIG_CIFS_DFS_UPCALL */
2007-11-03 08:02:24 +03:00
# ifdef CONFIG_CIFS_UPCALL
2016-05-18 02:20:13 +03:00
rc = init_cifs_spnego ( ) ;
2007-11-03 08:02:24 +03:00
if ( rc )
2018-11-14 21:24:03 +03:00
goto out_destroy_dfs_cache ;
2011-04-28 08:34:35 +04:00
# endif /* CONFIG_CIFS_UPCALL */
rc = init_cifs_idmap ( ) ;
if ( rc )
2011-05-06 11:35:00 +04:00
goto out_register_key_type ;
2011-04-28 08:34:35 +04:00
rc = register_filesystem ( & cifs_fs_type ) ;
if ( rc )
2011-05-06 11:35:00 +04:00
goto out_init_cifs_idmap ;
2006-04-22 02:52:25 +04:00
2018-05-24 05:44:53 +03:00
rc = register_filesystem ( & smb3_fs_type ) ;
if ( rc ) {
unregister_filesystem ( & cifs_fs_type ) ;
goto out_init_cifs_idmap ;
}
2006-04-22 02:52:25 +04:00
return 0 ;
2011-05-06 11:35:00 +04:00
out_init_cifs_idmap :
2011-04-28 08:34:35 +04:00
exit_cifs_idmap ( ) ;
2011-05-06 11:35:00 +04:00
out_register_key_type :
2007-11-03 08:02:24 +03:00
# ifdef CONFIG_CIFS_UPCALL
2016-05-18 02:20:13 +03:00
exit_cifs_spnego ( ) ;
2018-11-14 21:24:03 +03:00
out_destroy_dfs_cache :
# endif
# ifdef CONFIG_CIFS_DFS_UPCALL
dfs_cache_destroy ( ) ;
2011-05-06 11:35:00 +04:00
out_destroy_request_bufs :
2010-08-07 23:54:46 +04:00
# endif
2006-04-22 02:52:25 +04:00
cifs_destroy_request_bufs ( ) ;
2010-09-22 23:15:36 +04:00
out_destroy_mids :
2006-04-22 02:52:25 +04:00
cifs_destroy_mids ( ) ;
2010-09-22 23:15:36 +04:00
out_destroy_inodecache :
2006-04-22 02:52:25 +04:00
cifs_destroy_inodecache ( ) ;
2010-09-22 23:15:36 +04:00
out_unreg_fscache :
2010-07-05 16:41:50 +04:00
cifs_fscache_unregister ( ) ;
2017-05-03 18:54:01 +03:00
out_destroy_cifsoplockd_wq :
destroy_workqueue ( cifsoplockd_wq ) ;
cifs: move cifsFileInfo_put logic into a work-queue
This patch moves the final part of the cifsFileInfo_put() logic where we
need a write lock on lock_sem to be processed in a separate thread that
holds no other locks.
This is to prevent deadlocks like the one below:
> there are 6 processes looping while trying to down_write
> cinode->lock_sem, 5 of them from _cifsFileInfo_put, and one from
> cifs_new_fileinfo
>
> and there are 5 other processes which are blocked, several of them
> waiting on either PG_writeback or PG_locked (which are both set), all
> for the same page of the file
>
> 2 inode_lock() (inode->i_rwsem) for the file
> 1 wait_on_page_writeback() for the page
> 1 down_read(inode->i_rwsem) for the inode of the directory
> 1 inode_lock()(inode->i_rwsem) for the inode of the directory
> 1 __lock_page
>
>
> so processes are blocked waiting on:
> page flags PG_locked and PG_writeback for one specific page
> inode->i_rwsem for the directory
> inode->i_rwsem for the file
> cifsInodeInfo->lock_sem
>
>
>
> here are the more gory details (let me know if I need to provide
> anything more/better):
>
> [0 00:48:22.765] [UN] PID: 8863 TASK: ffff8c691547c5c0 CPU: 3
> COMMAND: "reopen_file"
> #0 [ffff9965007e3ba8] __schedule at ffffffff9b6e6095
> #1 [ffff9965007e3c38] schedule at ffffffff9b6e64df
> #2 [ffff9965007e3c48] rwsem_down_write_slowpath at ffffffff9af283d7
> #3 [ffff9965007e3cb8] legitimize_path at ffffffff9b0f975d
> #4 [ffff9965007e3d08] path_openat at ffffffff9b0fe55d
> #5 [ffff9965007e3dd8] do_filp_open at ffffffff9b100a33
> #6 [ffff9965007e3ee0] do_sys_open at ffffffff9b0eb2d6
> #7 [ffff9965007e3f38] do_syscall_64 at ffffffff9ae04315
> * (I think legitimize_path is bogus)
>
> in path_openat
> } else {
> const char *s = path_init(nd, flags);
> while (!(error = link_path_walk(s, nd)) &&
> (error = do_last(nd, file, op)) > 0) { <<<<
>
> do_last:
> if (open_flag & O_CREAT)
> inode_lock(dir->d_inode); <<<<
> else
> so it's trying to take inode->i_rwsem for the directory
>
> DENTRY INODE SUPERBLK TYPE PATH
> ffff8c68bb8e79c0 ffff8c691158ef20 ffff8c6915bf9000 DIR /mnt/vm1_smb/
> inode.i_rwsem is ffff8c691158efc0
>
> <struct rw_semaphore 0xffff8c691158efc0>:
> owner: <struct task_struct 0xffff8c6914275d00> (UN - 8856 -
> reopen_file), counter: 0x0000000000000003
> waitlist: 2
> 0xffff9965007e3c90 8863 reopen_file UN 0 1:29:22.926
> RWSEM_WAITING_FOR_WRITE
> 0xffff996500393e00 9802 ls UN 0 1:17:26.700
> RWSEM_WAITING_FOR_READ
>
>
> the owner of the inode.i_rwsem of the directory is:
>
> [0 00:00:00.109] [UN] PID: 8856 TASK: ffff8c6914275d00 CPU: 3
> COMMAND: "reopen_file"
> #0 [ffff99650065b828] __schedule at ffffffff9b6e6095
> #1 [ffff99650065b8b8] schedule at ffffffff9b6e64df
> #2 [ffff99650065b8c8] schedule_timeout at ffffffff9b6e9f89
> #3 [ffff99650065b940] msleep at ffffffff9af573a9
> #4 [ffff99650065b948] _cifsFileInfo_put.cold.63 at ffffffffc0a42dd6 [cifs]
> #5 [ffff99650065ba38] cifs_writepage_locked at ffffffffc0a0b8f3 [cifs]
> #6 [ffff99650065bab0] cifs_launder_page at ffffffffc0a0bb72 [cifs]
> #7 [ffff99650065bb30] invalidate_inode_pages2_range at ffffffff9b04d4bd
> #8 [ffff99650065bcb8] cifs_invalidate_mapping at ffffffffc0a11339 [cifs]
> #9 [ffff99650065bcd0] cifs_revalidate_mapping at ffffffffc0a1139a [cifs]
> #10 [ffff99650065bcf0] cifs_d_revalidate at ffffffffc0a014f6 [cifs]
> #11 [ffff99650065bd08] path_openat at ffffffff9b0fe7f7
> #12 [ffff99650065bdd8] do_filp_open at ffffffff9b100a33
> #13 [ffff99650065bee0] do_sys_open at ffffffff9b0eb2d6
> #14 [ffff99650065bf38] do_syscall_64 at ffffffff9ae04315
>
> cifs_launder_page is for page 0xffffd1e2c07d2480
>
> crash> page.index,mapping,flags 0xffffd1e2c07d2480
> index = 0x8
> mapping = 0xffff8c68f3cd0db0
> flags = 0xfffffc0008095
>
> PAGE-FLAG BIT VALUE
> PG_locked 0 0000001
> PG_uptodate 2 0000004
> PG_lru 4 0000010
> PG_waiters 7 0000080
> PG_writeback 15 0008000
>
>
> inode is ffff8c68f3cd0c40
> inode.i_rwsem is ffff8c68f3cd0ce0
> DENTRY INODE SUPERBLK TYPE PATH
> ffff8c68a1f1b480 ffff8c68f3cd0c40 ffff8c6915bf9000 REG
> /mnt/vm1_smb/testfile.8853
>
>
> this process holds the inode->i_rwsem for the parent directory, is
> laundering a page attached to the inode of the file it's opening, and in
> _cifsFileInfo_put is trying to down_write the cifsInodeInfo->lock_sem
> for the file itself.
>
>
> <struct rw_semaphore 0xffff8c68f3cd0ce0>:
> owner: <struct task_struct 0xffff8c6914272e80> (UN - 8854 -
> reopen_file), counter: 0x0000000000000003
> waitlist: 1
> 0xffff9965005dfd80 8855 reopen_file UN 0 1:29:22.912
> RWSEM_WAITING_FOR_WRITE
>
> this is the inode.i_rwsem for the file
>
> the owner:
>
> [0 00:48:22.739] [UN] PID: 8854 TASK: ffff8c6914272e80 CPU: 2
> COMMAND: "reopen_file"
> #0 [ffff99650054fb38] __schedule at ffffffff9b6e6095
> #1 [ffff99650054fbc8] schedule at ffffffff9b6e64df
> #2 [ffff99650054fbd8] io_schedule at ffffffff9b6e68e2
> #3 [ffff99650054fbe8] __lock_page at ffffffff9b03c56f
> #4 [ffff99650054fc80] pagecache_get_page at ffffffff9b03dcdf
> #5 [ffff99650054fcc0] grab_cache_page_write_begin at ffffffff9b03ef4c
> #6 [ffff99650054fcd0] cifs_write_begin at ffffffffc0a064ec [cifs]
> #7 [ffff99650054fd30] generic_perform_write at ffffffff9b03bba4
> #8 [ffff99650054fda8] __generic_file_write_iter at ffffffff9b04060a
> #9 [ffff99650054fdf0] cifs_strict_writev.cold.70 at ffffffffc0a4469b [cifs]
> #10 [ffff99650054fe48] new_sync_write at ffffffff9b0ec1dd
> #11 [ffff99650054fed0] vfs_write at ffffffff9b0eed35
> #12 [ffff99650054ff00] ksys_write at ffffffff9b0eefd9
> #13 [ffff99650054ff38] do_syscall_64 at ffffffff9ae04315
>
> the process holds the inode->i_rwsem for the file to which it's writing,
> and is trying to __lock_page for the same page as in the other processes
>
>
> the other tasks:
> [0 00:00:00.028] [UN] PID: 8859 TASK: ffff8c6915479740 CPU: 2
> COMMAND: "reopen_file"
> #0 [ffff9965007b39d8] __schedule at ffffffff9b6e6095
> #1 [ffff9965007b3a68] schedule at ffffffff9b6e64df
> #2 [ffff9965007b3a78] schedule_timeout at ffffffff9b6e9f89
> #3 [ffff9965007b3af0] msleep at ffffffff9af573a9
> #4 [ffff9965007b3af8] cifs_new_fileinfo.cold.61 at ffffffffc0a42a07 [cifs]
> #5 [ffff9965007b3b78] cifs_open at ffffffffc0a0709d [cifs]
> #6 [ffff9965007b3cd8] do_dentry_open at ffffffff9b0e9b7a
> #7 [ffff9965007b3d08] path_openat at ffffffff9b0fe34f
> #8 [ffff9965007b3dd8] do_filp_open at ffffffff9b100a33
> #9 [ffff9965007b3ee0] do_sys_open at ffffffff9b0eb2d6
> #10 [ffff9965007b3f38] do_syscall_64 at ffffffff9ae04315
>
> this is opening the file, and is trying to down_write cinode->lock_sem
>
>
> [0 00:00:00.041] [UN] PID: 8860 TASK: ffff8c691547ae80 CPU: 2
> COMMAND: "reopen_file"
> [0 00:00:00.057] [UN] PID: 8861 TASK: ffff8c6915478000 CPU: 3
> COMMAND: "reopen_file"
> [0 00:00:00.059] [UN] PID: 8858 TASK: ffff8c6914271740 CPU: 2
> COMMAND: "reopen_file"
> [0 00:00:00.109] [UN] PID: 8862 TASK: ffff8c691547dd00 CPU: 6
> COMMAND: "reopen_file"
> #0 [ffff9965007c3c78] __schedule at ffffffff9b6e6095
> #1 [ffff9965007c3d08] schedule at ffffffff9b6e64df
> #2 [ffff9965007c3d18] schedule_timeout at ffffffff9b6e9f89
> #3 [ffff9965007c3d90] msleep at ffffffff9af573a9
> #4 [ffff9965007c3d98] _cifsFileInfo_put.cold.63 at ffffffffc0a42dd6 [cifs]
> #5 [ffff9965007c3e88] cifs_close at ffffffffc0a07aaf [cifs]
> #6 [ffff9965007c3ea0] __fput at ffffffff9b0efa6e
> #7 [ffff9965007c3ee8] task_work_run at ffffffff9aef1614
> #8 [ffff9965007c3f20] exit_to_usermode_loop at ffffffff9ae03d6f
> #9 [ffff9965007c3f38] do_syscall_64 at ffffffff9ae0444c
>
> closing the file, and trying to down_write cifsi->lock_sem
>
>
> [0 00:48:22.839] [UN] PID: 8857 TASK: ffff8c6914270000 CPU: 7
> COMMAND: "reopen_file"
> #0 [ffff9965006a7cc8] __schedule at ffffffff9b6e6095
> #1 [ffff9965006a7d58] schedule at ffffffff9b6e64df
> #2 [ffff9965006a7d68] io_schedule at ffffffff9b6e68e2
> #3 [ffff9965006a7d78] wait_on_page_bit at ffffffff9b03cac6
> #4 [ffff9965006a7e10] __filemap_fdatawait_range at ffffffff9b03b028
> #5 [ffff9965006a7ed8] filemap_write_and_wait at ffffffff9b040165
> #6 [ffff9965006a7ef0] cifs_flush at ffffffffc0a0c2fa [cifs]
> #7 [ffff9965006a7f10] filp_close at ffffffff9b0e93f1
> #8 [ffff9965006a7f30] __x64_sys_close at ffffffff9b0e9a0e
> #9 [ffff9965006a7f38] do_syscall_64 at ffffffff9ae04315
>
> in __filemap_fdatawait_range
> wait_on_page_writeback(page);
> for the same page of the file
>
>
>
> [0 00:48:22.718] [UN] PID: 8855 TASK: ffff8c69142745c0 CPU: 7
> COMMAND: "reopen_file"
> #0 [ffff9965005dfc98] __schedule at ffffffff9b6e6095
> #1 [ffff9965005dfd28] schedule at ffffffff9b6e64df
> #2 [ffff9965005dfd38] rwsem_down_write_slowpath at ffffffff9af283d7
> #3 [ffff9965005dfdf0] cifs_strict_writev at ffffffffc0a0c40a [cifs]
> #4 [ffff9965005dfe48] new_sync_write at ffffffff9b0ec1dd
> #5 [ffff9965005dfed0] vfs_write at ffffffff9b0eed35
> #6 [ffff9965005dff00] ksys_write at ffffffff9b0eefd9
> #7 [ffff9965005dff38] do_syscall_64 at ffffffff9ae04315
>
> inode_lock(inode);
>
>
> and one 'ls' later on, to see whether the rest of the mount is available
> (the test file is in the root, so we get blocked up on the directory
> ->i_rwsem), so the entire mount is unavailable
>
> [0 00:36:26.473] [UN] PID: 9802 TASK: ffff8c691436ae80 CPU: 4
> COMMAND: "ls"
> #0 [ffff996500393d28] __schedule at ffffffff9b6e6095
> #1 [ffff996500393db8] schedule at ffffffff9b6e64df
> #2 [ffff996500393dc8] rwsem_down_read_slowpath at ffffffff9b6e9421
> #3 [ffff996500393e78] down_read_killable at ffffffff9b6e95e2
> #4 [ffff996500393e88] iterate_dir at ffffffff9b103c56
> #5 [ffff996500393ec8] ksys_getdents64 at ffffffff9b104b0c
> #6 [ffff996500393f30] __x64_sys_getdents64 at ffffffff9b104bb6
> #7 [ffff996500393f38] do_syscall_64 at ffffffff9ae04315
>
> in iterate_dir:
> if (shared)
> res = down_read_killable(&inode->i_rwsem); <<<<
> else
> res = down_write_killable(&inode->i_rwsem);
>
Reported-by: Frank Sorenson <sorenson@redhat.com>
Reviewed-by: Pavel Shilovsky <pshilov@microsoft.com>
Signed-off-by: Ronnie Sahlberg <lsahlber@redhat.com>
Signed-off-by: Steve French <stfrench@microsoft.com>
2019-11-03 06:06:37 +03:00
out_destroy_fileinfo_put_wq :
destroy_workqueue ( fileinfo_put_wq ) ;
2019-09-07 09:09:49 +03:00
out_destroy_decrypt_wq :
destroy_workqueue ( decrypt_wq ) ;
2017-05-03 18:54:01 +03:00
out_destroy_cifsiod_wq :
2012-03-23 22:40:53 +04:00
destroy_workqueue ( cifsiod_wq ) ;
2010-09-22 23:15:36 +04:00
out_clean_proc :
cifs_proc_clean ( ) ;
2005-04-17 02:20:36 +04:00
return rc ;
}
static void __exit
exit_cifs ( void )
{
2018-05-24 05:44:53 +03:00
cifs_dbg ( NOISY , " exit_smb3 \n " ) ;
2012-03-21 14:27:55 +04:00
unregister_filesystem ( & cifs_fs_type ) ;
2018-05-24 05:44:53 +03:00
unregister_filesystem ( & smb3_fs_type ) ;
2008-04-24 12:56:07 +04:00
cifs_dfs_release_automount_timer ( ) ;
2011-04-28 08:34:35 +04:00
exit_cifs_idmap ( ) ;
2007-11-03 08:02:24 +03:00
# ifdef CONFIG_CIFS_UPCALL
2017-09-07 11:03:27 +03:00
exit_cifs_spnego ( ) ;
2018-11-14 21:24:03 +03:00
# endif
# ifdef CONFIG_CIFS_DFS_UPCALL
dfs_cache_destroy ( ) ;
2005-04-17 02:20:36 +04:00
# endif
cifs_destroy_request_bufs ( ) ;
2012-03-21 14:27:55 +04:00
cifs_destroy_mids ( ) ;
cifs_destroy_inodecache ( ) ;
cifs_fscache_unregister ( ) ;
2017-05-03 18:54:01 +03:00
destroy_workqueue ( cifsoplockd_wq ) ;
2019-09-07 09:09:49 +03:00
destroy_workqueue ( decrypt_wq ) ;
cifs: move cifsFileInfo_put logic into a work-queue
This patch moves the final part of the cifsFileInfo_put() logic where we
need a write lock on lock_sem to be processed in a separate thread that
holds no other locks.
This is to prevent deadlocks like the one below:
> there are 6 processes looping while trying to down_write
> cinode->lock_sem, 5 of them from _cifsFileInfo_put, and one from
> cifs_new_fileinfo
>
> and there are 5 other processes which are blocked, several of them
> waiting on either PG_writeback or PG_locked (which are both set), all
> for the same page of the file
>
> 2 inode_lock() (inode->i_rwsem) for the file
> 1 wait_on_page_writeback() for the page
> 1 down_read(inode->i_rwsem) for the inode of the directory
> 1 inode_lock()(inode->i_rwsem) for the inode of the directory
> 1 __lock_page
>
>
> so processes are blocked waiting on:
> page flags PG_locked and PG_writeback for one specific page
> inode->i_rwsem for the directory
> inode->i_rwsem for the file
> cifsInodeInfo->lock_sem
>
>
>
> here are the more gory details (let me know if I need to provide
> anything more/better):
>
> [0 00:48:22.765] [UN] PID: 8863 TASK: ffff8c691547c5c0 CPU: 3
> COMMAND: "reopen_file"
> #0 [ffff9965007e3ba8] __schedule at ffffffff9b6e6095
> #1 [ffff9965007e3c38] schedule at ffffffff9b6e64df
> #2 [ffff9965007e3c48] rwsem_down_write_slowpath at ffffffff9af283d7
> #3 [ffff9965007e3cb8] legitimize_path at ffffffff9b0f975d
> #4 [ffff9965007e3d08] path_openat at ffffffff9b0fe55d
> #5 [ffff9965007e3dd8] do_filp_open at ffffffff9b100a33
> #6 [ffff9965007e3ee0] do_sys_open at ffffffff9b0eb2d6
> #7 [ffff9965007e3f38] do_syscall_64 at ffffffff9ae04315
> * (I think legitimize_path is bogus)
>
> in path_openat
> } else {
> const char *s = path_init(nd, flags);
> while (!(error = link_path_walk(s, nd)) &&
> (error = do_last(nd, file, op)) > 0) { <<<<
>
> do_last:
> if (open_flag & O_CREAT)
> inode_lock(dir->d_inode); <<<<
> else
> so it's trying to take inode->i_rwsem for the directory
>
> DENTRY INODE SUPERBLK TYPE PATH
> ffff8c68bb8e79c0 ffff8c691158ef20 ffff8c6915bf9000 DIR /mnt/vm1_smb/
> inode.i_rwsem is ffff8c691158efc0
>
> <struct rw_semaphore 0xffff8c691158efc0>:
> owner: <struct task_struct 0xffff8c6914275d00> (UN - 8856 -
> reopen_file), counter: 0x0000000000000003
> waitlist: 2
> 0xffff9965007e3c90 8863 reopen_file UN 0 1:29:22.926
> RWSEM_WAITING_FOR_WRITE
> 0xffff996500393e00 9802 ls UN 0 1:17:26.700
> RWSEM_WAITING_FOR_READ
>
>
> the owner of the inode.i_rwsem of the directory is:
>
> [0 00:00:00.109] [UN] PID: 8856 TASK: ffff8c6914275d00 CPU: 3
> COMMAND: "reopen_file"
> #0 [ffff99650065b828] __schedule at ffffffff9b6e6095
> #1 [ffff99650065b8b8] schedule at ffffffff9b6e64df
> #2 [ffff99650065b8c8] schedule_timeout at ffffffff9b6e9f89
> #3 [ffff99650065b940] msleep at ffffffff9af573a9
> #4 [ffff99650065b948] _cifsFileInfo_put.cold.63 at ffffffffc0a42dd6 [cifs]
> #5 [ffff99650065ba38] cifs_writepage_locked at ffffffffc0a0b8f3 [cifs]
> #6 [ffff99650065bab0] cifs_launder_page at ffffffffc0a0bb72 [cifs]
> #7 [ffff99650065bb30] invalidate_inode_pages2_range at ffffffff9b04d4bd
> #8 [ffff99650065bcb8] cifs_invalidate_mapping at ffffffffc0a11339 [cifs]
> #9 [ffff99650065bcd0] cifs_revalidate_mapping at ffffffffc0a1139a [cifs]
> #10 [ffff99650065bcf0] cifs_d_revalidate at ffffffffc0a014f6 [cifs]
> #11 [ffff99650065bd08] path_openat at ffffffff9b0fe7f7
> #12 [ffff99650065bdd8] do_filp_open at ffffffff9b100a33
> #13 [ffff99650065bee0] do_sys_open at ffffffff9b0eb2d6
> #14 [ffff99650065bf38] do_syscall_64 at ffffffff9ae04315
>
> cifs_launder_page is for page 0xffffd1e2c07d2480
>
> crash> page.index,mapping,flags 0xffffd1e2c07d2480
> index = 0x8
> mapping = 0xffff8c68f3cd0db0
> flags = 0xfffffc0008095
>
> PAGE-FLAG BIT VALUE
> PG_locked 0 0000001
> PG_uptodate 2 0000004
> PG_lru 4 0000010
> PG_waiters 7 0000080
> PG_writeback 15 0008000
>
>
> inode is ffff8c68f3cd0c40
> inode.i_rwsem is ffff8c68f3cd0ce0
> DENTRY INODE SUPERBLK TYPE PATH
> ffff8c68a1f1b480 ffff8c68f3cd0c40 ffff8c6915bf9000 REG
> /mnt/vm1_smb/testfile.8853
>
>
> this process holds the inode->i_rwsem for the parent directory, is
> laundering a page attached to the inode of the file it's opening, and in
> _cifsFileInfo_put is trying to down_write the cifsInodeInfo->lock_sem
> for the file itself.
>
>
> <struct rw_semaphore 0xffff8c68f3cd0ce0>:
> owner: <struct task_struct 0xffff8c6914272e80> (UN - 8854 -
> reopen_file), counter: 0x0000000000000003
> waitlist: 1
> 0xffff9965005dfd80 8855 reopen_file UN 0 1:29:22.912
> RWSEM_WAITING_FOR_WRITE
>
> this is the inode.i_rwsem for the file
>
> the owner:
>
> [0 00:48:22.739] [UN] PID: 8854 TASK: ffff8c6914272e80 CPU: 2
> COMMAND: "reopen_file"
> #0 [ffff99650054fb38] __schedule at ffffffff9b6e6095
> #1 [ffff99650054fbc8] schedule at ffffffff9b6e64df
> #2 [ffff99650054fbd8] io_schedule at ffffffff9b6e68e2
> #3 [ffff99650054fbe8] __lock_page at ffffffff9b03c56f
> #4 [ffff99650054fc80] pagecache_get_page at ffffffff9b03dcdf
> #5 [ffff99650054fcc0] grab_cache_page_write_begin at ffffffff9b03ef4c
> #6 [ffff99650054fcd0] cifs_write_begin at ffffffffc0a064ec [cifs]
> #7 [ffff99650054fd30] generic_perform_write at ffffffff9b03bba4
> #8 [ffff99650054fda8] __generic_file_write_iter at ffffffff9b04060a
> #9 [ffff99650054fdf0] cifs_strict_writev.cold.70 at ffffffffc0a4469b [cifs]
> #10 [ffff99650054fe48] new_sync_write at ffffffff9b0ec1dd
> #11 [ffff99650054fed0] vfs_write at ffffffff9b0eed35
> #12 [ffff99650054ff00] ksys_write at ffffffff9b0eefd9
> #13 [ffff99650054ff38] do_syscall_64 at ffffffff9ae04315
>
> the process holds the inode->i_rwsem for the file to which it's writing,
> and is trying to __lock_page for the same page as in the other processes
>
>
> the other tasks:
> [0 00:00:00.028] [UN] PID: 8859 TASK: ffff8c6915479740 CPU: 2
> COMMAND: "reopen_file"
> #0 [ffff9965007b39d8] __schedule at ffffffff9b6e6095
> #1 [ffff9965007b3a68] schedule at ffffffff9b6e64df
> #2 [ffff9965007b3a78] schedule_timeout at ffffffff9b6e9f89
> #3 [ffff9965007b3af0] msleep at ffffffff9af573a9
> #4 [ffff9965007b3af8] cifs_new_fileinfo.cold.61 at ffffffffc0a42a07 [cifs]
> #5 [ffff9965007b3b78] cifs_open at ffffffffc0a0709d [cifs]
> #6 [ffff9965007b3cd8] do_dentry_open at ffffffff9b0e9b7a
> #7 [ffff9965007b3d08] path_openat at ffffffff9b0fe34f
> #8 [ffff9965007b3dd8] do_filp_open at ffffffff9b100a33
> #9 [ffff9965007b3ee0] do_sys_open at ffffffff9b0eb2d6
> #10 [ffff9965007b3f38] do_syscall_64 at ffffffff9ae04315
>
> this is opening the file, and is trying to down_write cinode->lock_sem
>
>
> [0 00:00:00.041] [UN] PID: 8860 TASK: ffff8c691547ae80 CPU: 2
> COMMAND: "reopen_file"
> [0 00:00:00.057] [UN] PID: 8861 TASK: ffff8c6915478000 CPU: 3
> COMMAND: "reopen_file"
> [0 00:00:00.059] [UN] PID: 8858 TASK: ffff8c6914271740 CPU: 2
> COMMAND: "reopen_file"
> [0 00:00:00.109] [UN] PID: 8862 TASK: ffff8c691547dd00 CPU: 6
> COMMAND: "reopen_file"
> #0 [ffff9965007c3c78] __schedule at ffffffff9b6e6095
> #1 [ffff9965007c3d08] schedule at ffffffff9b6e64df
> #2 [ffff9965007c3d18] schedule_timeout at ffffffff9b6e9f89
> #3 [ffff9965007c3d90] msleep at ffffffff9af573a9
> #4 [ffff9965007c3d98] _cifsFileInfo_put.cold.63 at ffffffffc0a42dd6 [cifs]
> #5 [ffff9965007c3e88] cifs_close at ffffffffc0a07aaf [cifs]
> #6 [ffff9965007c3ea0] __fput at ffffffff9b0efa6e
> #7 [ffff9965007c3ee8] task_work_run at ffffffff9aef1614
> #8 [ffff9965007c3f20] exit_to_usermode_loop at ffffffff9ae03d6f
> #9 [ffff9965007c3f38] do_syscall_64 at ffffffff9ae0444c
>
> closing the file, and trying to down_write cifsi->lock_sem
>
>
> [0 00:48:22.839] [UN] PID: 8857 TASK: ffff8c6914270000 CPU: 7
> COMMAND: "reopen_file"
> #0 [ffff9965006a7cc8] __schedule at ffffffff9b6e6095
> #1 [ffff9965006a7d58] schedule at ffffffff9b6e64df
> #2 [ffff9965006a7d68] io_schedule at ffffffff9b6e68e2
> #3 [ffff9965006a7d78] wait_on_page_bit at ffffffff9b03cac6
> #4 [ffff9965006a7e10] __filemap_fdatawait_range at ffffffff9b03b028
> #5 [ffff9965006a7ed8] filemap_write_and_wait at ffffffff9b040165
> #6 [ffff9965006a7ef0] cifs_flush at ffffffffc0a0c2fa [cifs]
> #7 [ffff9965006a7f10] filp_close at ffffffff9b0e93f1
> #8 [ffff9965006a7f30] __x64_sys_close at ffffffff9b0e9a0e
> #9 [ffff9965006a7f38] do_syscall_64 at ffffffff9ae04315
>
> in __filemap_fdatawait_range
> wait_on_page_writeback(page);
> for the same page of the file
>
>
>
> [0 00:48:22.718] [UN] PID: 8855 TASK: ffff8c69142745c0 CPU: 7
> COMMAND: "reopen_file"
> #0 [ffff9965005dfc98] __schedule at ffffffff9b6e6095
> #1 [ffff9965005dfd28] schedule at ffffffff9b6e64df
> #2 [ffff9965005dfd38] rwsem_down_write_slowpath at ffffffff9af283d7
> #3 [ffff9965005dfdf0] cifs_strict_writev at ffffffffc0a0c40a [cifs]
> #4 [ffff9965005dfe48] new_sync_write at ffffffff9b0ec1dd
> #5 [ffff9965005dfed0] vfs_write at ffffffff9b0eed35
> #6 [ffff9965005dff00] ksys_write at ffffffff9b0eefd9
> #7 [ffff9965005dff38] do_syscall_64 at ffffffff9ae04315
>
> inode_lock(inode);
>
>
> and one 'ls' later on, to see whether the rest of the mount is available
> (the test file is in the root, so we get blocked up on the directory
> ->i_rwsem), so the entire mount is unavailable
>
> [0 00:36:26.473] [UN] PID: 9802 TASK: ffff8c691436ae80 CPU: 4
> COMMAND: "ls"
> #0 [ffff996500393d28] __schedule at ffffffff9b6e6095
> #1 [ffff996500393db8] schedule at ffffffff9b6e64df
> #2 [ffff996500393dc8] rwsem_down_read_slowpath at ffffffff9b6e9421
> #3 [ffff996500393e78] down_read_killable at ffffffff9b6e95e2
> #4 [ffff996500393e88] iterate_dir at ffffffff9b103c56
> #5 [ffff996500393ec8] ksys_getdents64 at ffffffff9b104b0c
> #6 [ffff996500393f30] __x64_sys_getdents64 at ffffffff9b104bb6
> #7 [ffff996500393f38] do_syscall_64 at ffffffff9ae04315
>
> in iterate_dir:
> if (shared)
> res = down_read_killable(&inode->i_rwsem); <<<<
> else
> res = down_write_killable(&inode->i_rwsem);
>
Reported-by: Frank Sorenson <sorenson@redhat.com>
Reviewed-by: Pavel Shilovsky <pshilov@microsoft.com>
Signed-off-by: Ronnie Sahlberg <lsahlber@redhat.com>
Signed-off-by: Steve French <stfrench@microsoft.com>
2019-11-03 06:06:37 +03:00
destroy_workqueue ( fileinfo_put_wq ) ;
2012-03-23 22:40:53 +04:00
destroy_workqueue ( cifsiod_wq ) ;
2012-03-21 14:27:55 +04:00
cifs_proc_clean ( ) ;
2005-04-17 02:20:36 +04:00
}
2018-09-18 12:07:45 +03:00
/* Module metadata: author, license, description and version (CIFS_VERSION). */
MODULE_AUTHOR ( " Steve French " ) ;
2007-07-07 03:13:06 +04:00
MODULE_LICENSE ( " GPL " ) ; /* combination of LGPL + GPL source behaves as GPL */
2005-04-17 02:20:36 +04:00
MODULE_DESCRIPTION
2018-09-18 12:07:45 +03:00
( " VFS to access SMB3 servers e.g. Samba, Macs, Azure and Windows (and "
" also older servers complying with the SNIA CIFS Specification) " ) ;
2005-04-17 02:20:36 +04:00
MODULE_VERSION ( CIFS_VERSION ) ;
2019-10-31 06:55:14 +03:00
/*
 * Declare a soft dependency on each module named below, one per entry.
 * NOTE(review): modprobe.d softdep syntax normally uses "pre:" / "post:"
 * prefixes and none of these entries carries one - confirm that the bare
 * names are honoured by the module tooling in use.
 */
MODULE_SOFTDEP ( " ecb " ) ;
MODULE_SOFTDEP ( " hmac " ) ;
MODULE_SOFTDEP ( " md4 " ) ;
MODULE_SOFTDEP ( " md5 " ) ;
MODULE_SOFTDEP ( " nls " ) ;
MODULE_SOFTDEP ( " aes " ) ;
MODULE_SOFTDEP ( " cmac " ) ;
MODULE_SOFTDEP ( " sha256 " ) ;
MODULE_SOFTDEP ( " sha512 " ) ;
MODULE_SOFTDEP ( " aead2 " ) ;
MODULE_SOFTDEP ( " ccm " ) ;
MODULE_SOFTDEP ( " gcm " ) ;
2005-04-17 02:20:36 +04:00
/* Register init_cifs and exit_cifs as the module's init and exit handlers. */
module_init ( init_cifs )
module_exit ( exit_cifs )