2021-06-18 08:31:49 +03:00
// SPDX-License-Identifier: LGPL-2.1
2005-04-17 02:20:36 +04:00
/*
*
2008-05-17 07:12:45 +04:00
* Copyright ( C ) International Business Machines Corp . , 2002 , 2008
2005-04-17 02:20:36 +04:00
* Author ( s ) : Steve French ( sfrench @ us . ibm . com )
*
* Common Internet FileSystem ( CIFS ) client
*
*/
/* Note that BB means BUGBUG (ie something to fix eventually) */
# include <linux/module.h>
# include <linux/fs.h>
2022-11-20 17:15:34 +03:00
# include <linux/filelock.h>
2005-04-17 02:20:36 +04:00
# include <linux/mount.h>
# include <linux/slab.h>
# include <linux/init.h>
# include <linux/list.h>
# include <linux/seq_file.h>
# include <linux/vfs.h>
# include <linux/mempool.h>
2005-11-30 07:55:11 +03:00
# include <linux/delay.h>
2006-04-22 02:52:25 +04:00
# include <linux/kthread.h>
2006-12-07 07:34:23 +03:00
# include <linux/freezer.h>
2011-07-18 21:50:40 +04:00
# include <linux/namei.h>
2012-09-19 17:22:44 +04:00
# include <linux/random.h>
2017-03-29 00:45:06 +03:00
# include <linux/uuid.h>
2016-04-22 13:11:38 +03:00
# include <linux/xattr.h>
2022-01-11 03:00:02 +03:00
# include <uapi/linux/magic.h>
2010-09-02 04:06:02 +04:00
# include <net/ipv6.h>
2005-04-17 02:20:36 +04:00
# include "cifsfs.h"
# include "cifspdu.h"
# define DECLARE_GLOBALS_HERE
# include "cifsglob.h"
# include "cifsproto.h"
# include "cifs_debug.h"
# include "cifs_fs_sb.h"
# include <linux/mm.h>
2007-11-03 08:02:24 +03:00
# include <linux/key-type.h>
2007-11-03 08:11:06 +03:00
# include "cifs_spnego.h"
2010-07-05 16:41:50 +04:00
# include "fscache.h"
2018-11-14 21:24:03 +03:00
# ifdef CONFIG_CIFS_DFS_UPCALL
# include "dfs_cache.h"
# endif
2020-11-30 21:02:49 +03:00
# ifdef CONFIG_CIFS_SWN_UPCALL
# include "netlink.h"
# endif
2020-12-10 08:07:12 +03:00
# include "fs_context.h"
2022-08-11 06:00:08 +03:00
# include "cached_dir.h"
2005-04-17 02:20:36 +04:00
2019-03-23 00:32:35 +03:00
/*
* DOS dates from 1980 / 1 / 1 through 2107 / 12 / 31
* Protocol specifications indicate the range should be to 119 , which
* limits maximum year to 2099. But this range has not been checked .
*/
/*
 * Packed DOS date limits: the three shifted fields below are combined as
 * (field << 9 | field << 5 | field). Given the 1980/1/1 .. 2107/12/31 range
 * quoted in the comment above, these read as years-since-1980, month, day.
 */
# define SMB_DATE_MAX (127<<9 | 12<<5 | 31)
# define SMB_DATE_MIN (0<<9 | 1<<5 | 1)
/*
 * Packed DOS time limit: (field << 11 | field << 5 | field), i.e. 23, 59, 29.
 * NOTE(review): the low field is presumably seconds in 2-second units
 * (29 -> 58 s), matching the "2 second granularity" mentioned in
 * cifs_read_super - confirm against cnvrtDosUnixTm.
 */
# define SMB_TIME_MAX (23<<11 | 59<<5 | 29)
2005-04-17 02:20:36 +04:00
int cifsFYI = 0 ;
2016-03-18 00:22:54 +03:00
bool traceSMB ;
2011-10-13 02:47:03 +04:00
bool enable_oplocks = true ;
2016-03-18 00:22:54 +03:00
bool linuxExtEnabled = true ;
bool lookupCacheEnabled = true ;
2018-05-24 12:11:07 +03:00
bool disable_legacy_dialects ; /* false by default */
2021-04-28 07:07:19 +03:00
bool enable_gcm_256 = true ;
2020-09-12 00:19:28 +03:00
bool require_gcm_256 ; /* false by default */
2021-07-05 23:05:39 +03:00
bool enable_negotiate_signing ; /* false by default */
2010-04-24 15:57:45 +04:00
unsigned int global_secflags = CIFSSEC_DEF ;
2006-06-01 02:40:51 +04:00
/* unsigned int ntlmv2_support = 0; */
2005-04-17 02:20:36 +04:00
unsigned int sign_CIFS_PDUs = 1 ;
2022-07-25 06:47:59 +03:00
/*
* Global transaction id ( XID ) information
*/
/*
 * NOTE(review): these comments used to name "GlobalMid_Sem", but the only
 * lock declared here is GlobalMid_Lock, whose own comment says it protects
 * the variables above it - confirm against the get_xid/free_xid code.
 */
unsigned int GlobalCurrentXid ; /* protected by GlobalMid_Lock (declared below) */
unsigned int GlobalTotalActiveXid ; /* protected by GlobalMid_Lock */
unsigned int GlobalMaxActiveXid ; /* protected by GlobalMid_Lock */
spinlock_t GlobalMid_Lock ; /* protects above & list operations on midQ entries */
/*
* Global counters , updated atomically
*/
atomic_t sesInfoAllocCount ;
atomic_t tconInfoAllocCount ;
atomic_t tcpSesNextId ;
atomic_t tcpSesAllocCount ;
atomic_t tcpSesReconnectCount ;
atomic_t tconInfoReconnectCount ;
2022-07-16 07:45:45 +03:00
atomic_t mid_count ;
atomic_t buf_alloc_count ;
atomic_t small_buf_alloc_count ;
# ifdef CONFIG_CIFS_STATS2
atomic_t total_buf_alloc_count ;
atomic_t total_small_buf_alloc_count ;
# endif /* STATS2 */
2022-07-16 07:57:08 +03:00
struct list_head cifs_tcp_ses_list ;
spinlock_t cifs_tcp_ses_lock ;
2007-02-12 11:55:41 +03:00
static const struct super_operations cifs_super_ops ;
2005-04-17 02:20:36 +04:00
unsigned int CIFSMaxBufSize = CIFS_MAX_MSGSIZE ;
2016-10-01 05:25:24 +03:00
module_param ( CIFSMaxBufSize , uint , 0444 ) ;
2018-05-24 10:09:20 +03:00
MODULE_PARM_DESC ( CIFSMaxBufSize , " Network buffer size (not including header) "
" for CIFS requests. "
2007-07-17 21:34:02 +04:00
" Default: 16384 Range: 8192 to 130048 " ) ;
2005-04-17 02:20:36 +04:00
unsigned int cifs_min_rcv = CIFS_MIN_RCV_POOL ;
2016-10-01 05:25:24 +03:00
module_param ( cifs_min_rcv , uint , 0444 ) ;
2007-07-17 21:34:02 +04:00
MODULE_PARM_DESC ( cifs_min_rcv , " Network buffers in pool. Default: 4 Range: "
" 1 to 64 " ) ;
2005-04-17 02:20:36 +04:00
unsigned int cifs_min_small = 30 ;
2016-10-01 05:25:24 +03:00
module_param ( cifs_min_small , uint , 0444 ) ;
2007-07-17 21:34:02 +04:00
MODULE_PARM_DESC ( cifs_min_small , " Small network buffers in pool. Default: 30 "
" Range: 2 to 256 " ) ;
2005-04-17 02:20:36 +04:00
unsigned int cifs_max_pending = CIFS_MAX_REQ ;
2012-11-25 17:00:34 +04:00
module_param ( cifs_max_pending , uint , 0444 ) ;
2018-05-24 10:09:20 +03:00
MODULE_PARM_DESC ( cifs_max_pending , " Simultaneous requests to server for "
" CIFS/SMB1 dialect (N/A for SMB3) "
2012-03-20 13:55:09 +04:00
" Default: 32767 Range: 2 to 32767. " ) ;
2018-09-18 22:05:18 +03:00
# ifdef CONFIG_CIFS_STATS2
unsigned int slow_rsp_threshold = 1 ;
module_param ( slow_rsp_threshold , uint , 0644 ) ;
MODULE_PARM_DESC ( slow_rsp_threshold , " Amount of time (in seconds) to wait "
" before logging that a response is delayed. "
" Default: 1 (if set to 0 disables msg). " ) ;
# endif /* STATS2 */
2011-10-13 02:47:03 +04:00
module_param ( enable_oplocks , bool , 0644 ) ;
2012-11-25 17:00:34 +04:00
MODULE_PARM_DESC ( enable_oplocks , " Enable or disable oplocks. Default: y/Y/1 " ) ;
2011-10-13 02:47:03 +04:00
2020-10-15 04:24:09 +03:00
/*
 * enable_gcm_256 is initialised to true where it is defined in this file, so
 * the description must advertise an enabled default (same wording as the
 * enable_oplocks parameter, which is also true by default).
 */
module_param(enable_gcm_256, bool, 0644);
MODULE_PARM_DESC(enable_gcm_256, " Enable requesting strongest (256 bit) GCM encryption. Default: y/Y/1 ");
2020-09-12 00:19:28 +03:00
module_param ( require_gcm_256 , bool , 0644 ) ;
MODULE_PARM_DESC ( require_gcm_256 , " Require strongest (256 bit) GCM encryption. Default: n/N/0 " ) ;
2021-07-05 23:05:39 +03:00
module_param ( enable_negotiate_signing , bool , 0644 ) ;
MODULE_PARM_DESC ( enable_negotiate_signing , " Enable negotiating packet signing algorithm with server. Default: n/N/0 " ) ;
2018-05-24 12:11:07 +03:00
module_param ( disable_legacy_dialects , bool , 0644 ) ;
MODULE_PARM_DESC ( disable_legacy_dialects , " To improve security it may be "
" helpful to restrict the ability to "
" override the default dialects (SMB2.1, "
" SMB3 and SMB3.02) on mount with old "
" dialects (CIFS/SMB1 and SMB2) since "
" vers=1.0 (CIFS/SMB1) and vers=2.0 are weaker "
" and less secure. Default: n/N/0 " ) ;
2005-04-17 02:20:36 +04:00
extern mempool_t * cifs_sm_req_poolp ;
extern mempool_t * cifs_req_poolp ;
extern mempool_t * cifs_mid_poolp ;
2012-03-23 22:40:53 +04:00
struct workqueue_struct * cifsiod_wq ;
2019-09-07 09:09:49 +03:00
struct workqueue_struct * decrypt_wq ;
cifs: move cifsFileInfo_put logic into a work-queue
This patch moves the final part of the cifsFileInfo_put() logic where we
need a write lock on lock_sem to be processed in a separate thread that
holds no other locks.
This is to prevent deadlocks like the one below:
> there are 6 processes looping to while trying to down_write
> cinode->lock_sem, 5 of them from _cifsFileInfo_put, and one from
> cifs_new_fileinfo
>
> and there are 5 other processes which are blocked, several of them
> waiting on either PG_writeback or PG_locked (which are both set), all
> for the same page of the file
>
> 2 inode_lock() (inode->i_rwsem) for the file
> 1 wait_on_page_writeback() for the page
> 1 down_read(inode->i_rwsem) for the inode of the directory
> 1 inode_lock()(inode->i_rwsem) for the inode of the directory
> 1 __lock_page
>
>
> so processes are blocked waiting on:
> page flags PG_locked and PG_writeback for one specific page
> inode->i_rwsem for the directory
> inode->i_rwsem for the file
> cifsInodeInflock_sem
>
>
>
> here are the more gory details (let me know if I need to provide
> anything more/better):
>
> [0 00:48:22.765] [UN] PID: 8863 TASK: ffff8c691547c5c0 CPU: 3
> COMMAND: "reopen_file"
> #0 [ffff9965007e3ba8] __schedule at ffffffff9b6e6095
> #1 [ffff9965007e3c38] schedule at ffffffff9b6e64df
> #2 [ffff9965007e3c48] rwsem_down_write_slowpath at ffffffff9af283d7
> #3 [ffff9965007e3cb8] legitimize_path at ffffffff9b0f975d
> #4 [ffff9965007e3d08] path_openat at ffffffff9b0fe55d
> #5 [ffff9965007e3dd8] do_filp_open at ffffffff9b100a33
> #6 [ffff9965007e3ee0] do_sys_open at ffffffff9b0eb2d6
> #7 [ffff9965007e3f38] do_syscall_64 at ffffffff9ae04315
> * (I think legitimize_path is bogus)
>
> in path_openat
> } else {
> const char *s = path_init(nd, flags);
> while (!(error = link_path_walk(s, nd)) &&
> (error = do_last(nd, file, op)) > 0) { <<<<
>
> do_last:
> if (open_flag & O_CREAT)
> inode_lock(dir->d_inode); <<<<
> else
> so it's trying to take inode->i_rwsem for the directory
>
> DENTRY INODE SUPERBLK TYPE PATH
> ffff8c68bb8e79c0 ffff8c691158ef20 ffff8c6915bf9000 DIR /mnt/vm1_smb/
> inode.i_rwsem is ffff8c691158efc0
>
> <struct rw_semaphore 0xffff8c691158efc0>:
> owner: <struct task_struct 0xffff8c6914275d00> (UN - 8856 -
> reopen_file), counter: 0x0000000000000003
> waitlist: 2
> 0xffff9965007e3c90 8863 reopen_file UN 0 1:29:22.926
> RWSEM_WAITING_FOR_WRITE
> 0xffff996500393e00 9802 ls UN 0 1:17:26.700
> RWSEM_WAITING_FOR_READ
>
>
> the owner of the inode.i_rwsem of the directory is:
>
> [0 00:00:00.109] [UN] PID: 8856 TASK: ffff8c6914275d00 CPU: 3
> COMMAND: "reopen_file"
> #0 [ffff99650065b828] __schedule at ffffffff9b6e6095
> #1 [ffff99650065b8b8] schedule at ffffffff9b6e64df
> #2 [ffff99650065b8c8] schedule_timeout at ffffffff9b6e9f89
> #3 [ffff99650065b940] msleep at ffffffff9af573a9
> #4 [ffff99650065b948] _cifsFileInfo_put.cold.63 at ffffffffc0a42dd6 [cifs]
> #5 [ffff99650065ba38] cifs_writepage_locked at ffffffffc0a0b8f3 [cifs]
> #6 [ffff99650065bab0] cifs_launder_page at ffffffffc0a0bb72 [cifs]
> #7 [ffff99650065bb30] invalidate_inode_pages2_range at ffffffff9b04d4bd
> #8 [ffff99650065bcb8] cifs_invalidate_mapping at ffffffffc0a11339 [cifs]
> #9 [ffff99650065bcd0] cifs_revalidate_mapping at ffffffffc0a1139a [cifs]
> #10 [ffff99650065bcf0] cifs_d_revalidate at ffffffffc0a014f6 [cifs]
> #11 [ffff99650065bd08] path_openat at ffffffff9b0fe7f7
> #12 [ffff99650065bdd8] do_filp_open at ffffffff9b100a33
> #13 [ffff99650065bee0] do_sys_open at ffffffff9b0eb2d6
> #14 [ffff99650065bf38] do_syscall_64 at ffffffff9ae04315
>
> cifs_launder_page is for page 0xffffd1e2c07d2480
>
> crash> page.index,mapping,flags 0xffffd1e2c07d2480
> index = 0x8
> mapping = 0xffff8c68f3cd0db0
> flags = 0xfffffc0008095
>
> PAGE-FLAG BIT VALUE
> PG_locked 0 0000001
> PG_uptodate 2 0000004
> PG_lru 4 0000010
> PG_waiters 7 0000080
> PG_writeback 15 0008000
>
>
> inode is ffff8c68f3cd0c40
> inode.i_rwsem is ffff8c68f3cd0ce0
> DENTRY INODE SUPERBLK TYPE PATH
> ffff8c68a1f1b480 ffff8c68f3cd0c40 ffff8c6915bf9000 REG
> /mnt/vm1_smb/testfile.8853
>
>
> this process holds the inode->i_rwsem for the parent directory, is
> laundering a page attached to the inode of the file it's opening, and in
> _cifsFileInfo_put is trying to down_write the cifsInodeInflock_sem
> for the file itself.
>
>
> <struct rw_semaphore 0xffff8c68f3cd0ce0>:
> owner: <struct task_struct 0xffff8c6914272e80> (UN - 8854 -
> reopen_file), counter: 0x0000000000000003
> waitlist: 1
> 0xffff9965005dfd80 8855 reopen_file UN 0 1:29:22.912
> RWSEM_WAITING_FOR_WRITE
>
> this is the inode.i_rwsem for the file
>
> the owner:
>
> [0 00:48:22.739] [UN] PID: 8854 TASK: ffff8c6914272e80 CPU: 2
> COMMAND: "reopen_file"
> #0 [ffff99650054fb38] __schedule at ffffffff9b6e6095
> #1 [ffff99650054fbc8] schedule at ffffffff9b6e64df
> #2 [ffff99650054fbd8] io_schedule at ffffffff9b6e68e2
> #3 [ffff99650054fbe8] __lock_page at ffffffff9b03c56f
> #4 [ffff99650054fc80] pagecache_get_page at ffffffff9b03dcdf
> #5 [ffff99650054fcc0] grab_cache_page_write_begin at ffffffff9b03ef4c
> #6 [ffff99650054fcd0] cifs_write_begin at ffffffffc0a064ec [cifs]
> #7 [ffff99650054fd30] generic_perform_write at ffffffff9b03bba4
> #8 [ffff99650054fda8] __generic_file_write_iter at ffffffff9b04060a
> #9 [ffff99650054fdf0] cifs_strict_writev.cold.70 at ffffffffc0a4469b [cifs]
> #10 [ffff99650054fe48] new_sync_write at ffffffff9b0ec1dd
> #11 [ffff99650054fed0] vfs_write at ffffffff9b0eed35
> #12 [ffff99650054ff00] ksys_write at ffffffff9b0eefd9
> #13 [ffff99650054ff38] do_syscall_64 at ffffffff9ae04315
>
> the process holds the inode->i_rwsem for the file to which it's writing,
> and is trying to __lock_page for the same page as in the other processes
>
>
> the other tasks:
> [0 00:00:00.028] [UN] PID: 8859 TASK: ffff8c6915479740 CPU: 2
> COMMAND: "reopen_file"
> #0 [ffff9965007b39d8] __schedule at ffffffff9b6e6095
> #1 [ffff9965007b3a68] schedule at ffffffff9b6e64df
> #2 [ffff9965007b3a78] schedule_timeout at ffffffff9b6e9f89
> #3 [ffff9965007b3af0] msleep at ffffffff9af573a9
> #4 [ffff9965007b3af8] cifs_new_fileinfo.cold.61 at ffffffffc0a42a07 [cifs]
> #5 [ffff9965007b3b78] cifs_open at ffffffffc0a0709d [cifs]
> #6 [ffff9965007b3cd8] do_dentry_open at ffffffff9b0e9b7a
> #7 [ffff9965007b3d08] path_openat at ffffffff9b0fe34f
> #8 [ffff9965007b3dd8] do_filp_open at ffffffff9b100a33
> #9 [ffff9965007b3ee0] do_sys_open at ffffffff9b0eb2d6
> #10 [ffff9965007b3f38] do_syscall_64 at ffffffff9ae04315
>
> this is opening the file, and is trying to down_write cinode->lock_sem
>
>
> [0 00:00:00.041] [UN] PID: 8860 TASK: ffff8c691547ae80 CPU: 2
> COMMAND: "reopen_file"
> [0 00:00:00.057] [UN] PID: 8861 TASK: ffff8c6915478000 CPU: 3
> COMMAND: "reopen_file"
> [0 00:00:00.059] [UN] PID: 8858 TASK: ffff8c6914271740 CPU: 2
> COMMAND: "reopen_file"
> [0 00:00:00.109] [UN] PID: 8862 TASK: ffff8c691547dd00 CPU: 6
> COMMAND: "reopen_file"
> #0 [ffff9965007c3c78] __schedule at ffffffff9b6e6095
> #1 [ffff9965007c3d08] schedule at ffffffff9b6e64df
> #2 [ffff9965007c3d18] schedule_timeout at ffffffff9b6e9f89
> #3 [ffff9965007c3d90] msleep at ffffffff9af573a9
> #4 [ffff9965007c3d98] _cifsFileInfo_put.cold.63 at ffffffffc0a42dd6 [cifs]
> #5 [ffff9965007c3e88] cifs_close at ffffffffc0a07aaf [cifs]
> #6 [ffff9965007c3ea0] __fput at ffffffff9b0efa6e
> #7 [ffff9965007c3ee8] task_work_run at ffffffff9aef1614
> #8 [ffff9965007c3f20] exit_to_usermode_loop at ffffffff9ae03d6f
> #9 [ffff9965007c3f38] do_syscall_64 at ffffffff9ae0444c
>
> closing the file, and trying to down_write cifsi->lock_sem
>
>
> [0 00:48:22.839] [UN] PID: 8857 TASK: ffff8c6914270000 CPU: 7
> COMMAND: "reopen_file"
> #0 [ffff9965006a7cc8] __schedule at ffffffff9b6e6095
> #1 [ffff9965006a7d58] schedule at ffffffff9b6e64df
> #2 [ffff9965006a7d68] io_schedule at ffffffff9b6e68e2
> #3 [ffff9965006a7d78] wait_on_page_bit at ffffffff9b03cac6
> #4 [ffff9965006a7e10] __filemap_fdatawait_range at ffffffff9b03b028
> #5 [ffff9965006a7ed8] filemap_write_and_wait at ffffffff9b040165
> #6 [ffff9965006a7ef0] cifs_flush at ffffffffc0a0c2fa [cifs]
> #7 [ffff9965006a7f10] filp_close at ffffffff9b0e93f1
> #8 [ffff9965006a7f30] __x64_sys_close at ffffffff9b0e9a0e
> #9 [ffff9965006a7f38] do_syscall_64 at ffffffff9ae04315
>
> in __filemap_fdatawait_range
> wait_on_page_writeback(page);
> for the same page of the file
>
>
>
> [0 00:48:22.718] [UN] PID: 8855 TASK: ffff8c69142745c0 CPU: 7
> COMMAND: "reopen_file"
> #0 [ffff9965005dfc98] __schedule at ffffffff9b6e6095
> #1 [ffff9965005dfd28] schedule at ffffffff9b6e64df
> #2 [ffff9965005dfd38] rwsem_down_write_slowpath at ffffffff9af283d7
> #3 [ffff9965005dfdf0] cifs_strict_writev at ffffffffc0a0c40a [cifs]
> #4 [ffff9965005dfe48] new_sync_write at ffffffff9b0ec1dd
> #5 [ffff9965005dfed0] vfs_write at ffffffff9b0eed35
> #6 [ffff9965005dff00] ksys_write at ffffffff9b0eefd9
> #7 [ffff9965005dff38] do_syscall_64 at ffffffff9ae04315
>
> inode_lock(inode);
>
>
> and one 'ls' later on, to see whether the rest of the mount is available
> (the test file is in the root, so we get blocked up on the directory
> ->i_rwsem), so the entire mount is unavailable
>
> [0 00:36:26.473] [UN] PID: 9802 TASK: ffff8c691436ae80 CPU: 4
> COMMAND: "ls"
> #0 [ffff996500393d28] __schedule at ffffffff9b6e6095
> #1 [ffff996500393db8] schedule at ffffffff9b6e64df
> #2 [ffff996500393dc8] rwsem_down_read_slowpath at ffffffff9b6e9421
> #3 [ffff996500393e78] down_read_killable at ffffffff9b6e95e2
> #4 [ffff996500393e88] iterate_dir at ffffffff9b103c56
> #5 [ffff996500393ec8] ksys_getdents64 at ffffffff9b104b0c
> #6 [ffff996500393f30] __x64_sys_getdents64 at ffffffff9b104bb6
> #7 [ffff996500393f38] do_syscall_64 at ffffffff9ae04315
>
> in iterate_dir:
> if (shared)
> res = down_read_killable(&inode->i_rwsem); <<<<
> else
> res = down_write_killable(&inode->i_rwsem);
>
Reported-by: Frank Sorenson <sorenson@redhat.com>
Reviewed-by: Pavel Shilovsky <pshilov@microsoft.com>
Signed-off-by: Ronnie Sahlberg <lsahlber@redhat.com>
Signed-off-by: Steve French <stfrench@microsoft.com>
2019-11-03 06:06:37 +03:00
struct workqueue_struct * fileinfo_put_wq ;
2017-05-03 18:54:01 +03:00
struct workqueue_struct * cifsoplockd_wq ;
2021-05-05 13:56:47 +03:00
struct workqueue_struct * deferredclose_wq ;
2016-05-24 13:27:44 +03:00
__u32 cifs_lock_secret ;
2012-03-23 22:40:53 +04:00
2013-03-08 19:30:03 +04:00
/*
* Bumps refcount for cifs super block .
* Note that it should be only called if a reference to VFS super block is
* already held , e . g . in open - type syscalls context . Otherwise it can race with
* atomic_dec_and_test in deactivate_locked_super .
*/
void
cifs_sb_active ( struct super_block * sb )
{
struct cifs_sb_info * server = CIFS_SB ( sb ) ;
if ( atomic_inc_return ( & server - > active ) = = 1 )
atomic_inc ( & sb - > s_active ) ;
}
void
cifs_sb_deactive ( struct super_block * sb )
{
struct cifs_sb_info * server = CIFS_SB ( sb ) ;
if ( atomic_dec_and_test ( & server - > active ) )
deactivate_super ( sb ) ;
}
2005-04-17 02:20:36 +04:00
static int
2011-06-17 17:29:57 +04:00
cifs_read_super ( struct super_block * sb )
2005-04-17 02:20:36 +04:00
{
struct inode * inode ;
2011-05-25 13:35:34 +04:00
struct cifs_sb_info * cifs_sb ;
2013-10-06 23:08:20 +04:00
struct cifs_tcon * tcon ;
2019-03-23 00:32:35 +03:00
struct timespec64 ts ;
2005-04-17 02:20:36 +04:00
int rc = 0 ;
2007-07-13 04:33:32 +04:00
2011-05-25 13:35:34 +04:00
cifs_sb = CIFS_SB ( sb ) ;
2013-10-06 23:08:20 +04:00
tcon = cifs_sb_master_tcon ( cifs_sb ) ;
2005-04-17 02:20:36 +04:00
2011-06-17 17:05:48 +04:00
if ( cifs_sb - > mnt_cifs_flags & CIFS_MOUNT_POSIXACL )
2017-11-28 00:05:09 +03:00
sb - > s_flags | = SB_POSIXACL ;
2011-06-17 17:05:48 +04:00
smb3: snapshot mounts are read-only and make sure info is displayable about the mount
snapshot mounts were not marked as read-only and did not display the snapshot
time (in /proc/mounts) specified on mount
With this patch - note that can not write to the snapshot mount (see "ro" in
/proc/mounts line) and also the missing snapshot timewarp token time is
dumped. Sample line from /proc/mounts with the patch:
//127.0.0.1/scratch /mnt2 smb3 ro,relatime,vers=default,cache=strict,username=testuser,domain=,uid=0,noforceuid,gid=0,noforcegid,addr=127.0.0.1,file_mode=0755,dir_mode=0755,soft,nounix,serverino,mapposix,noperm,rsize=1048576,wsize=1048576,echo_interval=60,snapshot=1234567,actimeo=1 0 0
Signed-off-by: Steve French <stfrench@microsoft.com>
Reviewed-by: Paulo Alcantara <palcantara@suse.de>
2018-06-30 00:06:15 +03:00
if ( tcon - > snapshot_time )
sb - > s_flags | = SB_RDONLY ;
2013-10-06 23:08:20 +04:00
if ( tcon - > ses - > capabilities & tcon - > ses - > server - > vals - > cap_large_files )
2011-06-17 17:05:48 +04:00
sb - > s_maxbytes = MAX_LFS_FILESIZE ;
else
sb - > s_maxbytes = MAX_NON_LFS ;
2019-10-12 03:36:13 +03:00
/*
* Some very old servers like DOS and OS / 2 used 2 second granularity
* ( while all current servers use 100 ns granularity - see MS - DTYP )
* but 1 second is the maximum allowed granularity for the VFS
* so for old servers set time granularity to 1 second while for
* everything else ( current servers ) set it to 100 ns .
*/
2019-10-08 08:27:14 +03:00
if ( ( tcon - > ses - > server - > vals - > protocol_id = = SMB10_PROT_ID ) & &
( ( tcon - > ses - > capabilities &
tcon - > ses - > server - > vals - > cap_nt_find ) = = 0 ) & &
! tcon - > unix_ext ) {
sb - > s_time_gran = 1000000000 ; /* 1 second is max allowed gran */
ts = cnvrtDosUnixTm ( cpu_to_le16 ( SMB_DATE_MIN ) , 0 , 0 ) ;
2019-03-23 00:32:35 +03:00
sb - > s_time_min = ts . tv_sec ;
2019-10-08 08:27:14 +03:00
ts = cnvrtDosUnixTm ( cpu_to_le16 ( SMB_DATE_MAX ) ,
cpu_to_le16 ( SMB_TIME_MAX ) , 0 ) ;
2019-03-23 00:32:35 +03:00
sb - > s_time_max = ts . tv_sec ;
} else {
2019-10-08 08:27:14 +03:00
/*
* Almost every server , including all SMB2 + , uses DCE TIME
* ie 100 nanosecond units , since 1601. See MS - DTYP and MS - FSCC
*/
sb - > s_time_gran = 100 ;
ts = cifs_NTtimeToUnix ( 0 ) ;
2019-03-23 00:32:35 +03:00
sb - > s_time_min = ts . tv_sec ;
2019-10-08 08:27:14 +03:00
ts = cifs_NTtimeToUnix ( cpu_to_le64 ( S64_MAX ) ) ;
2019-03-23 00:32:35 +03:00
sb - > s_time_max = ts . tv_sec ;
}
2022-01-11 03:00:02 +03:00
sb - > s_magic = CIFS_SUPER_MAGIC ;
2005-04-17 02:20:36 +04:00
sb - > s_op = & cifs_super_ops ;
2016-04-22 13:11:38 +03:00
sb - > s_xattr = cifs_xattr_handlers ;
2017-04-12 13:24:34 +03:00
rc = super_setup_bdi ( sb ) ;
if ( rc )
goto out_no_root ;
2021-04-25 05:46:23 +03:00
/* tune readahead according to rsize if readahead size not set on mount */
2022-03-07 21:37:22 +03:00
if ( cifs_sb - > ctx - > rsize = = 0 )
cifs_sb - > ctx - > rsize =
tcon - > ses - > server - > ops - > negotiate_rsize ( tcon , cifs_sb - > ctx ) ;
2021-04-25 05:46:23 +03:00
if ( cifs_sb - > ctx - > rasize )
sb - > s_bdi - > ra_pages = cifs_sb - > ctx - > rasize / PAGE_SIZE ;
else
sb - > s_bdi - > ra_pages = cifs_sb - > ctx - > rsize / PAGE_SIZE ;
2017-04-12 13:24:34 +03:00
2005-04-17 02:20:36 +04:00
sb - > s_blocksize = CIFS_MAX_MSGSIZE ;
sb - > s_blocksize_bits = 14 ; /* default 2**14 = CIFS_MAX_MSGSIZE */
2011-02-22 08:56:59 +03:00
inode = cifs_root_iget ( sb ) ;
2005-04-17 02:20:36 +04:00
2008-02-07 11:15:33 +03:00
if ( IS_ERR ( inode ) ) {
rc = PTR_ERR ( inode ) ;
2005-04-17 02:20:36 +04:00
goto out_no_root ;
}
2013-10-06 23:08:20 +04:00
if ( tcon - > nocase )
2013-07-30 19:38:44 +04:00
sb - > s_d_op = & cifs_ci_dentry_ops ;
else
sb - > s_d_op = & cifs_dentry_ops ;
2012-01-09 07:15:13 +04:00
sb - > s_root = d_make_root ( inode ) ;
2005-04-17 02:20:36 +04:00
if ( ! sb - > s_root ) {
rc = - ENOMEM ;
goto out_no_root ;
}
2007-07-13 04:33:32 +04:00
2011-10-12 16:14:04 +04:00
# ifdef CONFIG_CIFS_NFSD_EXPORT
2007-07-11 22:30:34 +04:00
if ( cifs_sb - > mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM ) {
2013-05-05 07:12:25 +04:00
cifs_dbg ( FYI , " export ops supported \n " ) ;
2007-07-11 22:30:34 +04:00
sb - > s_export_op = & cifs_export_ops ;
}
2011-10-12 16:14:04 +04:00
# endif /* CONFIG_CIFS_NFSD_EXPORT */
2005-04-17 02:20:36 +04:00
return 0 ;
out_no_root :
2013-05-05 07:12:25 +04:00
cifs_dbg ( VFS , " %s: get root inode failed \n " , __func__ ) ;
2005-04-17 02:20:36 +04:00
return rc ;
}
2011-06-17 16:34:57 +04:00
static void cifs_kill_sb ( struct super_block * sb )
{
struct cifs_sb_info * cifs_sb = CIFS_SB ( sb ) ;
2021-03-09 02:07:30 +03:00
2021-03-09 02:07:31 +03:00
/*
* We ned to release all dentries for the cached directories
* before we kill the sb .
*/
2021-03-09 02:07:30 +03:00
if ( cifs_sb - > root ) {
2022-08-11 06:00:08 +03:00
close_all_cached_dirs ( cifs_sb ) ;
2022-04-01 09:25:17 +03:00
/* finally release root dentry */
2021-03-09 02:07:30 +03:00
dput ( cifs_sb - > root ) ;
cifs_sb - > root = NULL ;
}
2011-06-17 16:34:57 +04:00
kill_anon_super ( sb ) ;
2011-06-17 17:32:10 +04:00
cifs_umount ( cifs_sb ) ;
2005-04-17 02:20:36 +04:00
}
static int
2006-06-23 13:02:58 +04:00
cifs_statfs ( struct dentry * dentry , struct kstatfs * buf )
2005-04-17 02:20:36 +04:00
{
2006-06-23 13:02:58 +04:00
struct super_block * sb = dentry - > d_sb ;
2008-04-28 08:04:34 +04:00
struct cifs_sb_info * cifs_sb = CIFS_SB ( sb ) ;
2011-05-27 08:34:02 +04:00
struct cifs_tcon * tcon = cifs_sb_master_tcon ( cifs_sb ) ;
2012-09-19 03:20:33 +04:00
struct TCP_Server_Info * server = tcon - > ses - > server ;
2012-06-20 11:21:16 +04:00
unsigned int xid ;
2012-09-19 03:20:33 +04:00
int rc = 0 ;
2005-04-17 02:20:36 +04:00
2012-06-20 11:21:16 +04:00
xid = get_xid ( ) ;
2005-04-17 02:20:36 +04:00
2018-06-25 07:18:52 +03:00
if ( le32_to_cpu ( tcon - > fsAttrInfo . MaxPathNameComponentLength ) > 0 )
buf - > f_namelen =
le32_to_cpu ( tcon - > fsAttrInfo . MaxPathNameComponentLength ) ;
else
buf - > f_namelen = PATH_MAX ;
buf - > f_fsid . val [ 0 ] = tcon - > vol_serial_number ;
/* are using part of create time for more randomness, see man statfs */
buf - > f_fsid . val [ 1 ] = ( int ) le64_to_cpu ( tcon - > vol_create_time ) ;
2005-04-17 02:20:36 +04:00
buf - > f_files = 0 ; /* undefined */
buf - > f_ffree = 0 ; /* unlimited */
2012-09-19 03:20:33 +04:00
if ( server - > ops - > queryfs )
2020-02-03 22:46:43 +03:00
rc = server - > ops - > queryfs ( xid , tcon , cifs_sb , buf ) ;
2008-04-28 08:04:34 +04:00
2012-06-20 11:21:16 +04:00
free_xid ( xid ) ;
2021-03-08 18:00:49 +03:00
return rc ;
2005-04-17 02:20:36 +04:00
}
2014-08-17 17:38:47 +04:00
/*
 * Forward fallocate() to the dialect-specific handler of the file's server.
 * Returns -EOPNOTSUPP when the dialect provides no fallocate operation.
 */
static long cifs_fallocate(struct file *file, int mode, loff_t off, loff_t len)
{
	struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file);
	struct cifs_tcon *tcon = cifs_sb_master_tcon(cifs_sb);
	struct TCP_Server_Info *server = tcon->ses->server;

	if (!server->ops->fallocate)
		return -EOPNOTSUPP;

	return server->ops->fallocate(file, tcon, mode, off, len);
}
2023-01-13 14:49:22 +03:00
static int cifs_permission ( struct mnt_idmap * idmap ,
2021-01-21 16:19:43 +03:00
struct inode * inode , int mask )
2005-04-17 02:20:36 +04:00
{
struct cifs_sb_info * cifs_sb ;
cifs_sb = CIFS_SB ( inode - > i_sb ) ;
2008-07-31 15:41:58 +04:00
if ( cifs_sb - > mnt_cifs_flags & CIFS_MOUNT_NO_PERM ) {
if ( ( mask & MAY_EXEC ) & & ! execute_ok ( inode ) )
return - EACCES ;
else
return 0 ;
} else /* file mode might have been restricted at mount time
2007-07-13 04:33:32 +04:00
on the client ( above and beyond ACL on servers ) for
2005-04-17 02:20:36 +04:00
servers which do not support setting and viewing mode bits ,
2007-07-13 04:33:32 +04:00
so allowing client to check permissions is useful */
2023-01-13 14:49:22 +03:00
return generic_permission ( & nop_mnt_idmap , inode , mask ) ;
2005-04-17 02:20:36 +04:00
}
2006-12-07 07:33:20 +03:00
/*
 * File-local slab caches. cifs_inode_cachep is the cache passed to
 * alloc_inode_sb() in cifs_alloc_inode().
 * NOTE(review): the other three presumably back request buffers, mid entries
 * and small request buffers, judging by their names - confirm where they are
 * created.
 */
static struct kmem_cache * cifs_inode_cachep ;
static struct kmem_cache * cifs_req_cachep ;
static struct kmem_cache * cifs_mid_cachep ;
static struct kmem_cache * cifs_sm_req_cachep ;
2005-04-17 02:20:36 +04:00
/*
 * Definitions of the memory pools that are declared extern earlier in this
 * file.
 */
mempool_t * cifs_sm_req_poolp ;
mempool_t * cifs_req_poolp ;
mempool_t * cifs_mid_poolp ;
static struct inode *
cifs_alloc_inode ( struct super_block * sb )
{
struct cifsInodeInfo * cifs_inode ;
2022-03-23 00:41:03 +03:00
cifs_inode = alloc_inode_sb ( sb , cifs_inode_cachep , GFP_KERNEL ) ;
2005-04-17 02:20:36 +04:00
if ( ! cifs_inode )
return NULL ;
cifs_inode - > cifsAttrs = 0x20 ; /* default */
cifs_inode - > time = 0 ;
2012-09-19 17:22:44 +04:00
/*
* Until the file is open and we have gotten oplock info back from the
* server , can not assume caching of file data or metadata .
*/
2010-11-03 10:58:57 +03:00
cifs_set_oplock_level ( cifs_inode , 0 ) ;
2014-04-30 17:31:45 +04:00
cifs_inode - > flags = 0 ;
2014-03-11 20:11:47 +04:00
spin_lock_init ( & cifs_inode - > writers_lock ) ;
cifs_inode - > writers = 0 ;
netfs: Fix gcc-12 warning by embedding vfs inode in netfs_i_context
While randstruct was satisfied with using an open-coded "void *" offset
cast for the netfs_i_context <-> inode casting, __builtin_object_size() as
used by FORTIFY_SOURCE was not as easily fooled. This was causing the
following complaint[1] from gcc v12:
In file included from include/linux/string.h:253,
from include/linux/ceph/ceph_debug.h:7,
from fs/ceph/inode.c:2:
In function 'fortify_memset_chk',
inlined from 'netfs_i_context_init' at include/linux/netfs.h:326:2,
inlined from 'ceph_alloc_inode' at fs/ceph/inode.c:463:2:
include/linux/fortify-string.h:242:25: warning: call to '__write_overflow_field' declared with attribute warning: detected write beyond size of field (1st parameter); maybe use struct_group()? [-Wattribute-warning]
242 | __write_overflow_field(p_size_field, size);
| ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Fix this by embedding a struct inode into struct netfs_i_context (which
should perhaps be renamed to struct netfs_inode). The struct inode
vfs_inode fields are then removed from the 9p, afs, ceph and cifs inode
structs and vfs_inode is then simply changed to "netfs.inode" in those
filesystems.
Further, rename netfs_i_context to netfs_inode, get rid of the
netfs_inode() function that converted a netfs_i_context pointer to an
inode pointer (that can now be done with &ctx->inode) and rename the
netfs_i_context() function to netfs_inode() (which is now a wrapper
around container_of()).
Most of the changes were done with:
perl -p -i -e 's/vfs_inode/netfs.inode/'g \
`git grep -l 'vfs_inode' -- fs/{9p,afs,ceph,cifs}/*.[ch]`
Kees suggested doing it with a pair structure[2] and a special
declarator to insert that into the network filesystem's inode
wrapper[3], but I think it's cleaner to embed it - and then it doesn't
matter if struct randomisation reorders things.
Dave Chinner suggested using a filesystem-specific VFS_I() function in
each filesystem to convert that filesystem's own inode wrapper struct
into the VFS inode struct[4].
Version #2:
- Fix a couple of missed name changes due to a disabled cifs option.
- Rename nfs_i_context to nfs_inode
- Use "netfs" instead of "nic" as the member name in per-fs inode wrapper
structs.
[ This also undoes commit 507160f46c55 ("netfs: gcc-12: temporarily
disable '-Wattribute-warning' for now") that is no longer needed ]
Fixes: bc899ee1c898 ("netfs: Add a netfs inode context")
Reported-by: Jeff Layton <jlayton@kernel.org>
Signed-off-by: David Howells <dhowells@redhat.com>
Reviewed-by: Jeff Layton <jlayton@kernel.org>
Reviewed-by: Kees Cook <keescook@chromium.org>
Reviewed-by: Xiubo Li <xiubli@redhat.com>
cc: Jonathan Corbet <corbet@lwn.net>
cc: Eric Van Hensbergen <ericvh@gmail.com>
cc: Latchesar Ionkov <lucho@ionkov.net>
cc: Dominique Martinet <asmadeus@codewreck.org>
cc: Christian Schoenebeck <linux_oss@crudebyte.com>
cc: Marc Dionne <marc.dionne@auristor.com>
cc: Ilya Dryomov <idryomov@gmail.com>
cc: Steve French <smfrench@gmail.com>
cc: William Kucharski <william.kucharski@oracle.com>
cc: "Matthew Wilcox (Oracle)" <willy@infradead.org>
cc: Dave Chinner <david@fromorbit.com>
cc: linux-doc@vger.kernel.org
cc: v9fs-developer@lists.sourceforge.net
cc: linux-afs@lists.infradead.org
cc: ceph-devel@vger.kernel.org
cc: linux-cifs@vger.kernel.org
cc: samba-technical@lists.samba.org
cc: linux-fsdevel@vger.kernel.org
cc: linux-hardening@vger.kernel.org
Link: https://lore.kernel.org/r/d2ad3a3d7bdd794c6efb562d2f2b655fb67756b9.camel@kernel.org/ [1]
Link: https://lore.kernel.org/r/20220517210230.864239-1-keescook@chromium.org/ [2]
Link: https://lore.kernel.org/r/20220518202212.2322058-1-keescook@chromium.org/ [3]
Link: https://lore.kernel.org/r/20220524101205.GI2306852@dread.disaster.area/ [4]
Link: https://lore.kernel.org/r/165296786831.3591209.12111293034669289733.stgit@warthog.procyon.org.uk/ # v1
Link: https://lore.kernel.org/r/165305805651.4094995.7763502506786714216.stgit@warthog.procyon.org.uk # v2
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2022-06-09 23:46:04 +03:00
cifs_inode - > netfs . inode . i_blkbits = 14 ; /* 2**14 = CIFS_MAX_MSGSIZE */
2009-04-03 21:44:00 +04:00
cifs_inode - > server_eof = 0 ;
2011-01-07 19:30:27 +03:00
cifs_inode - > uniqueid = 0 ;
cifs_inode - > createtime = 0 ;
2013-09-05 21:30:16 +04:00
cifs_inode - > epoch = 0 ;
2019-06-05 03:38:38 +03:00
spin_lock_init ( & cifs_inode - > open_file_lock ) ;
2016-09-22 08:39:34 +03:00
generate_random_uuid ( cifs_inode - > lease_key ) ;
2022-10-04 00:43:50 +03:00
cifs_inode - > symlink_target = NULL ;
2017-07-09 02:48:15 +03:00
2012-09-19 17:22:44 +04:00
/*
* Can not set i_flags here - they get immediately overwritten to zero
* by the VFS .
*/
netfs: Fix gcc-12 warning by embedding vfs inode in netfs_i_context
While randstruct was satisfied with using an open-coded "void *" offset
cast for the netfs_i_context <-> inode casting, __builtin_object_size() as
used by FORTIFY_SOURCE was not as easily fooled. This was causing the
following complaint[1] from gcc v12:
In file included from include/linux/string.h:253,
from include/linux/ceph/ceph_debug.h:7,
from fs/ceph/inode.c:2:
In function 'fortify_memset_chk',
inlined from 'netfs_i_context_init' at include/linux/netfs.h:326:2,
inlined from 'ceph_alloc_inode' at fs/ceph/inode.c:463:2:
include/linux/fortify-string.h:242:25: warning: call to '__write_overflow_field' declared with attribute warning: detected write beyond size of field (1st parameter); maybe use struct_group()? [-Wattribute-warning]
242 | __write_overflow_field(p_size_field, size);
| ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Fix this by embedding a struct inode into struct netfs_i_context (which
should perhaps be renamed to struct netfs_inode). The struct inode
vfs_inode fields are then removed from the 9p, afs, ceph and cifs inode
structs and vfs_inode is then simply changed to "netfs.inode" in those
filesystems.
Further, rename netfs_i_context to netfs_inode, get rid of the
netfs_inode() function that converted a netfs_i_context pointer to an
inode pointer (that can now be done with &ctx->inode) and rename the
netfs_i_context() function to netfs_inode() (which is now a wrapper
around container_of()).
Most of the changes were done with:
perl -p -i -e 's/vfs_inode/netfs.inode/'g \
`git grep -l 'vfs_inode' -- fs/{9p,afs,ceph,cifs}/*.[ch]`
Kees suggested doing it with a pair structure[2] and a special
declarator to insert that into the network filesystem's inode
wrapper[3], but I think it's cleaner to embed it - and then it doesn't
matter if struct randomisation reorders things.
Dave Chinner suggested using a filesystem-specific VFS_I() function in
each filesystem to convert that filesystem's own inode wrapper struct
into the VFS inode struct[4].
Version #2:
- Fix a couple of missed name changes due to a disabled cifs option.
- Rename nfs_i_context to nfs_inode
- Use "netfs" instead of "nic" as the member name in per-fs inode wrapper
structs.
[ This also undoes commit 507160f46c55 ("netfs: gcc-12: temporarily
disable '-Wattribute-warning' for now") that is no longer needed ]
Fixes: bc899ee1c898 ("netfs: Add a netfs inode context")
Reported-by: Jeff Layton <jlayton@kernel.org>
Signed-off-by: David Howells <dhowells@redhat.com>
Reviewed-by: Jeff Layton <jlayton@kernel.org>
Reviewed-by: Kees Cook <keescook@chromium.org>
Reviewed-by: Xiubo Li <xiubli@redhat.com>
cc: Jonathan Corbet <corbet@lwn.net>
cc: Eric Van Hensbergen <ericvh@gmail.com>
cc: Latchesar Ionkov <lucho@ionkov.net>
cc: Dominique Martinet <asmadeus@codewreck.org>
cc: Christian Schoenebeck <linux_oss@crudebyte.com>
cc: Marc Dionne <marc.dionne@auristor.com>
cc: Ilya Dryomov <idryomov@gmail.com>
cc: Steve French <smfrench@gmail.com>
cc: William Kucharski <william.kucharski@oracle.com>
cc: "Matthew Wilcox (Oracle)" <willy@infradead.org>
cc: Dave Chinner <david@fromorbit.com>
cc: linux-doc@vger.kernel.org
cc: v9fs-developer@lists.sourceforge.net
cc: linux-afs@lists.infradead.org
cc: ceph-devel@vger.kernel.org
cc: linux-cifs@vger.kernel.org
cc: samba-technical@lists.samba.org
cc: linux-fsdevel@vger.kernel.org
cc: linux-hardening@vger.kernel.org
Link: https://lore.kernel.org/r/d2ad3a3d7bdd794c6efb562d2f2b655fb67756b9.camel@kernel.org/ [1]
Link: https://lore.kernel.org/r/20220517210230.864239-1-keescook@chromium.org/ [2]
Link: https://lore.kernel.org/r/20220518202212.2322058-1-keescook@chromium.org/ [3]
Link: https://lore.kernel.org/r/20220524101205.GI2306852@dread.disaster.area/ [4]
Link: https://lore.kernel.org/r/165296786831.3591209.12111293034669289733.stgit@warthog.procyon.org.uk/ # v1
Link: https://lore.kernel.org/r/165305805651.4094995.7763502506786714216.stgit@warthog.procyon.org.uk # v2
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2022-06-09 23:46:04 +03:00
/* cifs_inode->netfs.inode.i_flags = S_NOATIME | S_NOCMTIME; */
2005-04-17 02:20:36 +04:00
INIT_LIST_HEAD ( & cifs_inode - > openFileList ) ;
2012-09-19 17:22:43 +04:00
INIT_LIST_HEAD ( & cifs_inode - > llist ) ;
2021-04-13 08:26:42 +03:00
INIT_LIST_HEAD ( & cifs_inode - > deferred_closes ) ;
spin_lock_init ( & cifs_inode - > deferred_lock ) ;
netfs: Fix gcc-12 warning by embedding vfs inode in netfs_i_context
While randstruct was satisfied with using an open-coded "void *" offset
cast for the netfs_i_context <-> inode casting, __builtin_object_size() as
used by FORTIFY_SOURCE was not as easily fooled. This was causing the
following complaint[1] from gcc v12:
In file included from include/linux/string.h:253,
from include/linux/ceph/ceph_debug.h:7,
from fs/ceph/inode.c:2:
In function 'fortify_memset_chk',
inlined from 'netfs_i_context_init' at include/linux/netfs.h:326:2,
inlined from 'ceph_alloc_inode' at fs/ceph/inode.c:463:2:
include/linux/fortify-string.h:242:25: warning: call to '__write_overflow_field' declared with attribute warning: detected write beyond size of field (1st parameter); maybe use struct_group()? [-Wattribute-warning]
242 | __write_overflow_field(p_size_field, size);
| ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Fix this by embedding a struct inode into struct netfs_i_context (which
should perhaps be renamed to struct netfs_inode). The struct inode
vfs_inode fields are then removed from the 9p, afs, ceph and cifs inode
structs and vfs_inode is then simply changed to "netfs.inode" in those
filesystems.
Further, rename netfs_i_context to netfs_inode, get rid of the
netfs_inode() function that converted a netfs_i_context pointer to an
inode pointer (that can now be done with &ctx->inode) and rename the
netfs_i_context() function to netfs_inode() (which is now a wrapper
around container_of()).
Most of the changes were done with:
perl -p -i -e 's/vfs_inode/netfs.inode/'g \
`git grep -l 'vfs_inode' -- fs/{9p,afs,ceph,cifs}/*.[ch]`
Kees suggested doing it with a pair structure[2] and a special
declarator to insert that into the network filesystem's inode
wrapper[3], but I think it's cleaner to embed it - and then it doesn't
matter if struct randomisation reorders things.
Dave Chinner suggested using a filesystem-specific VFS_I() function in
each filesystem to convert that filesystem's own inode wrapper struct
into the VFS inode struct[4].
Version #2:
- Fix a couple of missed name changes due to a disabled cifs option.
- Rename nfs_i_context to nfs_inode
- Use "netfs" instead of "nic" as the member name in per-fs inode wrapper
structs.
[ This also undoes commit 507160f46c55 ("netfs: gcc-12: temporarily
disable '-Wattribute-warning' for now") that is no longer needed ]
Fixes: bc899ee1c898 ("netfs: Add a netfs inode context")
Reported-by: Jeff Layton <jlayton@kernel.org>
Signed-off-by: David Howells <dhowells@redhat.com>
Reviewed-by: Jeff Layton <jlayton@kernel.org>
Reviewed-by: Kees Cook <keescook@chromium.org>
Reviewed-by: Xiubo Li <xiubli@redhat.com>
cc: Jonathan Corbet <corbet@lwn.net>
cc: Eric Van Hensbergen <ericvh@gmail.com>
cc: Latchesar Ionkov <lucho@ionkov.net>
cc: Dominique Martinet <asmadeus@codewreck.org>
cc: Christian Schoenebeck <linux_oss@crudebyte.com>
cc: Marc Dionne <marc.dionne@auristor.com>
cc: Ilya Dryomov <idryomov@gmail.com>
cc: Steve French <smfrench@gmail.com>
cc: William Kucharski <william.kucharski@oracle.com>
cc: "Matthew Wilcox (Oracle)" <willy@infradead.org>
cc: Dave Chinner <david@fromorbit.com>
cc: linux-doc@vger.kernel.org
cc: v9fs-developer@lists.sourceforge.net
cc: linux-afs@lists.infradead.org
cc: ceph-devel@vger.kernel.org
cc: linux-cifs@vger.kernel.org
cc: samba-technical@lists.samba.org
cc: linux-fsdevel@vger.kernel.org
cc: linux-hardening@vger.kernel.org
Link: https://lore.kernel.org/r/d2ad3a3d7bdd794c6efb562d2f2b655fb67756b9.camel@kernel.org/ [1]
Link: https://lore.kernel.org/r/20220517210230.864239-1-keescook@chromium.org/ [2]
Link: https://lore.kernel.org/r/20220518202212.2322058-1-keescook@chromium.org/ [3]
Link: https://lore.kernel.org/r/20220524101205.GI2306852@dread.disaster.area/ [4]
Link: https://lore.kernel.org/r/165296786831.3591209.12111293034669289733.stgit@warthog.procyon.org.uk/ # v1
Link: https://lore.kernel.org/r/165305805651.4094995.7763502506786714216.stgit@warthog.procyon.org.uk # v2
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2022-06-09 23:46:04 +03:00
return & cifs_inode - > netfs . inode ;
2005-04-17 02:20:36 +04:00
}
static void
2019-04-15 06:18:35 +03:00
cifs_free_inode ( struct inode * inode )
2005-04-17 02:20:36 +04:00
{
2022-10-04 00:43:50 +03:00
struct cifsInodeInfo * cinode = CIFS_I ( inode ) ;
if ( S_ISLNK ( inode - > i_mode ) )
kfree ( cinode - > symlink_target ) ;
kmem_cache_free ( cifs_inode_cachep , cinode ) ;
2005-04-17 02:20:36 +04:00
}
2010-07-05 16:42:45 +04:00
static void
2010-06-07 22:34:48 +04:00
cifs_evict_inode ( struct inode * inode )
2010-07-05 16:42:45 +04:00
{
2014-04-04 01:47:49 +04:00
truncate_inode_pages_final ( & inode - > i_data ) ;
cifs: Support fscache indexing rewrite
Change the cifs filesystem to take account of the changes to fscache's
indexing rewrite and reenable caching in cifs.
The following changes have been made:
(1) The fscache_netfs struct is no more, and there's no need to register
the filesystem as a whole.
(2) The session cookie is now an fscache_volume cookie, allocated with
fscache_acquire_volume(). That takes three parameters: a string
representing the "volume" in the index, a string naming the cache to
use (or NULL) and a u64 that conveys coherency metadata for the
volume.
For cifs, I've made it render the volume name string as:
"cifs,<ipaddress>,<sharename>"
where the sharename has '/' characters replaced with ';'.
This probably needs rethinking a bit as the total name could exceed
the maximum filename component length.
Further, the coherency data is currently just set to 0. It needs
something else doing with it - I wonder if it would suffice simply to
sum the resource_id, vol_create_time and vol_serial_number or maybe
hash them.
(3) The fscache_cookie_def is no more and needed information is passed
directly to fscache_acquire_cookie(). The cache no longer calls back
into the filesystem, but rather metadata changes are indicated at
other times.
fscache_acquire_cookie() is passed the same keying and coherency
information as before.
(4) The functions to set/reset cookies are removed and
fscache_use_cookie() and fscache_unuse_cookie() are used instead.
fscache_use_cookie() is passed a flag to indicate if the cookie is
opened for writing. fscache_unuse_cookie() is passed updates for the
metadata if we changed it (ie. if the file was opened for writing).
These are called when the file is opened or closed.
(5) cifs_setattr_*() are made to call fscache_resize() to change the size
of the cache object.
(6) The functions to read and write data are stubbed out pending a
conversion to use netfslib.
Changes
=======
ver #8:
- Abstract cache invalidation into a helper function.
- Fix some checkpatch warnings[3].
ver #7:
- Removed the accidentally added-back call to get the super cookie in
cifs_root_iget().
- Fixed the right call to cifs_fscache_get_super_cookie() to take account
of the "-o fsc" mount flag.
ver #6:
- Moved the change of gfpflags_allow_blocking() to current_is_kswapd() for
cifs here.
- Fixed one of the error paths in cifs_atomic_open() to jump around the
call to use the cookie.
- Fixed an additional successful return in the middle of cifs_open() to
use the cookie on the way out.
- Only get a volume cookie (and thus inode cookies) when "-o fsc" is
supplied to mount.
ver #5:
- Fixed a couple of bits of cookie handling[2]:
- The cookie should be released in cifs_evict_inode(), not
cifsFileInfo_put_final(). The cookie needs to persist beyond file
closure so that writepages will be able to write to it.
- fscache_use_cookie() needs to be called in cifs_atomic_open() as it is
for cifs_open().
ver #4:
- Fixed the use of sizeof with memset.
- tcon->vol_create_time is __le64 so doesn't need cpu_to_le64().
ver #3:
- Canonicalise the cifs coherency data to make the cache portable.
- Set volume coherency data.
ver #2:
- Use gfpflags_allow_blocking() rather than using flag directly.
- Upgraded to -rc4 to allow for upstream changes[1].
- fscache_acquire_volume() now returns errors.
Signed-off-by: David Howells <dhowells@redhat.com>
Acked-by: Jeff Layton <jlayton@kernel.org>
cc: Steve French <smfrench@gmail.com>
cc: Shyam Prasad N <nspmangalore@gmail.com>
cc: linux-cifs@vger.kernel.org
cc: linux-cachefs@redhat.com
Link: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=23b55d673d7527b093cd97b7c217c82e70cd1af0 [1]
Link: https://lore.kernel.org/r/3419813.1641592362@warthog.procyon.org.uk/ [2]
Link: https://lore.kernel.org/r/CAH2r5muTanw9pJqzAHd01d9A8keeChkzGsCEH6=0rHutVLAF-A@mail.gmail.com/ [3]
Link: https://lore.kernel.org/r/163819671009.215744.11230627184193298714.stgit@warthog.procyon.org.uk/ # v1
Link: https://lore.kernel.org/r/163906982979.143852.10672081929614953210.stgit@warthog.procyon.org.uk/ # v2
Link: https://lore.kernel.org/r/163967187187.1823006.247415138444991444.stgit@warthog.procyon.org.uk/ # v3
Link: https://lore.kernel.org/r/164021579335.640689.2681324337038770579.stgit@warthog.procyon.org.uk/ # v4
Link: https://lore.kernel.org/r/3462849.1641593783@warthog.procyon.org.uk/ # v5
Link: https://lore.kernel.org/r/1318953.1642024578@warthog.procyon.org.uk/ # v6
Signed-off-by: Steve French <stfrench@microsoft.com>
2020-11-17 18:56:59 +03:00
if ( inode - > i_state & I_PINNING_FSCACHE_WB )
cifs_fscache_unuse_inode_cookie ( inode , true ) ;
cifs_fscache_release_inode_cookie ( inode ) ;
2012-05-03 16:48:02 +04:00
clear_inode ( inode ) ;
2010-07-05 16:42:45 +04:00
}
2009-06-11 18:27:32 +04:00
static void
cifs_show_address ( struct seq_file * s , struct TCP_Server_Info * server )
{
2010-12-13 19:08:35 +03:00
struct sockaddr_in * sa = ( struct sockaddr_in * ) & server - > dstaddr ;
struct sockaddr_in6 * sa6 = ( struct sockaddr_in6 * ) & server - > dstaddr ;
2014-05-13 20:04:17 +04:00
seq_puts ( s , " ,addr= " ) ;
2009-06-11 18:27:32 +04:00
2010-12-13 19:08:35 +03:00
switch ( server - > dstaddr . ss_family ) {
2009-06-11 18:27:32 +04:00
case AF_INET :
2010-12-13 19:08:35 +03:00
seq_printf ( s , " %pI4 " , & sa - > sin_addr . s_addr ) ;
2009-06-11 18:27:32 +04:00
break ;
case AF_INET6 :
2010-12-13 19:08:35 +03:00
seq_printf ( s , " %pI6 " , & sa6 - > sin6_addr . s6_addr ) ;
if ( sa6 - > sin6_scope_id )
seq_printf ( s , " %%%u " , sa6 - > sin6_scope_id ) ;
2009-06-11 18:27:32 +04:00
break ;
default :
2014-05-13 20:04:17 +04:00
seq_puts ( s , " (unknown) " ) ;
2009-06-11 18:27:32 +04:00
}
2017-11-07 11:54:55 +03:00
if ( server - > rdma )
seq_puts ( s , " ,rdma " ) ;
2009-06-11 18:27:32 +04:00
}
2011-06-13 19:50:41 +04:00
static void
2013-05-26 15:01:00 +04:00
cifs_show_security ( struct seq_file * s , struct cifs_ses * ses )
2011-06-13 19:50:41 +04:00
{
2015-09-12 03:24:19 +03:00
if ( ses - > sectype = = Unspecified ) {
if ( ses - > user_name = = NULL )
seq_puts ( s , " ,sec=none " ) ;
2013-05-26 15:01:00 +04:00
return ;
2015-09-12 03:24:19 +03:00
}
2013-05-26 15:01:00 +04:00
2014-05-13 20:04:17 +04:00
seq_puts ( s , " ,sec= " ) ;
2011-06-13 19:50:41 +04:00
2013-05-26 15:01:00 +04:00
switch ( ses - > sectype ) {
2011-06-13 19:50:41 +04:00
case NTLMv2 :
2014-05-13 20:04:17 +04:00
seq_puts ( s , " ntlmv2 " ) ;
2011-06-13 19:50:41 +04:00
break ;
case Kerberos :
2020-02-10 12:38:14 +03:00
seq_puts ( s , " krb5 " ) ;
2011-06-13 19:50:41 +04:00
break ;
case RawNTLMSSP :
2014-05-13 20:04:17 +04:00
seq_puts ( s , " ntlmssp " ) ;
2011-06-13 19:50:41 +04:00
break ;
default :
/* shouldn't ever happen */
2014-05-13 20:04:17 +04:00
seq_puts ( s , " unknown " ) ;
2011-06-13 19:50:41 +04:00
break ;
}
2013-05-26 15:01:00 +04:00
if ( ses - > sign )
2014-05-13 20:04:17 +04:00
seq_puts ( s , " i " ) ;
2020-02-10 12:38:14 +03:00
if ( ses - > sectype = = Kerberos )
seq_printf ( s , " ,cruid=%u " ,
from_kuid_munged ( & init_user_ns , ses - > cred_uid ) ) ;
2011-06-13 19:50:41 +04:00
}
2012-05-16 15:53:01 +04:00
static void
cifs_show_cache_flavor ( struct seq_file * s , struct cifs_sb_info * cifs_sb )
{
2014-05-13 20:04:17 +04:00
seq_puts ( s , " ,cache= " ) ;
2012-05-16 15:53:01 +04:00
if ( cifs_sb - > mnt_cifs_flags & CIFS_MOUNT_STRICT_IO )
2014-05-13 20:04:17 +04:00
seq_puts ( s , " strict " ) ;
2012-05-16 15:53:01 +04:00
else if ( cifs_sb - > mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO )
2014-05-13 20:04:17 +04:00
seq_puts ( s , " none " ) ;
2019-08-30 10:12:41 +03:00
else if ( cifs_sb - > mnt_cifs_flags & CIFS_MOUNT_RW_CACHE )
seq_puts ( s , " singleclient " ) ; /* assume only one client access */
2019-08-28 07:58:54 +03:00
else if ( cifs_sb - > mnt_cifs_flags & CIFS_MOUNT_RO_CACHE )
seq_puts ( s , " ro " ) ; /* read only caching assumed */
2012-05-16 15:53:01 +04:00
else
2014-05-13 20:04:17 +04:00
seq_puts ( s , " loose " ) ;
2012-05-16 15:53:01 +04:00
}
2020-12-15 05:08:10 +03:00
/*
* cifs_show_devname ( ) is used so we show the mount device name with correct
* format ( e . g . forward slashes vs . back slashes ) in / proc / mounts
*/
static int cifs_show_devname ( struct seq_file * m , struct dentry * root )
{
struct cifs_sb_info * cifs_sb = CIFS_SB ( root - > d_sb ) ;
2021-02-11 09:06:16 +03:00
char * devname = kstrdup ( cifs_sb - > ctx - > source , GFP_KERNEL ) ;
2020-12-15 05:08:10 +03:00
if ( devname = = NULL )
seq_puts ( m , " none " ) ;
else {
convert_delimiter ( devname , ' / ' ) ;
2021-04-06 18:02:29 +03:00
/* escape all spaces in share names */
seq_escape ( m , devname , " \t " ) ;
2020-12-15 05:08:10 +03:00
kfree ( devname ) ;
}
return 0 ;
}
2005-04-17 02:20:36 +04:00
/*
* cifs_show_options ( ) is for displaying mount options in / proc / mounts .
* Not all settable options are displayed but most of the important
* ones are .
*/
static int
2011-12-09 06:32:45 +04:00
cifs_show_options ( struct seq_file * s , struct dentry * root )
2005-04-17 02:20:36 +04:00
{
2011-12-09 06:32:45 +04:00
struct cifs_sb_info * cifs_sb = CIFS_SB ( root - > d_sb ) ;
2011-05-27 08:34:02 +04:00
struct cifs_tcon * tcon = cifs_sb_master_tcon ( cifs_sb ) ;
2010-09-02 04:06:02 +04:00
struct sockaddr * srcaddr ;
srcaddr = ( struct sockaddr * ) & tcon - > ses - > server - > srcaddr ;
2009-06-11 18:27:28 +04:00
fs: create and use seq_show_option for escaping
Many file systems that implement the show_options hook fail to correctly
escape their output which could lead to unescaped characters (e.g. new
lines) leaking into /proc/mounts and /proc/[pid]/mountinfo files. This
could lead to confusion, spoofed entries (resulting in things like
systemd issuing false d-bus "mount" notifications), and who knows what
else. This looks like it would only be the root user stepping on
themselves, but it's possible weird things could happen in containers or
in other situations with delegated mount privileges.
Here's an example using overlay with setuid fusermount trusting the
contents of /proc/mounts (via the /etc/mtab symlink). Imagine the use
of "sudo" is something more sneaky:
$ BASE="ovl"
$ MNT="$BASE/mnt"
$ LOW="$BASE/lower"
$ UP="$BASE/upper"
$ WORK="$BASE/work/ 0 0
none /proc fuse.pwn user_id=1000"
$ mkdir -p "$LOW" "$UP" "$WORK"
$ sudo mount -t overlay -o "lowerdir=$LOW,upperdir=$UP,workdir=$WORK" none /mnt
$ cat /proc/mounts
none /root/ovl/mnt overlay rw,relatime,lowerdir=ovl/lower,upperdir=ovl/upper,workdir=ovl/work/ 0 0
none /proc fuse.pwn user_id=1000 0 0
$ fusermount -u /proc
$ cat /proc/mounts
cat: /proc/mounts: No such file or directory
This fixes the problem by adding new seq_show_option and
seq_show_option_n helpers, and updating the vulnerable show_option
handlers to use them as needed. Some, like SELinux, need to be open
coded due to unusual existing escape mechanisms.
[akpm@linux-foundation.org: add lost chunk, per Kees]
[keescook@chromium.org: seq_show_option should be using const parameters]
Signed-off-by: Kees Cook <keescook@chromium.org>
Acked-by: Serge Hallyn <serge.hallyn@canonical.com>
Acked-by: Jan Kara <jack@suse.com>
Acked-by: Paul Moore <paul@paul-moore.com>
Cc: J. R. Okajima <hooanon05g@gmail.com>
Signed-off-by: Kees Cook <keescook@chromium.org>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2015-09-05 01:44:57 +03:00
seq_show_option ( s , " vers " , tcon - > ses - > server - > vals - > version_string ) ;
2013-05-26 15:01:00 +04:00
cifs_show_security ( s , tcon - > ses ) ;
2012-05-16 15:53:01 +04:00
cifs_show_cache_flavor ( s , cifs_sb ) ;
2011-06-13 19:50:41 +04:00
2019-09-12 05:46:20 +03:00
if ( tcon - > no_lease )
seq_puts ( s , " ,nolease " ) ;
2020-12-14 09:40:19 +03:00
if ( cifs_sb - > ctx - > multiuser )
2014-05-13 20:04:17 +04:00
seq_puts ( s , " ,multiuser " ) ;
2011-02-25 10:11:56 +03:00
else if ( tcon - > ses - > user_name )
fs: create and use seq_show_option for escaping
Many file systems that implement the show_options hook fail to correctly
escape their output which could lead to unescaped characters (e.g. new
lines) leaking into /proc/mounts and /proc/[pid]/mountinfo files. This
could lead to confusion, spoofed entries (resulting in things like
systemd issuing false d-bus "mount" notifications), and who knows what
else. This looks like it would only be the root user stepping on
themselves, but it's possible weird things could happen in containers or
in other situations with delegated mount privileges.
Here's an example using overlay with setuid fusermount trusting the
contents of /proc/mounts (via the /etc/mtab symlink). Imagine the use
of "sudo" is something more sneaky:
$ BASE="ovl"
$ MNT="$BASE/mnt"
$ LOW="$BASE/lower"
$ UP="$BASE/upper"
$ WORK="$BASE/work/ 0 0
none /proc fuse.pwn user_id=1000"
$ mkdir -p "$LOW" "$UP" "$WORK"
$ sudo mount -t overlay -o "lowerdir=$LOW,upperdir=$UP,workdir=$WORK" none /mnt
$ cat /proc/mounts
none /root/ovl/mnt overlay rw,relatime,lowerdir=ovl/lower,upperdir=ovl/upper,workdir=ovl/work/ 0 0
none /proc fuse.pwn user_id=1000 0 0
$ fusermount -u /proc
$ cat /proc/mounts
cat: /proc/mounts: No such file or directory
This fixes the problem by adding new seq_show_option and
seq_show_option_n helpers, and updating the vulnerable show_option
handlers to use them as needed. Some, like SELinux, need to be open
coded due to unusual existing escape mechanisms.
[akpm@linux-foundation.org: add lost chunk, per Kees]
[keescook@chromium.org: seq_show_option should be using const parameters]
Signed-off-by: Kees Cook <keescook@chromium.org>
Acked-by: Serge Hallyn <serge.hallyn@canonical.com>
Acked-by: Jan Kara <jack@suse.com>
Acked-by: Paul Moore <paul@paul-moore.com>
Cc: J. R. Okajima <hooanon05g@gmail.com>
Signed-off-by: Kees Cook <keescook@chromium.org>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2015-09-05 01:44:57 +03:00
seq_show_option ( s , " username " , tcon - > ses - > user_name ) ;
2010-09-30 03:51:12 +04:00
2018-08-10 04:31:10 +03:00
if ( tcon - > ses - > domainName & & tcon - > ses - > domainName [ 0 ] ! = 0 )
fs: create and use seq_show_option for escaping
Many file systems that implement the show_options hook fail to correctly
escape their output which could lead to unescaped characters (e.g. new
lines) leaking into /proc/mounts and /proc/[pid]/mountinfo files. This
could lead to confusion, spoofed entries (resulting in things like
systemd issuing false d-bus "mount" notifications), and who knows what
else. This looks like it would only be the root user stepping on
themselves, but it's possible weird things could happen in containers or
in other situations with delegated mount privileges.
Here's an example using overlay with setuid fusermount trusting the
contents of /proc/mounts (via the /etc/mtab symlink). Imagine the use
of "sudo" is something more sneaky:
$ BASE="ovl"
$ MNT="$BASE/mnt"
$ LOW="$BASE/lower"
$ UP="$BASE/upper"
$ WORK="$BASE/work/ 0 0
none /proc fuse.pwn user_id=1000"
$ mkdir -p "$LOW" "$UP" "$WORK"
$ sudo mount -t overlay -o "lowerdir=$LOW,upperdir=$UP,workdir=$WORK" none /mnt
$ cat /proc/mounts
none /root/ovl/mnt overlay rw,relatime,lowerdir=ovl/lower,upperdir=ovl/upper,workdir=ovl/work/ 0 0
none /proc fuse.pwn user_id=1000 0 0
$ fusermount -u /proc
$ cat /proc/mounts
cat: /proc/mounts: No such file or directory
This fixes the problem by adding new seq_show_option and
seq_show_option_n helpers, and updating the vulnerable show_option
handlers to use them as needed. Some, like SELinux, need to be open
coded due to unusual existing escape mechanisms.
[akpm@linux-foundation.org: add lost chunk, per Kees]
[keescook@chromium.org: seq_show_option should be using const parameters]
Signed-off-by: Kees Cook <keescook@chromium.org>
Acked-by: Serge Hallyn <serge.hallyn@canonical.com>
Acked-by: Jan Kara <jack@suse.com>
Acked-by: Paul Moore <paul@paul-moore.com>
Cc: J. R. Okajima <hooanon05g@gmail.com>
Signed-off-by: Kees Cook <keescook@chromium.org>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2015-09-05 01:44:57 +03:00
seq_show_option ( s , " domain " , tcon - > ses - > domainName ) ;
2009-06-11 18:27:28 +04:00
2010-09-02 04:06:02 +04:00
if ( srcaddr - > sa_family ! = AF_UNSPEC ) {
struct sockaddr_in * saddr4 ;
struct sockaddr_in6 * saddr6 ;
saddr4 = ( struct sockaddr_in * ) srcaddr ;
saddr6 = ( struct sockaddr_in6 * ) srcaddr ;
if ( srcaddr - > sa_family = = AF_INET6 )
seq_printf ( s , " ,srcaddr=%pI6c " ,
& saddr6 - > sin6_addr ) ;
else if ( srcaddr - > sa_family = = AF_INET )
seq_printf ( s , " ,srcaddr=%pI4 " ,
& saddr4 - > sin_addr . s_addr ) ;
else
seq_printf ( s , " ,srcaddr=BAD-AF:%i " ,
( int ) ( srcaddr - > sa_family ) ) ;
}
2013-02-06 13:20:20 +04:00
seq_printf ( s , " ,uid=%u " ,
2020-12-12 22:40:50 +03:00
from_kuid_munged ( & init_user_ns , cifs_sb - > ctx - > linux_uid ) ) ;
2009-06-11 18:27:29 +04:00
if ( cifs_sb - > mnt_cifs_flags & CIFS_MOUNT_OVERR_UID )
2014-05-13 20:04:17 +04:00
seq_puts ( s , " ,forceuid " ) ;
2009-08-03 20:45:10 +04:00
else
2014-05-13 20:04:17 +04:00
seq_puts ( s , " ,noforceuid " ) ;
2009-06-11 18:27:29 +04:00
2013-02-06 13:20:20 +04:00
seq_printf ( s , " ,gid=%u " ,
2020-12-12 22:40:50 +03:00
from_kgid_munged ( & init_user_ns , cifs_sb - > ctx - > linux_gid ) ) ;
2009-06-11 18:27:29 +04:00
if ( cifs_sb - > mnt_cifs_flags & CIFS_MOUNT_OVERR_GID )
2014-05-13 20:04:17 +04:00
seq_puts ( s , " ,forcegid " ) ;
2009-08-03 20:45:10 +04:00
else
2014-05-13 20:04:17 +04:00
seq_puts ( s , " ,noforcegid " ) ;
2009-06-11 18:27:28 +04:00
2009-06-11 18:27:32 +04:00
cifs_show_address ( s , tcon - > ses - > server ) ;
2005-04-17 02:20:36 +04:00
2009-06-11 18:27:28 +04:00
if ( ! tcon - > unix_ext )
2011-07-26 11:22:14 +04:00
seq_printf ( s , " ,file_mode=0%ho,dir_mode=0%ho " ,
2020-12-12 22:40:50 +03:00
cifs_sb - > ctx - > file_mode ,
cifs_sb - > ctx - > dir_mode ) ;
2020-12-14 09:40:22 +03:00
if ( cifs_sb - > ctx - > iocharset )
seq_printf ( s , " ,iocharset=%s " , cifs_sb - > ctx - > iocharset ) ;
2009-06-11 18:27:28 +04:00
if ( tcon - > seal )
2014-05-13 20:04:17 +04:00
seq_puts ( s , " ,seal " ) ;
2020-02-20 08:59:32 +03:00
else if ( tcon - > ses - > server - > ignore_signature )
seq_puts ( s , " ,signloosely " ) ;
2009-06-11 18:27:28 +04:00
if ( tcon - > nocase )
2014-05-13 20:04:17 +04:00
seq_puts ( s , " ,nocase " ) ;
2020-05-19 11:06:57 +03:00
if ( tcon - > nodelete )
seq_puts ( s , " ,nodelete " ) ;
2022-05-24 07:17:12 +03:00
if ( cifs_sb - > ctx - > no_sparse )
seq_puts ( s , " ,nosparse " ) ;
2019-01-21 04:51:59 +03:00
if ( tcon - > local_lease )
seq_puts ( s , " ,locallease " ) ;
2009-06-11 18:27:28 +04:00
if ( tcon - > retry )
2014-05-13 20:04:17 +04:00
seq_puts ( s , " ,hard " ) ;
2017-09-20 06:09:23 +03:00
else
seq_puts ( s , " ,soft " ) ;
2015-10-01 05:07:59 +03:00
if ( tcon - > use_persistent )
seq_puts ( s , " ,persistenthandles " ) ;
2015-11-03 19:08:53 +03:00
else if ( tcon - > use_resilient )
seq_puts ( s , " ,resilienthandles " ) ;
2018-05-21 07:41:10 +03:00
if ( tcon - > posix_extensions )
seq_puts ( s , " ,posix " ) ;
else if ( tcon - > unix_ext )
seq_puts ( s , " ,unix " ) ;
else
seq_puts ( s , " ,nounix " ) ;
2018-09-21 04:10:25 +03:00
if ( cifs_sb - > mnt_cifs_flags & CIFS_MOUNT_NO_DFS )
seq_puts ( s , " ,nodfs " ) ;
2009-06-11 18:27:28 +04:00
if ( cifs_sb - > mnt_cifs_flags & CIFS_MOUNT_POSIX_PATHS )
2014-05-13 20:04:17 +04:00
seq_puts ( s , " ,posixpaths " ) ;
2009-06-11 18:27:28 +04:00
if ( cifs_sb - > mnt_cifs_flags & CIFS_MOUNT_SET_UID )
2014-05-13 20:04:17 +04:00
seq_puts ( s , " ,setuids " ) ;
2016-09-23 09:36:34 +03:00
if ( cifs_sb - > mnt_cifs_flags & CIFS_MOUNT_UID_FROM_ACL )
seq_puts ( s , " ,idsfromsid " ) ;
2009-06-11 18:27:28 +04:00
if ( cifs_sb - > mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM )
2014-05-13 20:04:17 +04:00
seq_puts ( s , " ,serverino " ) ;
2011-05-26 10:02:00 +04:00
if ( cifs_sb - > mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD )
2014-05-13 20:04:17 +04:00
seq_puts ( s , " ,rwpidforward " ) ;
2011-05-26 10:02:00 +04:00
if ( cifs_sb - > mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL )
2014-05-13 20:04:17 +04:00
seq_puts ( s , " ,forcemand " ) ;
2009-06-11 18:27:28 +04:00
if ( cifs_sb - > mnt_cifs_flags & CIFS_MOUNT_NO_XATTR )
2014-05-13 20:04:17 +04:00
seq_puts ( s , " ,nouser_xattr " ) ;
2009-06-11 18:27:28 +04:00
if ( cifs_sb - > mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR )
2014-05-13 20:04:17 +04:00
seq_puts ( s , " ,mapchars " ) ;
2015-02-13 09:35:58 +03:00
if ( cifs_sb - > mnt_cifs_flags & CIFS_MOUNT_MAP_SFM_CHR )
seq_puts ( s , " ,mapposix " ) ;
2009-06-11 18:27:28 +04:00
if ( cifs_sb - > mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL )
2014-05-13 20:04:17 +04:00
seq_puts ( s , " ,sfu " ) ;
2009-06-11 18:27:28 +04:00
if ( cifs_sb - > mnt_cifs_flags & CIFS_MOUNT_NO_BRL )
2014-05-13 20:04:17 +04:00
seq_puts ( s , " ,nobrl " ) ;
2018-04-26 06:19:09 +03:00
if ( cifs_sb - > mnt_cifs_flags & CIFS_MOUNT_NO_HANDLE_CACHE )
seq_puts ( s , " ,nohandlecache " ) ;
2019-06-24 10:01:42 +03:00
if ( cifs_sb - > mnt_cifs_flags & CIFS_MOUNT_MODE_FROM_SID )
seq_puts ( s , " ,modefromsid " ) ;
2009-06-11 18:27:28 +04:00
if ( cifs_sb - > mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL )
2014-05-13 20:04:17 +04:00
seq_puts ( s , " ,cifsacl " ) ;
2009-06-11 18:27:28 +04:00
if ( cifs_sb - > mnt_cifs_flags & CIFS_MOUNT_DYNPERM )
2014-05-13 20:04:17 +04:00
seq_puts ( s , " ,dynperm " ) ;
2017-11-28 00:05:09 +03:00
if ( root - > d_sb - > s_flags & SB_POSIXACL )
2014-05-13 20:04:17 +04:00
seq_puts ( s , " ,acl " ) ;
2010-07-30 16:56:00 +04:00
if ( cifs_sb - > mnt_cifs_flags & CIFS_MOUNT_MF_SYMLINKS )
2014-05-13 20:04:17 +04:00
seq_puts ( s , " ,mfsymlinks " ) ;
2010-11-24 15:19:07 +03:00
if ( cifs_sb - > mnt_cifs_flags & CIFS_MOUNT_FSCACHE )
2014-05-13 20:04:17 +04:00
seq_puts ( s , " ,fsc " ) ;
2011-10-20 05:44:48 +04:00
if ( cifs_sb - > mnt_cifs_flags & CIFS_MOUNT_NOSSYNC )
2014-05-13 20:04:17 +04:00
seq_puts ( s , " ,nostrictsync " ) ;
2011-10-20 05:44:48 +04:00
if ( cifs_sb - > mnt_cifs_flags & CIFS_MOUNT_NO_PERM )
2014-05-13 20:04:17 +04:00
seq_puts ( s , " ,noperm " ) ;
2012-04-24 18:28:14 +04:00
if ( cifs_sb - > mnt_cifs_flags & CIFS_MOUNT_CIFS_BACKUPUID )
2013-02-06 13:20:20 +04:00
seq_printf ( s , " ,backupuid=%u " ,
from_kuid_munged ( & init_user_ns ,
2020-12-12 22:40:50 +03:00
cifs_sb - > ctx - > backupuid ) ) ;
2012-04-24 18:28:14 +04:00
if ( cifs_sb - > mnt_cifs_flags & CIFS_MOUNT_CIFS_BACKUPGID )
2013-02-06 13:20:20 +04:00
seq_printf ( s , " ,backupgid=%u " ,
from_kgid_munged ( & init_user_ns ,
2020-12-12 22:40:50 +03:00
cifs_sb - > ctx - > backupgid ) ) ;
2009-06-11 18:27:28 +04:00
2020-12-15 22:28:50 +03:00
seq_printf ( s , " ,rsize=%u " , cifs_sb - > ctx - > rsize ) ;
seq_printf ( s , " ,wsize=%u " , cifs_sb - > ctx - > wsize ) ;
seq_printf ( s , " ,bsize=%u " , cifs_sb - > ctx - > bsize ) ;
2021-04-25 05:46:23 +03:00
if ( cifs_sb - > ctx - > rasize )
seq_printf ( s , " ,rasize=%u " , cifs_sb - > ctx - > rasize ) ;
2019-09-09 07:22:02 +03:00
if ( tcon - > ses - > server - > min_offload )
seq_printf ( s , " ,esize=%u " , tcon - > ses - > server - > min_offload ) ;
2015-12-18 21:31:36 +03:00
seq_printf ( s , " ,echo_interval=%lu " ,
tcon - > ses - > server - > echo_interval / HZ ) ;
2019-06-18 01:34:57 +03:00
2022-12-11 22:54:21 +03:00
/* Only display the following if overridden on mount */
2019-06-18 01:34:57 +03:00
if ( tcon - > ses - > server - > max_credits ! = SMB2_MAX_CREDITS_AVAILABLE )
seq_printf ( s , " ,max_credits=%u " , tcon - > ses - > server - > max_credits ) ;
2022-12-11 22:54:21 +03:00
if ( tcon - > ses - > server - > tcp_nodelay )
seq_puts ( s , " ,tcpnodelay " ) ;
if ( tcon - > ses - > server - > noautotune )
seq_puts ( s , " ,noautotune " ) ;
if ( tcon - > ses - > server - > noblocksnd )
seq_puts ( s , " ,noblocksend " ) ;
2019-06-18 01:34:57 +03:00
smb3: snapshot mounts are read-only and make sure info is displayable about the mount
snapshot mounts were not marked as read-only and did not display the snapshot
time (in /proc/mounts) specified on mount
With this patch - note that can not write to the snapshot mount (see "ro" in
/proc/mounts line) and also the missing snapshot timewarp token time is
dumped. Sample line from /proc/mounts with the patch:
//127.0.0.1/scratch /mnt2 smb3 ro,relatime,vers=default,cache=strict,username=testuser,domain=,uid=0,noforceuid,gid=0,noforcegid,addr=127.0.0.1,file_mode=0755,dir_mode=0755,soft,nounix,serverino,mapposix,noperm,rsize=1048576,wsize=1048576,echo_interval=60,snapshot=1234567,actimeo=1 0 0
Signed-off-by: Steve French <stfrench@microsoft.com>
Reviewed-by: Paulo Alcantara <palcantara@suse.de>
2018-06-30 00:06:15 +03:00
if ( tcon - > snapshot_time )
seq_printf ( s , " ,snapshot=%llu " , tcon - > snapshot_time ) ;
2019-03-30 00:31:07 +03:00
if ( tcon - > handle_timeout )
seq_printf ( s , " ,handletimeout=%u " , tcon - > handle_timeout ) ;
2021-02-24 21:12:53 +03:00
/*
* Display file and directory attribute timeout in seconds .
* If file and directory attribute timeout the same then actimeo
* was likely specified on mount
*/
if ( cifs_sb - > ctx - > acdirmax = = cifs_sb - > ctx - > acregmax )
seq_printf ( s , " ,actimeo=%lu " , cifs_sb - > ctx - > acregmax / HZ ) ;
else {
seq_printf ( s , " ,acdirmax=%lu " , cifs_sb - > ctx - > acdirmax / HZ ) ;
seq_printf ( s , " ,acregmax=%lu " , cifs_sb - > ctx - > acregmax / HZ ) ;
}
2022-08-11 08:53:00 +03:00
seq_printf ( s , " ,closetimeo=%lu " , cifs_sb - > ctx - > closetimeo / HZ ) ;
2009-06-11 18:27:28 +04:00
2019-09-20 05:32:20 +03:00
if ( tcon - > ses - > chan_max > 1 )
2020-06-10 03:50:40 +03:00
seq_printf ( s , " ,multichannel,max_channels=%zu " ,
2019-09-20 05:32:20 +03:00
tcon - > ses - > chan_max ) ;
2020-12-12 07:59:29 +03:00
if ( tcon - > use_witness )
seq_puts ( s , " ,witness " ) ;
2005-04-17 02:20:36 +04:00
return 0 ;
}
2008-04-24 15:21:56 +04:00
static void cifs_umount_begin ( struct super_block * sb )
2005-10-10 21:34:22 +04:00
{
2008-04-24 15:21:56 +04:00
struct cifs_sb_info * cifs_sb = CIFS_SB ( sb ) ;
2011-05-27 08:34:02 +04:00
struct cifs_tcon * tcon ;
2005-10-10 21:34:22 +04:00
2007-05-01 00:13:06 +04:00
if ( cifs_sb = = NULL )
2005-10-11 01:28:38 +04:00
return ;
2010-09-21 03:01:35 +04:00
tcon = cifs_sb_master_tcon ( cifs_sb ) ;
2008-11-15 19:12:47 +03:00
2010-10-18 21:59:37 +04:00
spin_lock ( & cifs_tcp_ses_lock ) ;
2022-07-27 22:49:56 +03:00
spin_lock ( & tcon - > tc_lock ) ;
smb3: cleanup and clarify status of tree connections
Currently the way the tid (tree connection) status is tracked
is confusing. The same enum is used for structs cifs_tcon
and cifs_ses and TCP_Server_info, but each of these three has
different states that they transition among. The current
code also unnecessarily uses camelCase.
Convert from use of statusEnum to a new tid_status_enum for
tree connections. The valid states for a tid are:
TID_NEW = 0,
TID_GOOD,
TID_EXITING,
TID_NEED_RECON,
TID_NEED_TCON,
TID_IN_TCON,
TID_NEED_FILES_INVALIDATE, /* unused, considering removing in future */
TID_IN_FILES_INVALIDATE
It also removes CifsNeedTcon, CifsInTcon, CifsNeedFilesInvalidate and
CifsInFilesInvalidate from the statusEnum used for session and
TCP_Server_Info since they are not relevant for those.
A follow on patch will fix the places where we use the
tcon->need_reconnect flag to be more consistent with the tid->status.
Also fixes a bug that was:
Reported-by: kernel test robot <lkp@intel.com>
Reviewed-by: Shyam Prasad N <sprasad@microsoft.com>
Reviewed-by: Ronnie Sahlberg <lsahlber@redhat.com>
Signed-off-by: Steve French <stfrench@microsoft.com>
2022-03-28 00:07:30 +03:00
if ( ( tcon - > tc_count > 1 ) | | ( tcon - > status = = TID_EXITING ) ) {
2009-06-26 07:25:49 +04:00
/* we have other mounts to same share or we have
2023-03-24 00:20:02 +03:00
already tried to umount this and woken up
2009-06-26 07:25:49 +04:00
all waiting network requests , nothing to do */
2022-07-27 22:49:56 +03:00
spin_unlock ( & tcon - > tc_lock ) ;
2010-10-18 21:59:37 +04:00
spin_unlock ( & cifs_tcp_ses_lock ) ;
2009-06-26 07:25:49 +04:00
return ;
2023-03-24 00:20:02 +03:00
}
/*
* can not set tcon - > status to TID_EXITING yet since we don ' t know if umount - f will
* fail later ( e . g . due to open files ) . TID_EXITING will be set just before tdis req sent
*/
2022-07-27 22:49:56 +03:00
spin_unlock ( & tcon - > tc_lock ) ;
2010-10-18 21:59:37 +04:00
spin_unlock ( & cifs_tcp_ses_lock ) ;
2005-10-11 01:06:37 +04:00
2006-07-15 02:37:11 +04:00
/* cancel_brl_requests(tcon); */ /* BB mark all brl mids as exiting */
2005-11-10 02:21:09 +03:00
/* cancel_notify_requests(tcon); */
2007-07-13 04:33:32 +04:00
if ( tcon - > ses & & tcon - > ses - > server ) {
2013-05-05 07:12:25 +04:00
cifs_dbg ( FYI , " wake up tasks now - umount begin not complete \n " ) ;
2005-10-11 01:28:38 +04:00
wake_up_all ( & tcon - > ses - > server - > request_q ) ;
2005-11-30 07:55:11 +03:00
wake_up_all ( & tcon - > ses - > server - > response_q ) ;
msleep ( 1 ) ; /* yield */
/* we have to kick the requests once more */
wake_up_all ( & tcon - > ses - > server - > response_q ) ;
msleep ( 1 ) ;
2005-10-11 01:06:37 +04:00
}
2005-10-10 21:34:22 +04:00
return ;
}
2006-09-29 01:34:06 +04:00
# ifdef CONFIG_CIFS_STATS2
2011-12-09 05:51:13 +04:00
/*
 * ->show_stats() handler. Currently a stub: nothing is written to @s and
 * success is always reported.
 */
static int cifs_show_stats(struct seq_file *s, struct dentry *root)
{
	/* BB FIXME: no per-mount statistics are emitted yet */
	return 0;
}
# endif
cifs: Support fscache indexing rewrite
Change the cifs filesystem to take account of the changes to fscache's
indexing rewrite and reenable caching in cifs.
The following changes have been made:
(1) The fscache_netfs struct is no more, and there's no need to register
the filesystem as a whole.
(2) The session cookie is now an fscache_volume cookie, allocated with
fscache_acquire_volume(). That takes three parameters: a string
representing the "volume" in the index, a string naming the cache to
use (or NULL) and a u64 that conveys coherency metadata for the
volume.
For cifs, I've made it render the volume name string as:
"cifs,<ipaddress>,<sharename>"
where the sharename has '/' characters replaced with ';'.
This probably needs rethinking a bit as the total name could exceed
the maximum filename component length.
Further, the coherency data is currently just set to 0. It needs
something else doing with it - I wonder if it would suffice simply to
sum the resource_id, vol_create_time and vol_serial_number or maybe
hash them.
(3) The fscache_cookie_def is no more and needed information is passed
directly to fscache_acquire_cookie(). The cache no longer calls back
into the filesystem, but rather metadata changes are indicated at
other times.
fscache_acquire_cookie() is passed the same keying and coherency
information as before.
(4) The functions to set/reset cookies are removed and
fscache_use_cookie() and fscache_unuse_cookie() are used instead.
fscache_use_cookie() is passed a flag to indicate if the cookie is
opened for writing. fscache_unuse_cookie() is passed updates for the
metadata if we changed it (ie. if the file was opened for writing).
These are called when the file is opened or closed.
(5) cifs_setattr_*() are made to call fscache_resize() to change the size
of the cache object.
(6) The functions to read and write data are stubbed out pending a
conversion to use netfslib.
Changes
=======
ver #8:
- Abstract cache invalidation into a helper function.
- Fix some checkpatch warnings[3].
ver #7:
- Removed the accidentally added-back call to get the super cookie in
cifs_root_iget().
- Fixed the right call to cifs_fscache_get_super_cookie() to take account
of the "-o fsc" mount flag.
ver #6:
- Moved the change of gfpflags_allow_blocking() to current_is_kswapd() for
cifs here.
- Fixed one of the error paths in cifs_atomic_open() to jump around the
call to use the cookie.
- Fixed an additional successful return in the middle of cifs_open() to
use the cookie on the way out.
- Only get a volume cookie (and thus inode cookies) when "-o fsc" is
supplied to mount.
ver #5:
- Fixed a couple of bits of cookie handling[2]:
- The cookie should be released in cifs_evict_inode(), not
cifsFileInfo_put_final(). The cookie needs to persist beyond file
closure so that writepages will be able to write to it.
- fscache_use_cookie() needs to be called in cifs_atomic_open() as it is
for cifs_open().
ver #4:
- Fixed the use of sizeof with memset.
- tcon->vol_create_time is __le64 so doesn't need cpu_to_le64().
ver #3:
- Canonicalise the cifs coherency data to make the cache portable.
- Set volume coherency data.
ver #2:
- Use gfpflags_allow_blocking() rather than using flag directly.
- Upgraded to -rc4 to allow for upstream changes[1].
- fscache_acquire_volume() now returns errors.
Signed-off-by: David Howells <dhowells@redhat.com>
Acked-by: Jeff Layton <jlayton@kernel.org>
cc: Steve French <smfrench@gmail.com>
cc: Shyam Prasad N <nspmangalore@gmail.com>
cc: linux-cifs@vger.kernel.org
cc: linux-cachefs@redhat.com
Link: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=23b55d673d7527b093cd97b7c217c82e70cd1af0 [1]
Link: https://lore.kernel.org/r/3419813.1641592362@warthog.procyon.org.uk/ [2]
Link: https://lore.kernel.org/r/CAH2r5muTanw9pJqzAHd01d9A8keeChkzGsCEH6=0rHutVLAF-A@mail.gmail.com/ [3]
Link: https://lore.kernel.org/r/163819671009.215744.11230627184193298714.stgit@warthog.procyon.org.uk/ # v1
Link: https://lore.kernel.org/r/163906982979.143852.10672081929614953210.stgit@warthog.procyon.org.uk/ # v2
Link: https://lore.kernel.org/r/163967187187.1823006.247415138444991444.stgit@warthog.procyon.org.uk/ # v3
Link: https://lore.kernel.org/r/164021579335.640689.2681324337038770579.stgit@warthog.procyon.org.uk/ # v4
Link: https://lore.kernel.org/r/3462849.1641593783@warthog.procyon.org.uk/ # v5
Link: https://lore.kernel.org/r/1318953.1642024578@warthog.procyon.org.uk/ # v6
Signed-off-by: Steve French <stfrench@microsoft.com>
2020-11-17 18:56:59 +03:00
/*
 * ->write_inode() handler: hands the writeback control and the inode's
 * fscache cookie to fscache_unpin_writeback(). Always returns 0.
 */
static int cifs_write_inode(struct inode *inode, struct writeback_control *wbc)
{
	fscache_unpin_writeback(wbc, cifs_inode_cookie(inode));

	return 0;
}
2010-06-07 21:43:19 +04:00
static int cifs_drop_inode ( struct inode * inode )
2010-06-01 22:47:40 +04:00
{
struct cifs_sb_info * cifs_sb = CIFS_SB ( inode - > i_sb ) ;
2010-06-07 21:43:19 +04:00
/* no serverino => unconditional eviction */
return ! ( cifs_sb - > mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM ) | |
generic_drop_inode ( inode ) ;
2010-06-01 22:47:40 +04:00
}
2007-02-12 11:55:41 +03:00
static const struct super_operations cifs_super_ops = {
2005-04-17 02:20:36 +04:00
. statfs = cifs_statfs ,
. alloc_inode = cifs_alloc_inode ,
cifs: Support fscache indexing rewrite
Change the cifs filesystem to take account of the changes to fscache's
indexing rewrite and reenable caching in cifs.
The following changes have been made:
(1) The fscache_netfs struct is no more, and there's no need to register
the filesystem as a whole.
(2) The session cookie is now an fscache_volume cookie, allocated with
fscache_acquire_volume(). That takes three parameters: a string
representing the "volume" in the index, a string naming the cache to
use (or NULL) and a u64 that conveys coherency metadata for the
volume.
For cifs, I've made it render the volume name string as:
"cifs,<ipaddress>,<sharename>"
where the sharename has '/' characters replaced with ';'.
This probably needs rethinking a bit as the total name could exceed
the maximum filename component length.
Further, the coherency data is currently just set to 0. It needs
something else doing with it - I wonder if it would suffice simply to
sum the resource_id, vol_create_time and vol_serial_number or maybe
hash them.
(3) The fscache_cookie_def is no more and needed information is passed
directly to fscache_acquire_cookie(). The cache no longer calls back
into the filesystem, but rather metadata changes are indicated at
other times.
fscache_acquire_cookie() is passed the same keying and coherency
information as before.
(4) The functions to set/reset cookies are removed and
fscache_use_cookie() and fscache_unuse_cookie() are used instead.
fscache_use_cookie() is passed a flag to indicate if the cookie is
opened for writing. fscache_unuse_cookie() is passed updates for the
metadata if we changed it (ie. if the file was opened for writing).
These are called when the file is opened or closed.
(5) cifs_setattr_*() are made to call fscache_resize() to change the size
of the cache object.
(6) The functions to read and write data are stubbed out pending a
conversion to use netfslib.
Changes
=======
ver #8:
- Abstract cache invalidation into a helper function.
- Fix some checkpatch warnings[3].
ver #7:
- Removed the accidentally added-back call to get the super cookie in
cifs_root_iget().
- Fixed the right call to cifs_fscache_get_super_cookie() to take account
of the "-o fsc" mount flag.
ver #6:
- Moved the change of gfpflags_allow_blocking() to current_is_kswapd() for
cifs here.
- Fixed one of the error paths in cifs_atomic_open() to jump around the
call to use the cookie.
- Fixed an additional successful return in the middle of cifs_open() to
use the cookie on the way out.
- Only get a volume cookie (and thus inode cookies) when "-o fsc" is
supplied to mount.
ver #5:
- Fixed a couple of bits of cookie handling[2]:
- The cookie should be released in cifs_evict_inode(), not
cifsFileInfo_put_final(). The cookie needs to persist beyond file
closure so that writepages will be able to write to it.
- fscache_use_cookie() needs to be called in cifs_atomic_open() as it is
for cifs_open().
ver #4:
- Fixed the use of sizeof with memset.
- tcon->vol_create_time is __le64 so doesn't need cpu_to_le64().
ver #3:
- Canonicalise the cifs coherency data to make the cache portable.
- Set volume coherency data.
ver #2:
- Use gfpflags_allow_blocking() rather than using flag directly.
- Upgraded to -rc4 to allow for upstream changes[1].
- fscache_acquire_volume() now returns errors.
Signed-off-by: David Howells <dhowells@redhat.com>
Acked-by: Jeff Layton <jlayton@kernel.org>
cc: Steve French <smfrench@gmail.com>
cc: Shyam Prasad N <nspmangalore@gmail.com>
cc: linux-cifs@vger.kernel.org
cc: linux-cachefs@redhat.com
Link: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=23b55d673d7527b093cd97b7c217c82e70cd1af0 [1]
Link: https://lore.kernel.org/r/3419813.1641592362@warthog.procyon.org.uk/ [2]
Link: https://lore.kernel.org/r/CAH2r5muTanw9pJqzAHd01d9A8keeChkzGsCEH6=0rHutVLAF-A@mail.gmail.com/ [3]
Link: https://lore.kernel.org/r/163819671009.215744.11230627184193298714.stgit@warthog.procyon.org.uk/ # v1
Link: https://lore.kernel.org/r/163906982979.143852.10672081929614953210.stgit@warthog.procyon.org.uk/ # v2
Link: https://lore.kernel.org/r/163967187187.1823006.247415138444991444.stgit@warthog.procyon.org.uk/ # v3
Link: https://lore.kernel.org/r/164021579335.640689.2681324337038770579.stgit@warthog.procyon.org.uk/ # v4
Link: https://lore.kernel.org/r/3462849.1641593783@warthog.procyon.org.uk/ # v5
Link: https://lore.kernel.org/r/1318953.1642024578@warthog.procyon.org.uk/ # v6
Signed-off-by: Steve French <stfrench@microsoft.com>
2020-11-17 18:56:59 +03:00
. write_inode = cifs_write_inode ,
2019-04-15 06:18:35 +03:00
. free_inode = cifs_free_inode ,
2010-06-01 22:47:40 +04:00
. drop_inode = cifs_drop_inode ,
2010-06-07 22:34:48 +04:00
. evict_inode = cifs_evict_inode ,
2020-12-15 05:08:10 +03:00
/* .show_path = cifs_show_path, */ /* Would we ever need show path? */
. show_devname = cifs_show_devname ,
2010-06-01 22:47:40 +04:00
/* .delete_inode = cifs_delete_inode, */ /* Do not need above
function unless later we add lazy close of inodes or unless the
2007-07-13 04:33:32 +04:00
kernel forgets to call us with the same number of releases ( closes )
as opens */
2005-04-17 02:20:36 +04:00
. show_options = cifs_show_options ,
2005-11-10 02:21:09 +03:00
. umount_begin = cifs_umount_begin ,
2006-09-29 01:34:06 +04:00
# ifdef CONFIG_CIFS_STATS2
2006-09-30 05:08:55 +04:00
. show_stats = cifs_show_stats ,
2006-09-29 01:34:06 +04:00
# endif
2005-04-17 02:20:36 +04:00
} ;
2011-05-27 07:50:55 +04:00
/*
* Get root dentry from superblock according to prefix path mount option .
* Return dentry with refcount + 1 on success and NULL otherwise .
*/
static struct dentry *
2020-12-10 08:07:12 +03:00
cifs_get_root ( struct smb3_fs_context * ctx , struct super_block * sb )
2011-05-27 07:50:55 +04:00
{
2011-07-18 21:50:40 +04:00
struct dentry * dentry ;
2011-05-27 07:50:55 +04:00
struct cifs_sb_info * cifs_sb = CIFS_SB ( sb ) ;
2011-07-18 21:50:40 +04:00
char * full_path = NULL ;
char * s , * p ;
2011-05-27 07:50:55 +04:00
char sep ;
2016-07-30 00:38:21 +03:00
if ( cifs_sb - > mnt_cifs_flags & CIFS_MOUNT_USE_PREFIX_PATH )
return dget ( sb - > s_root ) ;
2020-12-10 08:07:12 +03:00
full_path = cifs_build_path_to_root ( ctx , cifs_sb ,
2016-12-15 10:01:19 +03:00
cifs_sb_master_tcon ( cifs_sb ) , 0 ) ;
2011-05-27 07:50:55 +04:00
if ( full_path = = NULL )
2011-06-17 18:02:59 +04:00
return ERR_PTR ( - ENOMEM ) ;
2011-05-27 07:50:55 +04:00
2013-05-05 07:12:25 +04:00
cifs_dbg ( FYI , " Get root dentry for %s \n " , full_path ) ;
2011-05-27 07:50:55 +04:00
sep = CIFS_DIR_SEP ( cifs_sb ) ;
2011-07-18 21:50:40 +04:00
dentry = dget ( sb - > s_root ) ;
2021-12-21 03:48:09 +03:00
s = full_path ;
2011-07-18 21:50:40 +04:00
do {
2015-03-18 01:25:59 +03:00
struct inode * dir = d_inode ( dentry ) ;
2011-07-18 21:50:40 +04:00
struct dentry * child ;
2013-02-02 00:11:01 +04:00
if ( ! S_ISDIR ( dir - > i_mode ) ) {
dput ( dentry ) ;
dentry = ERR_PTR ( - ENOTDIR ) ;
break ;
}
2011-08-21 19:30:15 +04:00
2011-07-18 21:50:40 +04:00
/* skip separators */
while ( * s = = sep )
s + + ;
if ( ! * s )
break ;
p = s + + ;
/* next separator */
while ( * s & & * s ! = sep )
s + + ;
2019-10-31 08:21:58 +03:00
child = lookup_positive_unlocked ( p , dentry , s - p ) ;
2011-07-18 21:50:40 +04:00
dput ( dentry ) ;
dentry = child ;
} while ( ! IS_ERR ( dentry ) ) ;
2011-05-27 07:50:55 +04:00
kfree ( full_path ) ;
2011-07-18 21:50:40 +04:00
return dentry ;
2011-05-27 07:50:55 +04:00
}
2011-06-17 17:47:23 +04:00
static int cifs_set_super ( struct super_block * sb , void * data )
{
struct cifs_mnt_data * mnt_data = data ;
sb - > s_fs_info = mnt_data - > cifs_sb ;
return set_anon_super ( sb , NULL ) ;
}
2020-12-10 09:06:02 +03:00
struct dentry *
2018-06-07 01:59:29 +03:00
cifs_smb3_do_mount ( struct file_system_type * fs_type ,
2020-12-10 09:06:02 +03:00
int flags , struct smb3_fs_context * old_ctx )
2005-04-17 02:20:36 +04:00
{
int rc ;
2022-05-31 06:01:17 +03:00
struct super_block * sb = NULL ;
2020-11-10 01:59:26 +03:00
struct cifs_sb_info * cifs_sb = NULL ;
2011-05-26 23:35:47 +04:00
struct cifs_mnt_data mnt_data ;
2011-05-05 13:55:12 +04:00
struct dentry * root ;
2005-04-17 02:20:36 +04:00
2018-10-07 21:52:18 +03:00
/*
* Prints in Kernel / CIFS log the attempted mount operation
* If CIFS_DEBUG & & cifs_FYI
*/
2018-10-07 18:21:26 +03:00
if ( cifsFYI )
2020-12-10 09:06:02 +03:00
cifs_dbg ( FYI , " Devname: %s flags: %d \n " , old_ctx - > UNC , flags ) ;
2018-10-07 18:21:26 +03:00
else
2020-12-10 09:06:02 +03:00
cifs_info ( " Attempting to mount %s \n " , old_ctx - > UNC ) ;
2020-11-10 01:59:26 +03:00
cifs_sb = kzalloc ( sizeof ( struct cifs_sb_info ) , GFP_KERNEL ) ;
if ( cifs_sb = = NULL ) {
root = ERR_PTR ( - ENOMEM ) ;
2020-12-10 09:06:02 +03:00
goto out ;
}
2005-04-17 02:20:36 +04:00
2020-11-10 01:59:26 +03:00
cifs_sb - > ctx = kzalloc ( sizeof ( struct smb3_fs_context ) , GFP_KERNEL ) ;
if ( ! cifs_sb - > ctx ) {
root = ERR_PTR ( - ENOMEM ) ;
goto out ;
}
rc = smb3_fs_context_dup ( cifs_sb - > ctx , old_ctx ) ;
2020-12-10 09:06:02 +03:00
if ( rc ) {
root = ERR_PTR ( rc ) ;
goto out ;
}
2011-05-05 13:55:12 +04:00
2020-12-14 09:40:24 +03:00
rc = cifs_setup_cifs_sb ( cifs_sb ) ;
2016-07-30 00:38:19 +03:00
if ( rc ) {
root = ERR_PTR ( rc ) ;
2020-11-10 01:59:26 +03:00
goto out ;
2016-05-25 20:59:09 +03:00
}
2020-11-10 01:59:26 +03:00
rc = cifs_mount ( cifs_sb , cifs_sb - > ctx ) ;
2011-06-17 17:29:57 +04:00
if ( rc ) {
2017-11-28 00:05:09 +03:00
if ( ! ( flags & SB_SILENT ) )
2013-05-05 07:12:25 +04:00
cifs_dbg ( VFS , " cifs_mount failed w/return code = %d \n " ,
rc ) ;
2011-06-17 17:29:57 +04:00
root = ERR_PTR ( rc ) ;
2020-11-10 01:59:26 +03:00
goto out ;
2011-06-17 17:29:57 +04:00
}
2020-11-10 01:59:26 +03:00
mnt_data . ctx = cifs_sb - > ctx ;
2011-05-26 23:35:47 +04:00
mnt_data . cifs_sb = cifs_sb ;
mnt_data . flags = flags ;
2012-06-25 15:55:37 +04:00
/* BB should we make this contingent on mount parm? */
2017-11-28 00:05:09 +03:00
flags | = SB_NODIRATIME | SB_NOATIME ;
2012-06-25 15:55:37 +04:00
sb = sget ( fs_type , cifs_match_super , cifs_set_super , flags , & mnt_data ) ;
2011-05-05 13:55:12 +04:00
if ( IS_ERR ( sb ) ) {
root = ERR_CAST ( sb ) ;
2011-06-17 17:29:57 +04:00
cifs_umount ( cifs_sb ) ;
2020-12-16 01:51:33 +03:00
cifs_sb = NULL ;
2011-06-17 17:42:43 +04:00
goto out ;
2011-05-05 13:55:12 +04:00
}
2005-04-17 02:20:36 +04:00
2011-06-17 17:47:23 +04:00
if ( sb - > s_root ) {
2013-05-05 07:12:25 +04:00
cifs_dbg ( FYI , " Use existing superblock \n " ) ;
2011-06-17 17:29:57 +04:00
cifs_umount ( cifs_sb ) ;
2020-12-16 01:51:33 +03:00
cifs_sb = NULL ;
2011-06-17 17:56:55 +04:00
} else {
rc = cifs_read_super ( sb ) ;
if ( rc ) {
root = ERR_PTR ( rc ) ;
goto out_super ;
}
2011-05-25 13:35:34 +04:00
2017-11-28 00:05:09 +03:00
sb - > s_flags | = SB_ACTIVE ;
2005-04-17 02:20:36 +04:00
}
2011-05-05 13:55:12 +04:00
2020-12-16 01:51:33 +03:00
root = cifs_get_root ( cifs_sb ? cifs_sb - > ctx : old_ctx , sb ) ;
2011-06-17 18:02:59 +04:00
if ( IS_ERR ( root ) )
2011-05-27 07:50:55 +04:00
goto out_super ;
2011-05-26 23:35:47 +04:00
2021-03-09 02:07:30 +03:00
if ( cifs_sb )
cifs_sb - > root = dget ( root ) ;
2013-05-05 07:12:25 +04:00
cifs_dbg ( FYI , " dentry root is: %p \n " , root ) ;
2020-11-10 01:59:26 +03:00
return root ;
2011-05-05 13:55:12 +04:00
2011-05-26 00:02:16 +04:00
out_super :
deactivate_locked_super ( sb ) ;
2022-02-10 19:59:15 +03:00
return root ;
2011-05-26 00:02:16 +04:00
out :
2020-11-10 01:59:26 +03:00
if ( cifs_sb ) {
2022-05-31 06:01:17 +03:00
if ( ! sb | | IS_ERR ( sb ) ) { /* otherwise kill_sb will handle */
kfree ( cifs_sb - > prepath ) ;
smb3_cleanup_fs_context ( cifs_sb - > ctx ) ;
kfree ( cifs_sb ) ;
}
2020-11-10 01:59:26 +03:00
}
2011-05-05 13:55:12 +04:00
return root ;
2005-04-17 02:20:36 +04:00
}
2018-06-07 01:59:29 +03:00
2014-05-23 14:50:21 +04:00
static ssize_t
cifs_loose_read_iter ( struct kiocb * iocb , struct iov_iter * iter )
{
ssize_t rc ;
struct inode * inode = file_inode ( iocb - > ki_filp ) ;
2022-04-07 02:03:14 +03:00
if ( iocb - > ki_flags & IOCB_DIRECT )
2015-12-02 17:46:07 +03:00
return cifs_user_readv ( iocb , iter ) ;
2014-05-23 14:50:21 +04:00
rc = cifs_revalidate_mapping ( inode ) ;
if ( rc )
return rc ;
return generic_file_read_iter ( iocb , iter ) ;
}
2014-04-03 20:05:17 +04:00
static ssize_t cifs_file_write_iter ( struct kiocb * iocb , struct iov_iter * from )
2005-04-17 02:20:36 +04:00
{
2013-01-24 02:07:38 +04:00
struct inode * inode = file_inode ( iocb - > ki_filp ) ;
2014-03-11 20:11:47 +04:00
struct cifsInodeInfo * cinode = CIFS_I ( inode ) ;
2005-04-17 02:20:36 +04:00
ssize_t written ;
2011-01-24 22:16:35 +03:00
int rc ;
2005-04-17 02:20:36 +04:00
2015-12-02 17:46:07 +03:00
if ( iocb - > ki_filp - > f_flags & O_DIRECT ) {
written = cifs_user_writev ( iocb , from ) ;
if ( written > 0 & & CIFS_CACHE_READ ( cinode ) ) {
cifs_zap_mapping ( inode ) ;
cifs_dbg ( FYI ,
" Set no oplock for inode=%p after a write operation \n " ,
inode ) ;
cinode - > oplock = 0 ;
}
return written ;
}
2014-03-11 20:11:47 +04:00
written = cifs_get_writer ( cinode ) ;
if ( written )
return written ;
2014-04-03 20:05:17 +04:00
written = generic_file_write_iter ( iocb , from ) ;
2011-01-24 22:16:35 +03:00
2013-09-05 13:01:06 +04:00
if ( CIFS_CACHE_WRITE ( CIFS_I ( inode ) ) )
2014-03-11 20:11:47 +04:00
goto out ;
2011-01-24 22:16:35 +03:00
rc = filemap_fdatawrite ( inode - > i_mapping ) ;
if ( rc )
2014-04-03 20:05:17 +04:00
cifs_dbg ( FYI , " cifs_file_write_iter: %d rc on %p inode \n " ,
2013-05-05 07:12:25 +04:00
rc , inode ) ;
2011-01-24 22:16:35 +03:00
2014-03-11 20:11:47 +04:00
out :
cifs_put_writer ( cinode ) ;
2005-04-17 02:20:36 +04:00
return written ;
}
2012-12-18 03:59:39 +04:00
/*
 * ->llseek() handler.
 *
 * For any @whence other than SEEK_SET and SEEK_CUR (SEEK_END, SEEK_DATA,
 * SEEK_HOLE) the cached file length has to be revalidated first. The
 * seek itself is delegated to the server ops' llseek hook when one is
 * provided, and to generic_file_llseek() otherwise.
 *
 * Returns the new file position or a negative error.
 */
static loff_t cifs_llseek(struct file *file, loff_t offset, int whence)
{
	struct cifsFileInfo *cfile = file->private_data;

	if (!(whence == SEEK_SET || whence == SEEK_CUR)) {
		struct inode *inode = file_inode(file);
		int rc;

		/*
		 * Make sure all dirty pages are written and the server has
		 * the newest file length.
		 */
		if (!CIFS_CACHE_READ(CIFS_I(inode)) && inode->i_mapping &&
		    inode->i_mapping->nrpages != 0) {
			rc = filemap_fdatawait(inode->i_mapping);
			if (rc) {
				mapping_set_error(inode->i_mapping, rc);
				return rc;
			}
		}

		/*
		 * Some applications poll for the file length in this strange
		 * way, so seek to end on non-oplocked files must revalidate:
		 * force that by setting the revalidate time to zero.
		 */
		CIFS_I(inode)->time = 0;

		rc = cifs_revalidate_file_attr(file);
		if (rc < 0)
			return (loff_t)rc;
	}

	if (cfile && cfile->tlink) {
		struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);

		if (tcon->ses->server->ops->llseek)
			return tcon->ses->server->ops->llseek(file, tcon,
							      offset, whence);
	}

	return generic_file_llseek(file, offset, whence);
}
2014-08-22 18:40:25 +04:00
static int
cifs_setlease ( struct file * file , long arg , struct file_lock * * lease , void * * priv )
2008-10-23 08:42:37 +04:00
{
2013-09-05 13:01:06 +04:00
/*
* Note that this is called by vfs setlease with i_lock held to
* protect * lease from going away .
*/
2013-01-24 02:07:38 +04:00
struct inode * inode = file_inode ( file ) ;
2010-09-21 03:01:31 +04:00
struct cifsFileInfo * cfile = file - > private_data ;
2008-10-23 08:42:37 +04:00
if ( ! ( S_ISREG ( inode - > i_mode ) ) )
return - EINVAL ;
2014-08-09 18:16:44 +04:00
/* Check if file is oplocked if this is request for new lease */
if ( arg = = F_UNLCK | |
( ( arg = = F_RDLCK ) & & CIFS_CACHE_READ ( CIFS_I ( inode ) ) ) | |
2013-09-05 13:01:06 +04:00
( ( arg = = F_WRLCK ) & & CIFS_CACHE_WRITE ( CIFS_I ( inode ) ) ) )
2014-08-22 18:40:25 +04:00
return generic_setlease ( file , arg , lease , priv ) ;
2010-09-30 03:51:11 +04:00
else if ( tlink_tcon ( cfile - > tlink ) - > local_lease & &
2013-09-05 13:01:06 +04:00
! CIFS_CACHE_READ ( CIFS_I ( inode ) ) )
/*
* If the server claims to support oplock on this file , then we
* still need to check oplock even if the local_lease mount
* option is set , but there are servers which do not support
* oplock for which this mount option may be useful if the user
* knows that the file won ' t be changed on the server by anyone
* else .
*/
2014-08-22 18:40:25 +04:00
return generic_setlease ( file , arg , lease , priv ) ;
2010-10-31 15:35:10 +03:00
else
2008-10-23 08:42:37 +04:00
return - EAGAIN ;
}
2008-01-11 04:49:48 +03:00
struct file_system_type cifs_fs_type = {
2005-04-17 02:20:36 +04:00
. owner = THIS_MODULE ,
. name = " cifs " ,
2020-12-10 09:06:02 +03:00
. init_fs_context = smb3_init_fs_context ,
. parameters = smb3_fs_parameters ,
2011-06-17 16:34:57 +04:00
. kill_sb = cifs_kill_sb ,
2020-02-26 03:08:54 +03:00
. fs_flags = FS_RENAME_DOES_D_MOVE ,
2005-04-17 02:20:36 +04:00
} ;
2013-03-11 18:05:42 +04:00
MODULE_ALIAS_FS ( " cifs " ) ;
2018-05-24 05:44:53 +03:00
2022-06-06 01:54:26 +03:00
struct file_system_type smb3_fs_type = {
2018-05-24 05:44:53 +03:00
. owner = THIS_MODULE ,
. name = " smb3 " ,
2020-12-10 09:06:02 +03:00
. init_fs_context = smb3_init_fs_context ,
. parameters = smb3_fs_parameters ,
2018-05-24 05:44:53 +03:00
. kill_sb = cifs_kill_sb ,
2020-02-26 03:08:54 +03:00
. fs_flags = FS_RENAME_DOES_D_MOVE ,
2018-05-24 05:44:53 +03:00
} ;
MODULE_ALIAS_FS ( " smb3 " ) ;
MODULE_ALIAS ( " smb3 " ) ;
2007-02-12 11:55:38 +03:00
const struct inode_operations cifs_dir_inode_ops = {
2005-04-17 02:20:36 +04:00
. create = cifs_create ,
2012-06-05 17:10:23 +04:00
. atomic_open = cifs_atomic_open ,
2005-04-17 02:20:36 +04:00
. lookup = cifs_lookup ,
. getattr = cifs_getattr ,
. unlink = cifs_unlink ,
. link = cifs_hardlink ,
. mkdir = cifs_mkdir ,
. rmdir = cifs_rmdir ,
2016-09-27 12:03:58 +03:00
. rename = cifs_rename2 ,
2005-04-17 02:20:36 +04:00
. permission = cifs_permission ,
. setattr = cifs_setattr ,
. symlink = cifs_symlink ,
. mknod = cifs_mknod ,
. listxattr = cifs_listxattr ,
2022-09-22 18:17:02 +03:00
. get_acl = cifs_get_acl ,
2022-09-22 18:17:03 +03:00
. set_acl = cifs_set_acl ,
2005-04-17 02:20:36 +04:00
} ;
2007-02-12 11:55:38 +03:00
const struct inode_operations cifs_file_inode_ops = {
2005-04-17 02:20:36 +04:00
. setattr = cifs_setattr ,
2016-05-19 04:48:32 +03:00
. getattr = cifs_getattr ,
2005-04-17 02:20:36 +04:00
. permission = cifs_permission ,
. listxattr = cifs_listxattr ,
2019-04-25 09:45:29 +03:00
. fiemap = cifs_fiemap ,
2022-09-22 18:17:02 +03:00
. get_acl = cifs_get_acl ,
2022-09-22 18:17:03 +03:00
. set_acl = cifs_set_acl ,
2005-04-17 02:20:36 +04:00
} ;
2022-11-04 10:44:41 +03:00
const char * cifs_get_link ( struct dentry * dentry , struct inode * inode ,
struct delayed_call * done )
{
char * target_path ;
target_path = kmalloc ( PATH_MAX , GFP_KERNEL ) ;
if ( ! target_path )
return ERR_PTR ( - ENOMEM ) ;
spin_lock ( & inode - > i_lock ) ;
if ( likely ( CIFS_I ( inode ) - > symlink_target ) ) {
strscpy ( target_path , CIFS_I ( inode ) - > symlink_target , PATH_MAX ) ;
} else {
kfree ( target_path ) ;
target_path = ERR_PTR ( - EOPNOTSUPP ) ;
}
spin_unlock ( & inode - > i_lock ) ;
if ( ! IS_ERR ( target_path ) )
set_delayed_call ( done , kfree_link , target_path ) ;
return target_path ;
}
2007-02-12 11:55:38 +03:00
const struct inode_operations cifs_symlink_inode_ops = {
2022-11-04 10:44:41 +03:00
. get_link = cifs_get_link ,
2005-04-17 02:20:36 +04:00
. permission = cifs_permission ,
. listxattr = cifs_listxattr ,
} ;
2018-10-30 02:41:49 +03:00
/*
 * ->remap_file_range() handler: clone a byte range from @src_file into
 * @dst_file using the server's duplicate_extents operation.
 *
 * @src_file:    source file
 * @off:         offset of the range in the source file
 * @dst_file:    destination file
 * @destoff:     offset of the range in the destination file
 * @len:         number of bytes to clone; 0 means "from @off to the end
 *               of the source file"
 * @remap_flags: REMAP_FILE_* flags
 *
 * Only a plain clone is implemented. Deduplication must only share
 * extents whose contents are already identical, but duplicate_extents
 * overwrites the destination without comparing anything, so
 * REMAP_FILE_DEDUP is refused rather than silently treated as a clone.
 *
 * Returns @len (after the 0 => rest-of-file adjustment) on success, or a
 * negative error:
 *   -EOPNOTSUPP  deduplication was requested, or the server ops provide
 *                no duplicate_extents
 *   -EINVAL      unknown, non-advisory remap flags
 *   -EBADF       one of the files has no private data attached
 *   or the error returned by duplicate_extents.
 */
static loff_t cifs_remap_file_range(struct file *src_file, loff_t off,
		struct file *dst_file, loff_t destoff, loff_t len,
		unsigned int remap_flags)
{
	struct inode *src_inode = file_inode(src_file);
	struct inode *target_inode = file_inode(dst_file);
	struct cifsFileInfo *smb_file_src = src_file->private_data;
	struct cifsFileInfo *smb_file_target;
	struct cifs_tcon *target_tcon;
	unsigned int xid;
	int rc;

	/* dedup cannot be expressed as a clone: reject it explicitly */
	if (remap_flags & REMAP_FILE_DEDUP)
		return -EOPNOTSUPP;
	if (remap_flags & ~REMAP_FILE_ADVISORY)
		return -EINVAL;

	cifs_dbg(FYI, " clone range \n ");

	xid = get_xid();

	if (!src_file->private_data || !dst_file->private_data) {
		rc = -EBADF;
		cifs_dbg(VFS, " missing cifsFileInfo on copy range src file \n ");
		goto out;
	}

	smb_file_target = dst_file->private_data;
	target_tcon = tlink_tcon(smb_file_target->tlink);

	/*
	 * Note: cifs case is easier than btrfs since server responsible for
	 * checks for proper open modes and file type and if it wants
	 * server could even support copy of range where source = target
	 */
	lock_two_nondirectories(target_inode, src_inode);

	if (len == 0)
		len = src_inode->i_size - off;

	cifs_dbg(FYI, " about to flush pages \n ");
	/* should we flush first and last page first */
	truncate_inode_pages_range(&target_inode->i_data, destoff,
				   PAGE_ALIGN(destoff + len) - 1);

	if (target_tcon->ses->server->ops->duplicate_extents)
		rc = target_tcon->ses->server->ops->duplicate_extents(xid,
			smb_file_src, smb_file_target, off, len, destoff);
	else
		rc = -EOPNOTSUPP;

	/*
	 * force revalidate of size and timestamps of target file now
	 * that target is updated on the server
	 */
	CIFS_I(target_inode)->time = 0;
	/*
	 * although unlocking in the reverse order from locking is not
	 * strictly necessary here it is a little cleaner to be consistent
	 */
	unlock_two_nondirectories(src_inode, target_inode);
out:
	free_xid(xid);
	return rc < 0 ? rc : len;
}
2017-02-10 13:33:51 +03:00
/*
 * cifs_file_copychunk_range - ask the server to copy a byte range between
 * two open files.
 * @xid:	transaction id, handed through to the server op
 * @src_file:	file to copy from
 * @off:	start offset within @src_file
 * @dst_file:	file to copy to
 * @destoff:	start offset within @dst_file
 * @len:	number of bytes to copy
 * @flags:	not examined by this function
 *
 * Returns the value of the server's ->copychunk_range() op (presumably the
 * number of bytes copied - confirm against the op implementations), or a
 * negative errno:
 *   -EBADF       either file has no private_data (cifsFileInfo)
 *   -EXDEV       the two files belong to different sessions
 *   -EOPNOTSUPP  the server ops table has no ->copychunk_range()
 *   other        failure of the page cache flush or of file_modified()
 */
ssize_t cifs_file_copychunk_range(unsigned int xid,
				struct file *src_file, loff_t off,
				struct file *dst_file, loff_t destoff,
				size_t len, unsigned int flags)
{
	struct inode *src_inode = file_inode(src_file);
	struct inode *target_inode = file_inode(dst_file);
	struct cifsFileInfo *smb_file_src;
	struct cifsFileInfo *smb_file_target;
	struct cifs_tcon *src_tcon;
	struct cifs_tcon *target_tcon;
	ssize_t rc;

	cifs_dbg(FYI, " copychunk range \n ");

	if (!src_file->private_data || !dst_file->private_data) {
		rc = -EBADF;
		cifs_dbg(VFS, " missing cifsFileInfo on copy range src file \n ");
		goto out;
	}

	rc = -EXDEV;
	smb_file_target = dst_file->private_data;
	smb_file_src = src_file->private_data;
	src_tcon = tlink_tcon(smb_file_src->tlink);
	target_tcon = tlink_tcon(smb_file_target->tlink);

	if (src_tcon->ses != target_tcon->ses) {
		cifs_dbg(VFS, " source and target of copy not on same server \n ");
		goto out;
	}

	rc = -EOPNOTSUPP;
	if (!target_tcon->ses->server->ops->copychunk_range)
		goto out;

	/*
	 * Note: cifs case is easier than btrfs since server responsible for
	 * checks for proper open modes and file type and if it wants
	 * server could even support copy of range where source = target
	 */
	lock_two_nondirectories(target_inode, src_inode);

	cifs_dbg(FYI, " about to flush pages \n ");

	/* The server copies what it has on disk, so push the source range out. */
	rc = filemap_write_and_wait_range(src_inode->i_mapping, off,
					  off + len - 1);
	if (rc)
		goto unlock;

	/*
	 * The whole target page cache is dropped below.  Write back any dirty
	 * target pages first, otherwise data outside the copied range that has
	 * not reached the server yet would be silently thrown away.
	 */
	rc = filemap_write_and_wait(target_inode->i_mapping);
	if (rc)
		goto unlock;

	truncate_inode_pages(&target_inode->i_data, 0);

	rc = file_modified(dst_file);
	if (!rc)
		rc = target_tcon->ses->server->ops->copychunk_range(xid,
			smb_file_src, smb_file_target, off, len, destoff);

	file_accessed(src_file);

	/* force revalidate of size and timestamps of target file now
	 * that target is updated on the server
	 */
	CIFS_I(target_inode)->time = 0;

unlock:
	/* although unlocking in the reverse order from locking is not
	 * strictly necessary here it is a little cleaner to be consistent
	 */
	unlock_two_nondirectories(src_inode, target_inode);

out:
	return rc;
}
2018-05-10 18:59:37 +03:00
/*
 * fsync() handler for directories.
 *
 * Directory operations under CIFS/SMB2/SMB3 are synchronous, so there is
 * nothing to write back: log the request at FYI level and report success.
 * @start and @end are ignored.
 */
static int cifs_dir_fsync(struct file *file, loff_t start, loff_t end,
			  int datasync)
{
	cifs_dbg(FYI, " Sync directory - name: %pD datasync: 0x%x \n ", file,
		 datasync);

	return 0;
}
2017-02-10 13:33:51 +03:00
/*
 * ->copy_file_range() handler.
 *
 * Refuses with -EOPNOTSUPP when the destination is marked as a swapfile.
 * Otherwise tries the server-side copy via cifs_file_copychunk_range() and,
 * if that reports -EOPNOTSUPP or -EXDEV, falls back to
 * generic_copy_file_range().
 *
 * Returns the result of whichever copy routine ran last, or a negative errno.
 */
static ssize_t cifs_copy_file_range(struct file *src_file, loff_t off,
				struct file *dst_file, loff_t destoff,
				size_t len, unsigned int flags)
{
	unsigned int xid = get_xid();
	ssize_t rc;
	struct cifsFileInfo *cfile = dst_file->private_data;

	/*
	 * private_data may be NULL; cifs_file_copychunk_range() rejects that
	 * case with -EBADF, so only dereference it here when it is set.
	 */
	if (cfile && cfile->swapfile) {
		rc = -EOPNOTSUPP;
		free_xid(xid);
		return rc;
	}

	rc = cifs_file_copychunk_range(xid, src_file, off, dst_file, destoff,
					len, flags);
	free_xid(xid);

	/* Server-side copy not possible: let the VFS copy the data instead. */
	if (rc == -EOPNOTSUPP || rc == -EXDEV)
		rc = generic_copy_file_range(src_file, off, dst_file,
					     destoff, len, flags);
	return rc;
}
2006-03-28 13:56:42 +04:00
const struct file_operations cifs_file_ops = {
2014-05-23 14:50:21 +04:00
. read_iter = cifs_loose_read_iter ,
2014-04-03 20:05:17 +04:00
. write_iter = cifs_file_write_iter ,
2005-04-17 02:20:36 +04:00
. open = cifs_open ,
. release = cifs_close ,
. lock = cifs_lock ,
2019-07-17 02:55:38 +03:00
. flock = cifs_flock ,
2005-04-17 02:20:36 +04:00
. fsync = cifs_fsync ,
. flush = cifs_flush ,
. mmap = cifs_file_mmap ,
2022-11-01 17:52:47 +03:00
. splice_read = cifs_splice_read ,
2017-12-28 16:23:08 +03:00
. splice_write = iter_file_splice_write ,
2006-01-13 01:41:28 +03:00
. llseek = cifs_llseek ,
2008-05-15 09:51:55 +04:00
. unlocked_ioctl = cifs_ioctl ,
2017-02-10 13:33:51 +03:00
. copy_file_range = cifs_copy_file_range ,
2018-10-30 02:41:21 +03:00
. remap_file_range = cifs_remap_file_range ,
2008-10-23 08:42:37 +04:00
. setlease = cifs_setlease ,
2014-08-17 17:38:47 +04:00
. fallocate = cifs_fallocate ,
2005-04-17 02:20:36 +04:00
} ;
2010-12-12 13:11:13 +03:00
const struct file_operations cifs_file_strict_ops = {
2014-04-03 03:53:36 +04:00
. read_iter = cifs_strict_readv ,
2014-04-03 20:05:17 +04:00
. write_iter = cifs_strict_writev ,
2010-12-12 13:11:13 +03:00
. open = cifs_open ,
. release = cifs_close ,
. lock = cifs_lock ,
2019-07-17 02:55:38 +03:00
. flock = cifs_flock ,
2010-12-12 13:11:13 +03:00
. fsync = cifs_strict_fsync ,
. flush = cifs_flush ,
2010-12-14 11:29:51 +03:00
. mmap = cifs_file_strict_mmap ,
2022-11-01 17:52:47 +03:00
. splice_read = cifs_splice_read ,
2017-12-28 16:23:08 +03:00
. splice_write = iter_file_splice_write ,
2010-12-12 13:11:13 +03:00
. llseek = cifs_llseek ,
. unlocked_ioctl = cifs_ioctl ,
2017-02-10 13:33:51 +03:00
. copy_file_range = cifs_copy_file_range ,
2018-10-30 02:41:21 +03:00
. remap_file_range = cifs_remap_file_range ,
2010-12-12 13:11:13 +03:00
. setlease = cifs_setlease ,
2014-08-17 17:38:47 +04:00
. fallocate = cifs_fallocate ,
2010-12-12 13:11:13 +03:00
} ;
2006-03-28 13:56:42 +04:00
const struct file_operations cifs_file_direct_ops = {
2018-11-01 01:13:11 +03:00
. read_iter = cifs_direct_readv ,
. write_iter = cifs_direct_writev ,
2005-04-17 02:20:36 +04:00
. open = cifs_open ,
. release = cifs_close ,
. lock = cifs_lock ,
2019-07-17 02:55:38 +03:00
. flock = cifs_flock ,
2005-04-17 02:20:36 +04:00
. fsync = cifs_fsync ,
. flush = cifs_flush ,
2009-12-07 08:44:46 +03:00
. mmap = cifs_file_mmap ,
2022-11-01 17:52:47 +03:00
. splice_read = direct_splice_read ,
2017-12-28 16:23:08 +03:00
. splice_write = iter_file_splice_write ,
2008-05-15 09:51:55 +04:00
. unlocked_ioctl = cifs_ioctl ,
2017-02-10 13:33:51 +03:00
. copy_file_range = cifs_copy_file_range ,
2018-10-30 02:41:21 +03:00
. remap_file_range = cifs_remap_file_range ,
2006-01-13 01:41:28 +03:00
. llseek = cifs_llseek ,
2008-10-23 08:42:37 +04:00
. setlease = cifs_setlease ,
2014-08-17 17:38:47 +04:00
. fallocate = cifs_fallocate ,
2005-04-17 02:20:36 +04:00
} ;
2010-12-12 13:11:13 +03:00
2006-03-28 13:56:42 +04:00
const struct file_operations cifs_file_nobrl_ops = {
2014-05-23 14:50:21 +04:00
. read_iter = cifs_loose_read_iter ,
2014-04-03 20:05:17 +04:00
. write_iter = cifs_file_write_iter ,
2005-11-18 04:03:00 +03:00
. open = cifs_open ,
. release = cifs_close ,
. fsync = cifs_fsync ,
. flush = cifs_flush ,
. mmap = cifs_file_mmap ,
2022-11-01 17:52:47 +03:00
. splice_read = cifs_splice_read ,
2017-12-28 16:23:08 +03:00
. splice_write = iter_file_splice_write ,
2006-01-13 01:41:28 +03:00
. llseek = cifs_llseek ,
2008-05-15 09:51:55 +04:00
. unlocked_ioctl = cifs_ioctl ,
2017-02-10 13:33:51 +03:00
. copy_file_range = cifs_copy_file_range ,
2018-10-30 02:41:21 +03:00
. remap_file_range = cifs_remap_file_range ,
2008-10-23 08:42:37 +04:00
. setlease = cifs_setlease ,
2014-08-17 17:38:47 +04:00
. fallocate = cifs_fallocate ,
2005-11-11 22:41:00 +03:00
} ;
2010-12-12 13:11:13 +03:00
const struct file_operations cifs_file_strict_nobrl_ops = {
2014-04-03 03:53:36 +04:00
. read_iter = cifs_strict_readv ,
2014-04-03 20:05:17 +04:00
. write_iter = cifs_strict_writev ,
2010-12-12 13:11:13 +03:00
. open = cifs_open ,
. release = cifs_close ,
. fsync = cifs_strict_fsync ,
. flush = cifs_flush ,
2010-12-14 11:29:51 +03:00
. mmap = cifs_file_strict_mmap ,
2022-11-01 17:52:47 +03:00
. splice_read = cifs_splice_read ,
2017-12-28 16:23:08 +03:00
. splice_write = iter_file_splice_write ,
2010-12-12 13:11:13 +03:00
. llseek = cifs_llseek ,
. unlocked_ioctl = cifs_ioctl ,
2017-02-10 13:33:51 +03:00
. copy_file_range = cifs_copy_file_range ,
2018-10-30 02:41:21 +03:00
. remap_file_range = cifs_remap_file_range ,
2010-12-12 13:11:13 +03:00
. setlease = cifs_setlease ,
2014-08-17 17:38:47 +04:00
. fallocate = cifs_fallocate ,
2010-12-12 13:11:13 +03:00
} ;
2006-03-28 13:56:42 +04:00
const struct file_operations cifs_file_direct_nobrl_ops = {
2018-11-01 01:13:11 +03:00
. read_iter = cifs_direct_readv ,
. write_iter = cifs_direct_writev ,
2005-11-18 04:03:00 +03:00
. open = cifs_open ,
. release = cifs_close ,
. fsync = cifs_fsync ,
. flush = cifs_flush ,
2010-03-27 05:00:49 +03:00
. mmap = cifs_file_mmap ,
2022-11-01 17:52:47 +03:00
. splice_read = direct_splice_read ,
2017-12-28 16:23:08 +03:00
. splice_write = iter_file_splice_write ,
2008-05-15 09:51:55 +04:00
. unlocked_ioctl = cifs_ioctl ,
2017-02-10 13:33:51 +03:00
. copy_file_range = cifs_copy_file_range ,
2018-10-30 02:41:21 +03:00
. remap_file_range = cifs_remap_file_range ,
2006-01-13 01:41:28 +03:00
. llseek = cifs_llseek ,
2008-10-23 08:42:37 +04:00
. setlease = cifs_setlease ,
2014-08-17 17:38:47 +04:00
. fallocate = cifs_fallocate ,
2005-11-11 22:41:00 +03:00
} ;
2005-04-17 02:20:36 +04:00
2006-03-28 13:56:42 +04:00
const struct file_operations cifs_dir_ops = {
2016-04-21 00:40:47 +03:00
. iterate_shared = cifs_readdir ,
2005-04-17 02:20:36 +04:00
. release = cifs_closedir ,
. read = generic_read_dir ,
2008-05-15 09:51:55 +04:00
. unlocked_ioctl = cifs_ioctl ,
2017-02-10 13:33:51 +03:00
. copy_file_range = cifs_copy_file_range ,
2018-10-30 02:41:21 +03:00
. remap_file_range = cifs_remap_file_range ,
2008-09-03 23:53:01 +04:00
. llseek = generic_file_llseek ,
2018-05-10 18:59:37 +03:00
. fsync = cifs_dir_fsync ,
2005-04-17 02:20:36 +04:00
} ;
static void
2008-07-26 06:45:34 +04:00
cifs_init_once ( void * inode )
2005-04-17 02:20:36 +04:00
{
struct cifsInodeInfo * cifsi = inode ;
netfs: Fix gcc-12 warning by embedding vfs inode in netfs_i_context
While randstruct was satisfied with using an open-coded "void *" offset
cast for the netfs_i_context <-> inode casting, __builtin_object_size() as
used by FORTIFY_SOURCE was not as easily fooled. This was causing the
following complaint[1] from gcc v12:
In file included from include/linux/string.h:253,
from include/linux/ceph/ceph_debug.h:7,
from fs/ceph/inode.c:2:
In function 'fortify_memset_chk',
inlined from 'netfs_i_context_init' at include/linux/netfs.h:326:2,
inlined from 'ceph_alloc_inode' at fs/ceph/inode.c:463:2:
include/linux/fortify-string.h:242:25: warning: call to '__write_overflow_field' declared with attribute warning: detected write beyond size of field (1st parameter); maybe use struct_group()? [-Wattribute-warning]
242 | __write_overflow_field(p_size_field, size);
| ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Fix this by embedding a struct inode into struct netfs_i_context (which
should perhaps be renamed to struct netfs_inode). The struct inode
vfs_inode fields are then removed from the 9p, afs, ceph and cifs inode
structs and vfs_inode is then simply changed to "netfs.inode" in those
filesystems.
Further, rename netfs_i_context to netfs_inode, get rid of the
netfs_inode() function that converted a netfs_i_context pointer to an
inode pointer (that can now be done with &ctx->inode) and rename the
netfs_i_context() function to netfs_inode() (which is now a wrapper
around container_of()).
Most of the changes were done with:
perl -p -i -e 's/vfs_inode/netfs.inode/'g \
`git grep -l 'vfs_inode' -- fs/{9p,afs,ceph,cifs}/*.[ch]`
Kees suggested doing it with a pair structure[2] and a special
declarator to insert that into the network filesystem's inode
wrapper[3], but I think it's cleaner to embed it - and then it doesn't
matter if struct randomisation reorders things.
Dave Chinner suggested using a filesystem-specific VFS_I() function in
each filesystem to convert that filesystem's own inode wrapper struct
into the VFS inode struct[4].
Version #2:
- Fix a couple of missed name changes due to a disabled cifs option.
- Rename nfs_i_context to nfs_inode
- Use "netfs" instead of "nic" as the member name in per-fs inode wrapper
structs.
[ This also undoes commit 507160f46c55 ("netfs: gcc-12: temporarily
disable '-Wattribute-warning' for now") that is no longer needed ]
Fixes: bc899ee1c898 ("netfs: Add a netfs inode context")
Reported-by: Jeff Layton <jlayton@kernel.org>
Signed-off-by: David Howells <dhowells@redhat.com>
Reviewed-by: Jeff Layton <jlayton@kernel.org>
Reviewed-by: Kees Cook <keescook@chromium.org>
Reviewed-by: Xiubo Li <xiubli@redhat.com>
cc: Jonathan Corbet <corbet@lwn.net>
cc: Eric Van Hensbergen <ericvh@gmail.com>
cc: Latchesar Ionkov <lucho@ionkov.net>
cc: Dominique Martinet <asmadeus@codewreck.org>
cc: Christian Schoenebeck <linux_oss@crudebyte.com>
cc: Marc Dionne <marc.dionne@auristor.com>
cc: Ilya Dryomov <idryomov@gmail.com>
cc: Steve French <smfrench@gmail.com>
cc: William Kucharski <william.kucharski@oracle.com>
cc: "Matthew Wilcox (Oracle)" <willy@infradead.org>
cc: Dave Chinner <david@fromorbit.com>
cc: linux-doc@vger.kernel.org
cc: v9fs-developer@lists.sourceforge.net
cc: linux-afs@lists.infradead.org
cc: ceph-devel@vger.kernel.org
cc: linux-cifs@vger.kernel.org
cc: samba-technical@lists.samba.org
cc: linux-fsdevel@vger.kernel.org
cc: linux-hardening@vger.kernel.org
Link: https://lore.kernel.org/r/d2ad3a3d7bdd794c6efb562d2f2b655fb67756b9.camel@kernel.org/ [1]
Link: https://lore.kernel.org/r/20220517210230.864239-1-keescook@chromium.org/ [2]
Link: https://lore.kernel.org/r/20220518202212.2322058-1-keescook@chromium.org/ [3]
Link: https://lore.kernel.org/r/20220524101205.GI2306852@dread.disaster.area/ [4]
Link: https://lore.kernel.org/r/165296786831.3591209.12111293034669289733.stgit@warthog.procyon.org.uk/ # v1
Link: https://lore.kernel.org/r/165305805651.4094995.7763502506786714216.stgit@warthog.procyon.org.uk # v2
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2022-06-09 23:46:04 +03:00
inode_init_once ( & cifsi - > netfs . inode ) ;
2012-09-19 17:22:44 +04:00
init_rwsem ( & cifsi - > lock_sem ) ;
2005-04-17 02:20:36 +04:00
}
2014-04-04 01:46:30 +04:00
static int __init
2005-04-17 02:20:36 +04:00
cifs_init_inodecache ( void )
{
cifs_inode_cachep = kmem_cache_create ( " cifs_inode_cache " ,
2007-08-31 02:09:15 +04:00
sizeof ( struct cifsInodeInfo ) ,
2006-03-24 14:16:06 +03:00
0 , ( SLAB_RECLAIM_ACCOUNT |
2016-01-15 02:18:21 +03:00
SLAB_MEM_SPREAD | SLAB_ACCOUNT ) ,
2007-07-20 05:11:58 +04:00
cifs_init_once ) ;
2005-04-17 02:20:36 +04:00
if ( cifs_inode_cachep = = NULL )
return - ENOMEM ;
return 0 ;
}
static void
cifs_destroy_inodecache ( void )
{
2012-09-26 05:33:07 +04:00
/*
* Make sure all delayed rcu free inodes are flushed before we
* destroy cache .
*/
rcu_barrier ( ) ;
2006-09-27 12:49:40 +04:00
kmem_cache_destroy ( cifs_inode_cachep ) ;
2005-04-17 02:20:36 +04:00
}
static int
cifs_init_request_bufs ( void )
{
2012-01-12 22:40:50 +04:00
/*
* SMB2 maximum header size is bigger than CIFS one - no problems to
* allocate some more bytes for CIFS .
*/
2017-07-09 02:48:15 +03:00
size_t max_hdr_size = MAX_SMB2_HDR_SIZE ;
2007-05-01 00:13:06 +04:00
if ( CIFSMaxBufSize < 8192 ) {
2005-04-17 02:20:36 +04:00
/* Buffer size can not be smaller than 2 * PATH_MAX since maximum
Unicode path name has to fit in any SMB / CIFS path based frames */
CIFSMaxBufSize = 8192 ;
} else if ( CIFSMaxBufSize > 1024 * 127 ) {
CIFSMaxBufSize = 1024 * 127 ;
} else {
CIFSMaxBufSize & = 0x1FE00 ; /* Round size to even 512 byte mult*/
}
2013-05-05 07:12:25 +04:00
/*
cifs_dbg ( VFS , " CIFSMaxBufSize %d 0x%x \n " ,
CIFSMaxBufSize , CIFSMaxBufSize ) ;
*/
2017-06-11 05:50:33 +03:00
cifs_req_cachep = kmem_cache_create_usercopy ( " cifs_request " ,
2012-01-12 22:40:50 +04:00
CIFSMaxBufSize + max_hdr_size , 0 ,
2017-06-11 05:50:33 +03:00
SLAB_HWCACHE_ALIGN , 0 ,
CIFSMaxBufSize + max_hdr_size ,
NULL ) ;
2005-04-17 02:20:36 +04:00
if ( cifs_req_cachep = = NULL )
return - ENOMEM ;
2007-05-01 00:13:06 +04:00
if ( cifs_min_rcv < 1 )
2005-04-17 02:20:36 +04:00
cifs_min_rcv = 1 ;
else if ( cifs_min_rcv > 64 ) {
cifs_min_rcv = 64 ;
2013-05-05 07:12:25 +04:00
cifs_dbg ( VFS , " cifs_min_rcv set to maximum (64) \n " ) ;
2005-04-17 02:20:36 +04:00
}
2006-03-26 13:37:50 +04:00
cifs_req_poolp = mempool_create_slab_pool ( cifs_min_rcv ,
cifs_req_cachep ) ;
2005-04-17 02:20:36 +04:00
2007-05-01 00:13:06 +04:00
if ( cifs_req_poolp = = NULL ) {
2005-04-17 02:20:36 +04:00
kmem_cache_destroy ( cifs_req_cachep ) ;
return - ENOMEM ;
}
2005-12-13 07:53:18 +03:00
/* MAX_CIFS_SMALL_BUFFER_SIZE bytes is enough for most SMB responses and
2005-04-17 02:20:36 +04:00
almost all handle based requests ( but not write response , nor is it
sufficient for path based requests ) . A smaller size would have
2007-07-13 04:33:32 +04:00
been more efficient ( compacting multiple slab items on one 4 k page )
2005-04-17 02:20:36 +04:00
for the case in which debug was on , but this larger size allows
more SMBs to use small buffer alloc and is still much more
2007-07-07 03:13:06 +04:00
efficient to alloc 1 per page off the slab compared to 17 K ( 5 page )
2005-04-17 02:20:36 +04:00
alloc of large cifs buffers even when page debugging is on */
2017-06-11 05:50:33 +03:00
cifs_sm_req_cachep = kmem_cache_create_usercopy ( " cifs_small_rq " ,
2007-07-07 03:13:06 +04:00
MAX_CIFS_SMALL_BUFFER_SIZE , 0 , SLAB_HWCACHE_ALIGN ,
2017-06-11 05:50:33 +03:00
0 , MAX_CIFS_SMALL_BUFFER_SIZE , NULL ) ;
2005-04-17 02:20:36 +04:00
if ( cifs_sm_req_cachep = = NULL ) {
mempool_destroy ( cifs_req_poolp ) ;
kmem_cache_destroy ( cifs_req_cachep ) ;
2007-07-07 03:13:06 +04:00
return - ENOMEM ;
2005-04-17 02:20:36 +04:00
}
2007-05-01 00:13:06 +04:00
if ( cifs_min_small < 2 )
2005-04-17 02:20:36 +04:00
cifs_min_small = 2 ;
else if ( cifs_min_small > 256 ) {
cifs_min_small = 256 ;
2013-05-05 07:12:25 +04:00
cifs_dbg ( FYI , " cifs_min_small set to maximum (256) \n " ) ;
2005-04-17 02:20:36 +04:00
}
2006-03-26 13:37:50 +04:00
cifs_sm_req_poolp = mempool_create_slab_pool ( cifs_min_small ,
cifs_sm_req_cachep ) ;
2005-04-17 02:20:36 +04:00
2007-05-01 00:13:06 +04:00
if ( cifs_sm_req_poolp = = NULL ) {
2005-04-17 02:20:36 +04:00
mempool_destroy ( cifs_req_poolp ) ;
kmem_cache_destroy ( cifs_req_cachep ) ;
kmem_cache_destroy ( cifs_sm_req_cachep ) ;
return - ENOMEM ;
}
return 0 ;
}
static void
cifs_destroy_request_bufs ( void )
{
mempool_destroy ( cifs_req_poolp ) ;
2006-09-27 12:49:40 +04:00
kmem_cache_destroy ( cifs_req_cachep ) ;
2005-04-17 02:20:36 +04:00
mempool_destroy ( cifs_sm_req_poolp ) ;
2006-09-27 12:49:40 +04:00
kmem_cache_destroy ( cifs_sm_req_cachep ) ;
2005-04-17 02:20:36 +04:00
}
2022-08-05 17:47:39 +03:00
static int init_mids ( void )
2005-04-17 02:20:36 +04:00
{
cifs_mid_cachep = kmem_cache_create ( " cifs_mpx_ids " ,
2007-08-31 02:09:15 +04:00
sizeof ( struct mid_q_entry ) , 0 ,
SLAB_HWCACHE_ALIGN , NULL ) ;
2005-04-17 02:20:36 +04:00
if ( cifs_mid_cachep = = NULL )
return - ENOMEM ;
2006-03-26 13:37:50 +04:00
/* 3 is a reasonable minimum number of simultaneous operations */
cifs_mid_poolp = mempool_create_slab_pool ( 3 , cifs_mid_cachep ) ;
2007-05-01 00:13:06 +04:00
if ( cifs_mid_poolp = = NULL ) {
2005-04-17 02:20:36 +04:00
kmem_cache_destroy ( cifs_mid_cachep ) ;
return - ENOMEM ;
}
return 0 ;
}
2022-08-05 17:47:39 +03:00
static void destroy_mids ( void )
2005-04-17 02:20:36 +04:00
{
mempool_destroy ( cifs_mid_poolp ) ;
2006-09-27 12:49:40 +04:00
kmem_cache_destroy ( cifs_mid_cachep ) ;
2005-04-17 02:20:36 +04:00
}
static int __init
init_cifs ( void )
{
int rc = 0 ;
cifs_proc_init ( ) ;
2008-11-14 21:44:38 +03:00
INIT_LIST_HEAD ( & cifs_tcp_ses_list ) ;
2005-04-17 02:20:36 +04:00
/*
* Initialize Global counters
*/
atomic_set ( & sesInfoAllocCount , 0 ) ;
atomic_set ( & tconInfoAllocCount , 0 ) ;
2021-02-04 10:20:46 +03:00
atomic_set ( & tcpSesNextId , 0 ) ;
2007-07-07 03:13:06 +04:00
atomic_set ( & tcpSesAllocCount , 0 ) ;
2005-04-17 02:20:36 +04:00
atomic_set ( & tcpSesReconnectCount , 0 ) ;
atomic_set ( & tconInfoReconnectCount , 0 ) ;
2022-07-16 07:45:45 +03:00
atomic_set ( & buf_alloc_count , 0 ) ;
atomic_set ( & small_buf_alloc_count , 0 ) ;
2005-12-04 00:58:57 +03:00
# ifdef CONFIG_CIFS_STATS2
2022-07-16 07:45:45 +03:00
atomic_set ( & total_buf_alloc_count , 0 ) ;
atomic_set ( & total_small_buf_alloc_count , 0 ) ;
2018-09-18 22:05:18 +03:00
if ( slow_rsp_threshold < 1 )
cifs_dbg ( FYI , " slow_response_threshold msgs disabled \n " ) ;
else if ( slow_rsp_threshold > 32767 )
cifs_dbg ( VFS ,
" slow response threshold set higher than recommended (0 to 32767) \n " ) ;
2005-12-04 00:58:57 +03:00
# endif /* CONFIG_CIFS_STATS2 */
2022-07-16 07:45:45 +03:00
atomic_set ( & mid_count , 0 ) ;
2005-04-17 02:20:36 +04:00
GlobalCurrentXid = 0 ;
GlobalTotalActiveXid = 0 ;
GlobalMaxActiveXid = 0 ;
2010-10-18 21:59:37 +04:00
spin_lock_init ( & cifs_tcp_ses_lock ) ;
2005-04-17 02:20:36 +04:00
spin_lock_init ( & GlobalMid_Lock ) ;
2017-06-08 03:42:50 +03:00
cifs_lock_secret = get_random_u32 ( ) ;
2016-05-24 13:27:44 +03:00
2007-05-01 00:13:06 +04:00
if ( cifs_max_pending < 2 ) {
2005-04-17 02:20:36 +04:00
cifs_max_pending = 2 ;
2013-05-05 07:12:25 +04:00
cifs_dbg ( FYI , " cifs_max_pending set to min of 2 \n " ) ;
2012-03-20 13:55:09 +04:00
} else if ( cifs_max_pending > CIFS_MAX_REQ ) {
cifs_max_pending = CIFS_MAX_REQ ;
2013-05-05 07:12:25 +04:00
cifs_dbg ( FYI , " cifs_max_pending set to max of %u \n " ,
CIFS_MAX_REQ ) ;
2005-04-17 02:20:36 +04:00
}
2012-03-23 22:40:53 +04:00
cifsiod_wq = alloc_workqueue ( " cifsiod " , WQ_FREEZABLE | WQ_MEM_RECLAIM , 0 ) ;
if ( ! cifsiod_wq ) {
rc = - ENOMEM ;
goto out_clean_proc ;
}
2019-09-07 09:09:49 +03:00
/*
2019-09-09 21:30:15 +03:00
* Consider in future setting limit ! = 0 maybe to min ( num_of_cores - 1 , 3 )
* so that we don ' t launch too many worker threads but
2019-09-24 16:01:28 +03:00
* Documentation / core - api / workqueue . rst recommends setting it to 0
2019-09-07 09:09:49 +03:00
*/
2019-09-09 21:30:15 +03:00
/* WQ_UNBOUND allows decrypt tasks to run on any CPU */
2019-09-07 09:09:49 +03:00
decrypt_wq = alloc_workqueue ( " smb3decryptd " ,
2019-09-09 21:30:15 +03:00
WQ_UNBOUND | WQ_FREEZABLE | WQ_MEM_RECLAIM , 0 ) ;
2019-09-07 09:09:49 +03:00
if ( ! decrypt_wq ) {
rc = - ENOMEM ;
goto out_destroy_cifsiod_wq ;
}
cifs: move cifsFileInfo_put logic into a work-queue
This patch moves the final part of the cifsFileInfo_put() logic where we
need a write lock on lock_sem to be processed in a separate thread that
holds no other locks.
This is to prevent deadlocks like the one below:
> there are 6 processes looping to while trying to down_write
> cinode->lock_sem, 5 of them from _cifsFileInfo_put, and one from
> cifs_new_fileinfo
>
> and there are 5 other processes which are blocked, several of them
> waiting on either PG_writeback or PG_locked (which are both set), all
> for the same page of the file
>
> 2 inode_lock() (inode->i_rwsem) for the file
> 1 wait_on_page_writeback() for the page
> 1 down_read(inode->i_rwsem) for the inode of the directory
> 1 inode_lock()(inode->i_rwsem) for the inode of the directory
> 1 __lock_page
>
>
> so processes are blocked waiting on:
> page flags PG_locked and PG_writeback for one specific page
> inode->i_rwsem for the directory
> inode->i_rwsem for the file
> cifsInodeInflock_sem
>
>
>
> here are the more gory details (let me know if I need to provide
> anything more/better):
>
> [0 00:48:22.765] [UN] PID: 8863 TASK: ffff8c691547c5c0 CPU: 3
> COMMAND: "reopen_file"
> #0 [ffff9965007e3ba8] __schedule at ffffffff9b6e6095
> #1 [ffff9965007e3c38] schedule at ffffffff9b6e64df
> #2 [ffff9965007e3c48] rwsem_down_write_slowpath at ffffffff9af283d7
> #3 [ffff9965007e3cb8] legitimize_path at ffffffff9b0f975d
> #4 [ffff9965007e3d08] path_openat at ffffffff9b0fe55d
> #5 [ffff9965007e3dd8] do_filp_open at ffffffff9b100a33
> #6 [ffff9965007e3ee0] do_sys_open at ffffffff9b0eb2d6
> #7 [ffff9965007e3f38] do_syscall_64 at ffffffff9ae04315
> * (I think legitimize_path is bogus)
>
> in path_openat
> } else {
> const char *s = path_init(nd, flags);
> while (!(error = link_path_walk(s, nd)) &&
> (error = do_last(nd, file, op)) > 0) { <<<<
>
> do_last:
> if (open_flag & O_CREAT)
> inode_lock(dir->d_inode); <<<<
> else
> so it's trying to take inode->i_rwsem for the directory
>
> DENTRY INODE SUPERBLK TYPE PATH
> ffff8c68bb8e79c0 ffff8c691158ef20 ffff8c6915bf9000 DIR /mnt/vm1_smb/
> inode.i_rwsem is ffff8c691158efc0
>
> <struct rw_semaphore 0xffff8c691158efc0>:
> owner: <struct task_struct 0xffff8c6914275d00> (UN - 8856 -
> reopen_file), counter: 0x0000000000000003
> waitlist: 2
> 0xffff9965007e3c90 8863 reopen_file UN 0 1:29:22.926
> RWSEM_WAITING_FOR_WRITE
> 0xffff996500393e00 9802 ls UN 0 1:17:26.700
> RWSEM_WAITING_FOR_READ
>
>
> the owner of the inode.i_rwsem of the directory is:
>
> [0 00:00:00.109] [UN] PID: 8856 TASK: ffff8c6914275d00 CPU: 3
> COMMAND: "reopen_file"
> #0 [ffff99650065b828] __schedule at ffffffff9b6e6095
> #1 [ffff99650065b8b8] schedule at ffffffff9b6e64df
> #2 [ffff99650065b8c8] schedule_timeout at ffffffff9b6e9f89
> #3 [ffff99650065b940] msleep at ffffffff9af573a9
> #4 [ffff99650065b948] _cifsFileInfo_put.cold.63 at ffffffffc0a42dd6 [cifs]
> #5 [ffff99650065ba38] cifs_writepage_locked at ffffffffc0a0b8f3 [cifs]
> #6 [ffff99650065bab0] cifs_launder_page at ffffffffc0a0bb72 [cifs]
> #7 [ffff99650065bb30] invalidate_inode_pages2_range at ffffffff9b04d4bd
> #8 [ffff99650065bcb8] cifs_invalidate_mapping at ffffffffc0a11339 [cifs]
> #9 [ffff99650065bcd0] cifs_revalidate_mapping at ffffffffc0a1139a [cifs]
> #10 [ffff99650065bcf0] cifs_d_revalidate at ffffffffc0a014f6 [cifs]
> #11 [ffff99650065bd08] path_openat at ffffffff9b0fe7f7
> #12 [ffff99650065bdd8] do_filp_open at ffffffff9b100a33
> #13 [ffff99650065bee0] do_sys_open at ffffffff9b0eb2d6
> #14 [ffff99650065bf38] do_syscall_64 at ffffffff9ae04315
>
> cifs_launder_page is for page 0xffffd1e2c07d2480
>
> crash> page.index,mapping,flags 0xffffd1e2c07d2480
> index = 0x8
> mapping = 0xffff8c68f3cd0db0
> flags = 0xfffffc0008095
>
> PAGE-FLAG BIT VALUE
> PG_locked 0 0000001
> PG_uptodate 2 0000004
> PG_lru 4 0000010
> PG_waiters 7 0000080
> PG_writeback 15 0008000
>
>
> inode is ffff8c68f3cd0c40
> inode.i_rwsem is ffff8c68f3cd0ce0
> DENTRY INODE SUPERBLK TYPE PATH
> ffff8c68a1f1b480 ffff8c68f3cd0c40 ffff8c6915bf9000 REG
> /mnt/vm1_smb/testfile.8853
>
>
> this process holds the inode->i_rwsem for the parent directory, is
> laundering a page attached to the inode of the file it's opening, and in
> _cifsFileInfo_put is trying to down_write the cifsInodeInflock_sem
> for the file itself.
>
>
> <struct rw_semaphore 0xffff8c68f3cd0ce0>:
> owner: <struct task_struct 0xffff8c6914272e80> (UN - 8854 -
> reopen_file), counter: 0x0000000000000003
> waitlist: 1
> 0xffff9965005dfd80 8855 reopen_file UN 0 1:29:22.912
> RWSEM_WAITING_FOR_WRITE
>
> this is the inode.i_rwsem for the file
>
> the owner:
>
> [0 00:48:22.739] [UN] PID: 8854 TASK: ffff8c6914272e80 CPU: 2
> COMMAND: "reopen_file"
> #0 [ffff99650054fb38] __schedule at ffffffff9b6e6095
> #1 [ffff99650054fbc8] schedule at ffffffff9b6e64df
> #2 [ffff99650054fbd8] io_schedule at ffffffff9b6e68e2
> #3 [ffff99650054fbe8] __lock_page at ffffffff9b03c56f
> #4 [ffff99650054fc80] pagecache_get_page at ffffffff9b03dcdf
> #5 [ffff99650054fcc0] grab_cache_page_write_begin at ffffffff9b03ef4c
> #6 [ffff99650054fcd0] cifs_write_begin at ffffffffc0a064ec [cifs]
> #7 [ffff99650054fd30] generic_perform_write at ffffffff9b03bba4
> #8 [ffff99650054fda8] __generic_file_write_iter at ffffffff9b04060a
> #9 [ffff99650054fdf0] cifs_strict_writev.cold.70 at ffffffffc0a4469b [cifs]
> #10 [ffff99650054fe48] new_sync_write at ffffffff9b0ec1dd
> #11 [ffff99650054fed0] vfs_write at ffffffff9b0eed35
> #12 [ffff99650054ff00] ksys_write at ffffffff9b0eefd9
> #13 [ffff99650054ff38] do_syscall_64 at ffffffff9ae04315
>
> the process holds the inode->i_rwsem for the file to which it's writing,
> and is trying to __lock_page for the same page as in the other processes
>
>
> the other tasks:
> [0 00:00:00.028] [UN] PID: 8859 TASK: ffff8c6915479740 CPU: 2
> COMMAND: "reopen_file"
> #0 [ffff9965007b39d8] __schedule at ffffffff9b6e6095
> #1 [ffff9965007b3a68] schedule at ffffffff9b6e64df
> #2 [ffff9965007b3a78] schedule_timeout at ffffffff9b6e9f89
> #3 [ffff9965007b3af0] msleep at ffffffff9af573a9
> #4 [ffff9965007b3af8] cifs_new_fileinfo.cold.61 at ffffffffc0a42a07 [cifs]
> #5 [ffff9965007b3b78] cifs_open at ffffffffc0a0709d [cifs]
> #6 [ffff9965007b3cd8] do_dentry_open at ffffffff9b0e9b7a
> #7 [ffff9965007b3d08] path_openat at ffffffff9b0fe34f
> #8 [ffff9965007b3dd8] do_filp_open at ffffffff9b100a33
> #9 [ffff9965007b3ee0] do_sys_open at ffffffff9b0eb2d6
> #10 [ffff9965007b3f38] do_syscall_64 at ffffffff9ae04315
>
> this is opening the file, and is trying to down_write cinode->lock_sem
>
>
> [0 00:00:00.041] [UN] PID: 8860 TASK: ffff8c691547ae80 CPU: 2
> COMMAND: "reopen_file"
> [0 00:00:00.057] [UN] PID: 8861 TASK: ffff8c6915478000 CPU: 3
> COMMAND: "reopen_file"
> [0 00:00:00.059] [UN] PID: 8858 TASK: ffff8c6914271740 CPU: 2
> COMMAND: "reopen_file"
> [0 00:00:00.109] [UN] PID: 8862 TASK: ffff8c691547dd00 CPU: 6
> COMMAND: "reopen_file"
> #0 [ffff9965007c3c78] __schedule at ffffffff9b6e6095
> #1 [ffff9965007c3d08] schedule at ffffffff9b6e64df
> #2 [ffff9965007c3d18] schedule_timeout at ffffffff9b6e9f89
> #3 [ffff9965007c3d90] msleep at ffffffff9af573a9
> #4 [ffff9965007c3d98] _cifsFileInfo_put.cold.63 at ffffffffc0a42dd6 [cifs]
> #5 [ffff9965007c3e88] cifs_close at ffffffffc0a07aaf [cifs]
> #6 [ffff9965007c3ea0] __fput at ffffffff9b0efa6e
> #7 [ffff9965007c3ee8] task_work_run at ffffffff9aef1614
> #8 [ffff9965007c3f20] exit_to_usermode_loop at ffffffff9ae03d6f
> #9 [ffff9965007c3f38] do_syscall_64 at ffffffff9ae0444c
>
> closing the file, and trying to down_write cifsi->lock_sem
>
>
> [0 00:48:22.839] [UN] PID: 8857 TASK: ffff8c6914270000 CPU: 7
> COMMAND: "reopen_file"
> #0 [ffff9965006a7cc8] __schedule at ffffffff9b6e6095
> #1 [ffff9965006a7d58] schedule at ffffffff9b6e64df
> #2 [ffff9965006a7d68] io_schedule at ffffffff9b6e68e2
> #3 [ffff9965006a7d78] wait_on_page_bit at ffffffff9b03cac6
> #4 [ffff9965006a7e10] __filemap_fdatawait_range at ffffffff9b03b028
> #5 [ffff9965006a7ed8] filemap_write_and_wait at ffffffff9b040165
> #6 [ffff9965006a7ef0] cifs_flush at ffffffffc0a0c2fa [cifs]
> #7 [ffff9965006a7f10] filp_close at ffffffff9b0e93f1
> #8 [ffff9965006a7f30] __x64_sys_close at ffffffff9b0e9a0e
> #9 [ffff9965006a7f38] do_syscall_64 at ffffffff9ae04315
>
> in __filemap_fdatawait_range
> wait_on_page_writeback(page);
> for the same page of the file
>
>
>
> [0 00:48:22.718] [UN] PID: 8855 TASK: ffff8c69142745c0 CPU: 7
> COMMAND: "reopen_file"
> #0 [ffff9965005dfc98] __schedule at ffffffff9b6e6095
> #1 [ffff9965005dfd28] schedule at ffffffff9b6e64df
> #2 [ffff9965005dfd38] rwsem_down_write_slowpath at ffffffff9af283d7
> #3 [ffff9965005dfdf0] cifs_strict_writev at ffffffffc0a0c40a [cifs]
> #4 [ffff9965005dfe48] new_sync_write at ffffffff9b0ec1dd
> #5 [ffff9965005dfed0] vfs_write at ffffffff9b0eed35
> #6 [ffff9965005dff00] ksys_write at ffffffff9b0eefd9
> #7 [ffff9965005dff38] do_syscall_64 at ffffffff9ae04315
>
> inode_lock(inode);
>
>
> and one 'ls' later on, to see whether the rest of the mount is available
> (the test file is in the root, so we get blocked up on the directory
> ->i_rwsem), so the entire mount is unavailable
>
> [0 00:36:26.473] [UN] PID: 9802 TASK: ffff8c691436ae80 CPU: 4
> COMMAND: "ls"
> #0 [ffff996500393d28] __schedule at ffffffff9b6e6095
> #1 [ffff996500393db8] schedule at ffffffff9b6e64df
> #2 [ffff996500393dc8] rwsem_down_read_slowpath at ffffffff9b6e9421
> #3 [ffff996500393e78] down_read_killable at ffffffff9b6e95e2
> #4 [ffff996500393e88] iterate_dir at ffffffff9b103c56
> #5 [ffff996500393ec8] ksys_getdents64 at ffffffff9b104b0c
> #6 [ffff996500393f30] __x64_sys_getdents64 at ffffffff9b104bb6
> #7 [ffff996500393f38] do_syscall_64 at ffffffff9ae04315
>
> in iterate_dir:
> if (shared)
> res = down_read_killable(&inode->i_rwsem); <<<<
> else
> res = down_write_killable(&inode->i_rwsem);
>
Reported-by: Frank Sorenson <sorenson@redhat.com>
Reviewed-by: Pavel Shilovsky <pshilov@microsoft.com>
Signed-off-by: Ronnie Sahlberg <lsahlber@redhat.com>
Signed-off-by: Steve French <stfrench@microsoft.com>
2019-11-03 06:06:37 +03:00
fileinfo_put_wq = alloc_workqueue ( " cifsfileinfoput " ,
WQ_UNBOUND | WQ_FREEZABLE | WQ_MEM_RECLAIM , 0 ) ;
if ( ! fileinfo_put_wq ) {
rc = - ENOMEM ;
goto out_destroy_decrypt_wq ;
}
2017-05-03 18:54:01 +03:00
cifsoplockd_wq = alloc_workqueue ( " cifsoplockd " ,
WQ_FREEZABLE | WQ_MEM_RECLAIM , 0 ) ;
if ( ! cifsoplockd_wq ) {
rc = - ENOMEM ;
cifs: move cifsFileInfo_put logic into a work-queue
This patch moves the final part of the cifsFileInfo_put() logic where we
need a write lock on lock_sem to be processed in a separate thread that
holds no other locks.
This is to prevent deadlocks like the one below:
> there are 6 processes looping while trying to down_write
> cinode->lock_sem, 5 of them from _cifsFileInfo_put, and one from
> cifs_new_fileinfo
>
> and there are 5 other processes which are blocked, several of them
> waiting on either PG_writeback or PG_locked (which are both set), all
> for the same page of the file
>
> 2 inode_lock() (inode->i_rwsem) for the file
> 1 wait_on_page_writeback() for the page
> 1 down_read(inode->i_rwsem) for the inode of the directory
> 1 inode_lock()(inode->i_rwsem) for the inode of the directory
> 1 __lock_page
>
>
> so processes are blocked waiting on:
> page flags PG_locked and PG_writeback for one specific page
> inode->i_rwsem for the directory
> inode->i_rwsem for the file
> cifsInodeInfo->lock_sem
>
>
>
> here are the more gory details (let me know if I need to provide
> anything more/better):
>
> [0 00:48:22.765] [UN] PID: 8863 TASK: ffff8c691547c5c0 CPU: 3
> COMMAND: "reopen_file"
> #0 [ffff9965007e3ba8] __schedule at ffffffff9b6e6095
> #1 [ffff9965007e3c38] schedule at ffffffff9b6e64df
> #2 [ffff9965007e3c48] rwsem_down_write_slowpath at ffffffff9af283d7
> #3 [ffff9965007e3cb8] legitimize_path at ffffffff9b0f975d
> #4 [ffff9965007e3d08] path_openat at ffffffff9b0fe55d
> #5 [ffff9965007e3dd8] do_filp_open at ffffffff9b100a33
> #6 [ffff9965007e3ee0] do_sys_open at ffffffff9b0eb2d6
> #7 [ffff9965007e3f38] do_syscall_64 at ffffffff9ae04315
> * (I think legitimize_path is bogus)
>
> in path_openat
> } else {
> const char *s = path_init(nd, flags);
> while (!(error = link_path_walk(s, nd)) &&
> (error = do_last(nd, file, op)) > 0) { <<<<
>
> do_last:
> if (open_flag & O_CREAT)
> inode_lock(dir->d_inode); <<<<
> else
> so it's trying to take inode->i_rwsem for the directory
>
> DENTRY INODE SUPERBLK TYPE PATH
> ffff8c68bb8e79c0 ffff8c691158ef20 ffff8c6915bf9000 DIR /mnt/vm1_smb/
> inode.i_rwsem is ffff8c691158efc0
>
> <struct rw_semaphore 0xffff8c691158efc0>:
> owner: <struct task_struct 0xffff8c6914275d00> (UN - 8856 -
> reopen_file), counter: 0x0000000000000003
> waitlist: 2
> 0xffff9965007e3c90 8863 reopen_file UN 0 1:29:22.926
> RWSEM_WAITING_FOR_WRITE
> 0xffff996500393e00 9802 ls UN 0 1:17:26.700
> RWSEM_WAITING_FOR_READ
>
>
> the owner of the inode.i_rwsem of the directory is:
>
> [0 00:00:00.109] [UN] PID: 8856 TASK: ffff8c6914275d00 CPU: 3
> COMMAND: "reopen_file"
> #0 [ffff99650065b828] __schedule at ffffffff9b6e6095
> #1 [ffff99650065b8b8] schedule at ffffffff9b6e64df
> #2 [ffff99650065b8c8] schedule_timeout at ffffffff9b6e9f89
> #3 [ffff99650065b940] msleep at ffffffff9af573a9
> #4 [ffff99650065b948] _cifsFileInfo_put.cold.63 at ffffffffc0a42dd6 [cifs]
> #5 [ffff99650065ba38] cifs_writepage_locked at ffffffffc0a0b8f3 [cifs]
> #6 [ffff99650065bab0] cifs_launder_page at ffffffffc0a0bb72 [cifs]
> #7 [ffff99650065bb30] invalidate_inode_pages2_range at ffffffff9b04d4bd
> #8 [ffff99650065bcb8] cifs_invalidate_mapping at ffffffffc0a11339 [cifs]
> #9 [ffff99650065bcd0] cifs_revalidate_mapping at ffffffffc0a1139a [cifs]
> #10 [ffff99650065bcf0] cifs_d_revalidate at ffffffffc0a014f6 [cifs]
> #11 [ffff99650065bd08] path_openat at ffffffff9b0fe7f7
> #12 [ffff99650065bdd8] do_filp_open at ffffffff9b100a33
> #13 [ffff99650065bee0] do_sys_open at ffffffff9b0eb2d6
> #14 [ffff99650065bf38] do_syscall_64 at ffffffff9ae04315
>
> cifs_launder_page is for page 0xffffd1e2c07d2480
>
> crash> page.index,mapping,flags 0xffffd1e2c07d2480
> index = 0x8
> mapping = 0xffff8c68f3cd0db0
> flags = 0xfffffc0008095
>
> PAGE-FLAG BIT VALUE
> PG_locked 0 0000001
> PG_uptodate 2 0000004
> PG_lru 4 0000010
> PG_waiters 7 0000080
> PG_writeback 15 0008000
>
>
> inode is ffff8c68f3cd0c40
> inode.i_rwsem is ffff8c68f3cd0ce0
> DENTRY INODE SUPERBLK TYPE PATH
> ffff8c68a1f1b480 ffff8c68f3cd0c40 ffff8c6915bf9000 REG
> /mnt/vm1_smb/testfile.8853
>
>
> this process holds the inode->i_rwsem for the parent directory, is
> laundering a page attached to the inode of the file it's opening, and in
> _cifsFileInfo_put is trying to down_write the cifsInodeInfo->lock_sem
> for the file itself.
>
>
> <struct rw_semaphore 0xffff8c68f3cd0ce0>:
> owner: <struct task_struct 0xffff8c6914272e80> (UN - 8854 -
> reopen_file), counter: 0x0000000000000003
> waitlist: 1
> 0xffff9965005dfd80 8855 reopen_file UN 0 1:29:22.912
> RWSEM_WAITING_FOR_WRITE
>
> this is the inode.i_rwsem for the file
>
> the owner:
>
> [0 00:48:22.739] [UN] PID: 8854 TASK: ffff8c6914272e80 CPU: 2
> COMMAND: "reopen_file"
> #0 [ffff99650054fb38] __schedule at ffffffff9b6e6095
> #1 [ffff99650054fbc8] schedule at ffffffff9b6e64df
> #2 [ffff99650054fbd8] io_schedule at ffffffff9b6e68e2
> #3 [ffff99650054fbe8] __lock_page at ffffffff9b03c56f
> #4 [ffff99650054fc80] pagecache_get_page at ffffffff9b03dcdf
> #5 [ffff99650054fcc0] grab_cache_page_write_begin at ffffffff9b03ef4c
> #6 [ffff99650054fcd0] cifs_write_begin at ffffffffc0a064ec [cifs]
> #7 [ffff99650054fd30] generic_perform_write at ffffffff9b03bba4
> #8 [ffff99650054fda8] __generic_file_write_iter at ffffffff9b04060a
> #9 [ffff99650054fdf0] cifs_strict_writev.cold.70 at ffffffffc0a4469b [cifs]
> #10 [ffff99650054fe48] new_sync_write at ffffffff9b0ec1dd
> #11 [ffff99650054fed0] vfs_write at ffffffff9b0eed35
> #12 [ffff99650054ff00] ksys_write at ffffffff9b0eefd9
> #13 [ffff99650054ff38] do_syscall_64 at ffffffff9ae04315
>
> the process holds the inode->i_rwsem for the file to which it's writing,
> and is trying to __lock_page for the same page as in the other processes
>
>
> the other tasks:
> [0 00:00:00.028] [UN] PID: 8859 TASK: ffff8c6915479740 CPU: 2
> COMMAND: "reopen_file"
> #0 [ffff9965007b39d8] __schedule at ffffffff9b6e6095
> #1 [ffff9965007b3a68] schedule at ffffffff9b6e64df
> #2 [ffff9965007b3a78] schedule_timeout at ffffffff9b6e9f89
> #3 [ffff9965007b3af0] msleep at ffffffff9af573a9
> #4 [ffff9965007b3af8] cifs_new_fileinfo.cold.61 at ffffffffc0a42a07 [cifs]
> #5 [ffff9965007b3b78] cifs_open at ffffffffc0a0709d [cifs]
> #6 [ffff9965007b3cd8] do_dentry_open at ffffffff9b0e9b7a
> #7 [ffff9965007b3d08] path_openat at ffffffff9b0fe34f
> #8 [ffff9965007b3dd8] do_filp_open at ffffffff9b100a33
> #9 [ffff9965007b3ee0] do_sys_open at ffffffff9b0eb2d6
> #10 [ffff9965007b3f38] do_syscall_64 at ffffffff9ae04315
>
> this is opening the file, and is trying to down_write cinode->lock_sem
>
>
> [0 00:00:00.041] [UN] PID: 8860 TASK: ffff8c691547ae80 CPU: 2
> COMMAND: "reopen_file"
> [0 00:00:00.057] [UN] PID: 8861 TASK: ffff8c6915478000 CPU: 3
> COMMAND: "reopen_file"
> [0 00:00:00.059] [UN] PID: 8858 TASK: ffff8c6914271740 CPU: 2
> COMMAND: "reopen_file"
> [0 00:00:00.109] [UN] PID: 8862 TASK: ffff8c691547dd00 CPU: 6
> COMMAND: "reopen_file"
> #0 [ffff9965007c3c78] __schedule at ffffffff9b6e6095
> #1 [ffff9965007c3d08] schedule at ffffffff9b6e64df
> #2 [ffff9965007c3d18] schedule_timeout at ffffffff9b6e9f89
> #3 [ffff9965007c3d90] msleep at ffffffff9af573a9
> #4 [ffff9965007c3d98] _cifsFileInfo_put.cold.63 at ffffffffc0a42dd6 [cifs]
> #5 [ffff9965007c3e88] cifs_close at ffffffffc0a07aaf [cifs]
> #6 [ffff9965007c3ea0] __fput at ffffffff9b0efa6e
> #7 [ffff9965007c3ee8] task_work_run at ffffffff9aef1614
> #8 [ffff9965007c3f20] exit_to_usermode_loop at ffffffff9ae03d6f
> #9 [ffff9965007c3f38] do_syscall_64 at ffffffff9ae0444c
>
> closing the file, and trying to down_write cifsi->lock_sem
>
>
> [0 00:48:22.839] [UN] PID: 8857 TASK: ffff8c6914270000 CPU: 7
> COMMAND: "reopen_file"
> #0 [ffff9965006a7cc8] __schedule at ffffffff9b6e6095
> #1 [ffff9965006a7d58] schedule at ffffffff9b6e64df
> #2 [ffff9965006a7d68] io_schedule at ffffffff9b6e68e2
> #3 [ffff9965006a7d78] wait_on_page_bit at ffffffff9b03cac6
> #4 [ffff9965006a7e10] __filemap_fdatawait_range at ffffffff9b03b028
> #5 [ffff9965006a7ed8] filemap_write_and_wait at ffffffff9b040165
> #6 [ffff9965006a7ef0] cifs_flush at ffffffffc0a0c2fa [cifs]
> #7 [ffff9965006a7f10] filp_close at ffffffff9b0e93f1
> #8 [ffff9965006a7f30] __x64_sys_close at ffffffff9b0e9a0e
> #9 [ffff9965006a7f38] do_syscall_64 at ffffffff9ae04315
>
> in __filemap_fdatawait_range
> wait_on_page_writeback(page);
> for the same page of the file
>
>
>
> [0 00:48:22.718] [UN] PID: 8855 TASK: ffff8c69142745c0 CPU: 7
> COMMAND: "reopen_file"
> #0 [ffff9965005dfc98] __schedule at ffffffff9b6e6095
> #1 [ffff9965005dfd28] schedule at ffffffff9b6e64df
> #2 [ffff9965005dfd38] rwsem_down_write_slowpath at ffffffff9af283d7
> #3 [ffff9965005dfdf0] cifs_strict_writev at ffffffffc0a0c40a [cifs]
> #4 [ffff9965005dfe48] new_sync_write at ffffffff9b0ec1dd
> #5 [ffff9965005dfed0] vfs_write at ffffffff9b0eed35
> #6 [ffff9965005dff00] ksys_write at ffffffff9b0eefd9
> #7 [ffff9965005dff38] do_syscall_64 at ffffffff9ae04315
>
> inode_lock(inode);
>
>
> and one 'ls' later on, to see whether the rest of the mount is available
> (the test file is in the root, so we get blocked up on the directory
> ->i_rwsem), so the entire mount is unavailable
>
> [0 00:36:26.473] [UN] PID: 9802 TASK: ffff8c691436ae80 CPU: 4
> COMMAND: "ls"
> #0 [ffff996500393d28] __schedule at ffffffff9b6e6095
> #1 [ffff996500393db8] schedule at ffffffff9b6e64df
> #2 [ffff996500393dc8] rwsem_down_read_slowpath at ffffffff9b6e9421
> #3 [ffff996500393e78] down_read_killable at ffffffff9b6e95e2
> #4 [ffff996500393e88] iterate_dir at ffffffff9b103c56
> #5 [ffff996500393ec8] ksys_getdents64 at ffffffff9b104b0c
> #6 [ffff996500393f30] __x64_sys_getdents64 at ffffffff9b104bb6
> #7 [ffff996500393f38] do_syscall_64 at ffffffff9ae04315
>
> in iterate_dir:
> if (shared)
> res = down_read_killable(&inode->i_rwsem); <<<<
> else
> res = down_write_killable(&inode->i_rwsem);
>
Reported-by: Frank Sorenson <sorenson@redhat.com>
Reviewed-by: Pavel Shilovsky <pshilov@microsoft.com>
Signed-off-by: Ronnie Sahlberg <lsahlber@redhat.com>
Signed-off-by: Steve French <stfrench@microsoft.com>
2019-11-03 06:06:37 +03:00
goto out_destroy_fileinfo_put_wq ;
2017-05-03 18:54:01 +03:00
}
2021-04-13 08:26:42 +03:00
deferredclose_wq = alloc_workqueue ( " deferredclose " ,
WQ_FREEZABLE | WQ_MEM_RECLAIM , 0 ) ;
if ( ! deferredclose_wq ) {
rc = - ENOMEM ;
goto out_destroy_cifsoplockd_wq ;
}
2005-04-17 02:20:36 +04:00
rc = cifs_init_inodecache ( ) ;
2006-04-22 02:52:25 +04:00
if ( rc )
cifs: Support fscache indexing rewrite
Change the cifs filesystem to take account of the changes to fscache's
indexing rewrite and reenable caching in cifs.
The following changes have been made:
(1) The fscache_netfs struct is no more, and there's no need to register
the filesystem as a whole.
(2) The session cookie is now an fscache_volume cookie, allocated with
fscache_acquire_volume(). That takes three parameters: a string
representing the "volume" in the index, a string naming the cache to
use (or NULL) and a u64 that conveys coherency metadata for the
volume.
For cifs, I've made it render the volume name string as:
"cifs,<ipaddress>,<sharename>"
where the sharename has '/' characters replaced with ';'.
This probably needs rethinking a bit as the total name could exceed
the maximum filename component length.
Further, the coherency data is currently just set to 0. It needs
something else doing with it - I wonder if it would suffice simply to
sum the resource_id, vol_create_time and vol_serial_number or maybe
hash them.
(3) The fscache_cookie_def is no more and needed information is passed
directly to fscache_acquire_cookie(). The cache no longer calls back
into the filesystem, but rather metadata changes are indicated at
other times.
fscache_acquire_cookie() is passed the same keying and coherency
information as before.
(4) The functions to set/reset cookies are removed and
fscache_use_cookie() and fscache_unuse_cookie() are used instead.
fscache_use_cookie() is passed a flag to indicate if the cookie is
opened for writing. fscache_unuse_cookie() is passed updates for the
metadata if we changed it (ie. if the file was opened for writing).
These are called when the file is opened or closed.
(5) cifs_setattr_*() are made to call fscache_resize() to change the size
of the cache object.
(6) The functions to read and write data are stubbed out pending a
conversion to use netfslib.
Changes
=======
ver #8:
- Abstract cache invalidation into a helper function.
- Fix some checkpatch warnings[3].
ver #7:
- Removed the accidentally added-back call to get the super cookie in
cifs_root_iget().
- Fixed the right call to cifs_fscache_get_super_cookie() to take account
of the "-o fsc" mount flag.
ver #6:
- Moved the change of gfpflags_allow_blocking() to current_is_kswapd() for
cifs here.
- Fixed one of the error paths in cifs_atomic_open() to jump around the
call to use the cookie.
- Fixed an additional successful return in the middle of cifs_open() to
use the cookie on the way out.
- Only get a volume cookie (and thus inode cookies) when "-o fsc" is
supplied to mount.
ver #5:
- Fixed a couple of bits of cookie handling[2]:
- The cookie should be released in cifs_evict_inode(), not
cifsFileInfo_put_final(). The cookie needs to persist beyond file
closure so that writepages will be able to write to it.
- fscache_use_cookie() needs to be called in cifs_atomic_open() as it is
for cifs_open().
ver #4:
- Fixed the use of sizeof with memset.
- tcon->vol_create_time is __le64 so doesn't need cpu_to_le64().
ver #3:
- Canonicalise the cifs coherency data to make the cache portable.
- Set volume coherency data.
ver #2:
- Use gfpflags_allow_blocking() rather than using flag directly.
- Upgraded to -rc4 to allow for upstream changes[1].
- fscache_acquire_volume() now returns errors.
Signed-off-by: David Howells <dhowells@redhat.com>
Acked-by: Jeff Layton <jlayton@kernel.org>
cc: Steve French <smfrench@gmail.com>
cc: Shyam Prasad N <nspmangalore@gmail.com>
cc: linux-cifs@vger.kernel.org
cc: linux-cachefs@redhat.com
Link: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=23b55d673d7527b093cd97b7c217c82e70cd1af0 [1]
Link: https://lore.kernel.org/r/3419813.1641592362@warthog.procyon.org.uk/ [2]
Link: https://lore.kernel.org/r/CAH2r5muTanw9pJqzAHd01d9A8keeChkzGsCEH6=0rHutVLAF-A@mail.gmail.com/ [3]
Link: https://lore.kernel.org/r/163819671009.215744.11230627184193298714.stgit@warthog.procyon.org.uk/ # v1
Link: https://lore.kernel.org/r/163906982979.143852.10672081929614953210.stgit@warthog.procyon.org.uk/ # v2
Link: https://lore.kernel.org/r/163967187187.1823006.247415138444991444.stgit@warthog.procyon.org.uk/ # v3
Link: https://lore.kernel.org/r/164021579335.640689.2681324337038770579.stgit@warthog.procyon.org.uk/ # v4
Link: https://lore.kernel.org/r/3462849.1641593783@warthog.procyon.org.uk/ # v5
Link: https://lore.kernel.org/r/1318953.1642024578@warthog.procyon.org.uk/ # v6
Signed-off-by: Steve French <stfrench@microsoft.com>
2020-11-17 18:56:59 +03:00
goto out_destroy_deferredclose_wq ;
2006-04-22 02:52:25 +04:00
2022-08-05 17:47:39 +03:00
rc = init_mids ( ) ;
2006-04-22 02:52:25 +04:00
if ( rc )
goto out_destroy_inodecache ;
rc = cifs_init_request_bufs ( ) ;
if ( rc )
goto out_destroy_mids ;
2018-11-14 21:24:03 +03:00
# ifdef CONFIG_CIFS_DFS_UPCALL
rc = dfs_cache_init ( ) ;
if ( rc )
goto out_destroy_request_bufs ;
# endif /* CONFIG_CIFS_DFS_UPCALL */
2007-11-03 08:02:24 +03:00
# ifdef CONFIG_CIFS_UPCALL
2016-05-18 02:20:13 +03:00
rc = init_cifs_spnego ( ) ;
2007-11-03 08:02:24 +03:00
if ( rc )
2018-11-14 21:24:03 +03:00
goto out_destroy_dfs_cache ;
2011-04-28 08:34:35 +04:00
# endif /* CONFIG_CIFS_UPCALL */
2020-11-30 21:02:49 +03:00
# ifdef CONFIG_CIFS_SWN_UPCALL
rc = cifs_genl_init ( ) ;
if ( rc )
goto out_register_key_type ;
# endif /* CONFIG_CIFS_SWN_UPCALL */
2011-04-28 08:34:35 +04:00
rc = init_cifs_idmap ( ) ;
if ( rc )
2020-11-30 21:02:49 +03:00
goto out_cifs_swn_init ;
2011-04-28 08:34:35 +04:00
rc = register_filesystem ( & cifs_fs_type ) ;
if ( rc )
2011-05-06 11:35:00 +04:00
goto out_init_cifs_idmap ;
2006-04-22 02:52:25 +04:00
2018-05-24 05:44:53 +03:00
rc = register_filesystem ( & smb3_fs_type ) ;
if ( rc ) {
unregister_filesystem ( & cifs_fs_type ) ;
goto out_init_cifs_idmap ;
}
2006-04-22 02:52:25 +04:00
return 0 ;
2011-05-06 11:35:00 +04:00
out_init_cifs_idmap :
2011-04-28 08:34:35 +04:00
exit_cifs_idmap ( ) ;
2020-11-30 21:02:49 +03:00
out_cifs_swn_init :
# ifdef CONFIG_CIFS_SWN_UPCALL
cifs_genl_exit ( ) ;
2011-05-06 11:35:00 +04:00
out_register_key_type :
2020-11-30 21:02:49 +03:00
# endif
2007-11-03 08:02:24 +03:00
# ifdef CONFIG_CIFS_UPCALL
2016-05-18 02:20:13 +03:00
exit_cifs_spnego ( ) ;
2018-11-14 21:24:03 +03:00
out_destroy_dfs_cache :
# endif
# ifdef CONFIG_CIFS_DFS_UPCALL
dfs_cache_destroy ( ) ;
2011-05-06 11:35:00 +04:00
out_destroy_request_bufs :
2010-08-07 23:54:46 +04:00
# endif
2006-04-22 02:52:25 +04:00
cifs_destroy_request_bufs ( ) ;
2010-09-22 23:15:36 +04:00
out_destroy_mids :
2022-08-05 17:47:39 +03:00
destroy_mids ( ) ;
2010-09-22 23:15:36 +04:00
out_destroy_inodecache :
2006-04-22 02:52:25 +04:00
cifs_destroy_inodecache ( ) ;
2021-04-13 08:26:42 +03:00
out_destroy_deferredclose_wq :
destroy_workqueue ( deferredclose_wq ) ;
2017-05-03 18:54:01 +03:00
out_destroy_cifsoplockd_wq :
destroy_workqueue ( cifsoplockd_wq ) ;
cifs: move cifsFileInfo_put logic into a work-queue
This patch moves the final part of the cifsFileInfo_put() logic where we
need a write lock on lock_sem to be processed in a separate thread that
holds no other locks.
This is to prevent deadlocks like the one below:
> there are 6 processes looping while trying to down_write
> cinode->lock_sem, 5 of them from _cifsFileInfo_put, and one from
> cifs_new_fileinfo
>
> and there are 5 other processes which are blocked, several of them
> waiting on either PG_writeback or PG_locked (which are both set), all
> for the same page of the file
>
> 2 inode_lock() (inode->i_rwsem) for the file
> 1 wait_on_page_writeback() for the page
> 1 down_read(inode->i_rwsem) for the inode of the directory
> 1 inode_lock()(inode->i_rwsem) for the inode of the directory
> 1 __lock_page
>
>
> so processes are blocked waiting on:
> page flags PG_locked and PG_writeback for one specific page
> inode->i_rwsem for the directory
> inode->i_rwsem for the file
> cifsInodeInfo->lock_sem
>
>
>
> here are the more gory details (let me know if I need to provide
> anything more/better):
>
> [0 00:48:22.765] [UN] PID: 8863 TASK: ffff8c691547c5c0 CPU: 3
> COMMAND: "reopen_file"
> #0 [ffff9965007e3ba8] __schedule at ffffffff9b6e6095
> #1 [ffff9965007e3c38] schedule at ffffffff9b6e64df
> #2 [ffff9965007e3c48] rwsem_down_write_slowpath at ffffffff9af283d7
> #3 [ffff9965007e3cb8] legitimize_path at ffffffff9b0f975d
> #4 [ffff9965007e3d08] path_openat at ffffffff9b0fe55d
> #5 [ffff9965007e3dd8] do_filp_open at ffffffff9b100a33
> #6 [ffff9965007e3ee0] do_sys_open at ffffffff9b0eb2d6
> #7 [ffff9965007e3f38] do_syscall_64 at ffffffff9ae04315
> * (I think legitimize_path is bogus)
>
> in path_openat
> } else {
> const char *s = path_init(nd, flags);
> while (!(error = link_path_walk(s, nd)) &&
> (error = do_last(nd, file, op)) > 0) { <<<<
>
> do_last:
> if (open_flag & O_CREAT)
> inode_lock(dir->d_inode); <<<<
> else
> so it's trying to take inode->i_rwsem for the directory
>
> DENTRY INODE SUPERBLK TYPE PATH
> ffff8c68bb8e79c0 ffff8c691158ef20 ffff8c6915bf9000 DIR /mnt/vm1_smb/
> inode.i_rwsem is ffff8c691158efc0
>
> <struct rw_semaphore 0xffff8c691158efc0>:
> owner: <struct task_struct 0xffff8c6914275d00> (UN - 8856 -
> reopen_file), counter: 0x0000000000000003
> waitlist: 2
> 0xffff9965007e3c90 8863 reopen_file UN 0 1:29:22.926
> RWSEM_WAITING_FOR_WRITE
> 0xffff996500393e00 9802 ls UN 0 1:17:26.700
> RWSEM_WAITING_FOR_READ
>
>
> the owner of the inode.i_rwsem of the directory is:
>
> [0 00:00:00.109] [UN] PID: 8856 TASK: ffff8c6914275d00 CPU: 3
> COMMAND: "reopen_file"
> #0 [ffff99650065b828] __schedule at ffffffff9b6e6095
> #1 [ffff99650065b8b8] schedule at ffffffff9b6e64df
> #2 [ffff99650065b8c8] schedule_timeout at ffffffff9b6e9f89
> #3 [ffff99650065b940] msleep at ffffffff9af573a9
> #4 [ffff99650065b948] _cifsFileInfo_put.cold.63 at ffffffffc0a42dd6 [cifs]
> #5 [ffff99650065ba38] cifs_writepage_locked at ffffffffc0a0b8f3 [cifs]
> #6 [ffff99650065bab0] cifs_launder_page at ffffffffc0a0bb72 [cifs]
> #7 [ffff99650065bb30] invalidate_inode_pages2_range at ffffffff9b04d4bd
> #8 [ffff99650065bcb8] cifs_invalidate_mapping at ffffffffc0a11339 [cifs]
> #9 [ffff99650065bcd0] cifs_revalidate_mapping at ffffffffc0a1139a [cifs]
> #10 [ffff99650065bcf0] cifs_d_revalidate at ffffffffc0a014f6 [cifs]
> #11 [ffff99650065bd08] path_openat at ffffffff9b0fe7f7
> #12 [ffff99650065bdd8] do_filp_open at ffffffff9b100a33
> #13 [ffff99650065bee0] do_sys_open at ffffffff9b0eb2d6
> #14 [ffff99650065bf38] do_syscall_64 at ffffffff9ae04315
>
> cifs_launder_page is for page 0xffffd1e2c07d2480
>
> crash> page.index,mapping,flags 0xffffd1e2c07d2480
> index = 0x8
> mapping = 0xffff8c68f3cd0db0
> flags = 0xfffffc0008095
>
> PAGE-FLAG BIT VALUE
> PG_locked 0 0000001
> PG_uptodate 2 0000004
> PG_lru 4 0000010
> PG_waiters 7 0000080
> PG_writeback 15 0008000
>
>
> inode is ffff8c68f3cd0c40
> inode.i_rwsem is ffff8c68f3cd0ce0
> DENTRY INODE SUPERBLK TYPE PATH
> ffff8c68a1f1b480 ffff8c68f3cd0c40 ffff8c6915bf9000 REG
> /mnt/vm1_smb/testfile.8853
>
>
> this process holds the inode->i_rwsem for the parent directory, is
> laundering a page attached to the inode of the file it's opening, and in
> _cifsFileInfo_put is trying to down_write the cifsInodeInfo->lock_sem
> for the file itself.
>
>
> <struct rw_semaphore 0xffff8c68f3cd0ce0>:
> owner: <struct task_struct 0xffff8c6914272e80> (UN - 8854 -
> reopen_file), counter: 0x0000000000000003
> waitlist: 1
> 0xffff9965005dfd80 8855 reopen_file UN 0 1:29:22.912
> RWSEM_WAITING_FOR_WRITE
>
> this is the inode.i_rwsem for the file
>
> the owner:
>
> [0 00:48:22.739] [UN] PID: 8854 TASK: ffff8c6914272e80 CPU: 2
> COMMAND: "reopen_file"
> #0 [ffff99650054fb38] __schedule at ffffffff9b6e6095
> #1 [ffff99650054fbc8] schedule at ffffffff9b6e64df
> #2 [ffff99650054fbd8] io_schedule at ffffffff9b6e68e2
> #3 [ffff99650054fbe8] __lock_page at ffffffff9b03c56f
> #4 [ffff99650054fc80] pagecache_get_page at ffffffff9b03dcdf
> #5 [ffff99650054fcc0] grab_cache_page_write_begin at ffffffff9b03ef4c
> #6 [ffff99650054fcd0] cifs_write_begin at ffffffffc0a064ec [cifs]
> #7 [ffff99650054fd30] generic_perform_write at ffffffff9b03bba4
> #8 [ffff99650054fda8] __generic_file_write_iter at ffffffff9b04060a
> #9 [ffff99650054fdf0] cifs_strict_writev.cold.70 at ffffffffc0a4469b [cifs]
> #10 [ffff99650054fe48] new_sync_write at ffffffff9b0ec1dd
> #11 [ffff99650054fed0] vfs_write at ffffffff9b0eed35
> #12 [ffff99650054ff00] ksys_write at ffffffff9b0eefd9
> #13 [ffff99650054ff38] do_syscall_64 at ffffffff9ae04315
>
> the process holds the inode->i_rwsem for the file to which it's writing,
> and is trying to __lock_page for the same page as in the other processes
>
>
> the other tasks:
> [0 00:00:00.028] [UN] PID: 8859 TASK: ffff8c6915479740 CPU: 2
> COMMAND: "reopen_file"
> #0 [ffff9965007b39d8] __schedule at ffffffff9b6e6095
> #1 [ffff9965007b3a68] schedule at ffffffff9b6e64df
> #2 [ffff9965007b3a78] schedule_timeout at ffffffff9b6e9f89
> #3 [ffff9965007b3af0] msleep at ffffffff9af573a9
> #4 [ffff9965007b3af8] cifs_new_fileinfo.cold.61 at ffffffffc0a42a07 [cifs]
> #5 [ffff9965007b3b78] cifs_open at ffffffffc0a0709d [cifs]
> #6 [ffff9965007b3cd8] do_dentry_open at ffffffff9b0e9b7a
> #7 [ffff9965007b3d08] path_openat at ffffffff9b0fe34f
> #8 [ffff9965007b3dd8] do_filp_open at ffffffff9b100a33
> #9 [ffff9965007b3ee0] do_sys_open at ffffffff9b0eb2d6
> #10 [ffff9965007b3f38] do_syscall_64 at ffffffff9ae04315
>
> this is opening the file, and is trying to down_write cinode->lock_sem
>
>
> [0 00:00:00.041] [UN] PID: 8860 TASK: ffff8c691547ae80 CPU: 2
> COMMAND: "reopen_file"
> [0 00:00:00.057] [UN] PID: 8861 TASK: ffff8c6915478000 CPU: 3
> COMMAND: "reopen_file"
> [0 00:00:00.059] [UN] PID: 8858 TASK: ffff8c6914271740 CPU: 2
> COMMAND: "reopen_file"
> [0 00:00:00.109] [UN] PID: 8862 TASK: ffff8c691547dd00 CPU: 6
> COMMAND: "reopen_file"
> #0 [ffff9965007c3c78] __schedule at ffffffff9b6e6095
> #1 [ffff9965007c3d08] schedule at ffffffff9b6e64df
> #2 [ffff9965007c3d18] schedule_timeout at ffffffff9b6e9f89
> #3 [ffff9965007c3d90] msleep at ffffffff9af573a9
> #4 [ffff9965007c3d98] _cifsFileInfo_put.cold.63 at ffffffffc0a42dd6 [cifs]
> #5 [ffff9965007c3e88] cifs_close at ffffffffc0a07aaf [cifs]
> #6 [ffff9965007c3ea0] __fput at ffffffff9b0efa6e
> #7 [ffff9965007c3ee8] task_work_run at ffffffff9aef1614
> #8 [ffff9965007c3f20] exit_to_usermode_loop at ffffffff9ae03d6f
> #9 [ffff9965007c3f38] do_syscall_64 at ffffffff9ae0444c
>
> closing the file, and trying to down_write cifsi->lock_sem
>
>
> [0 00:48:22.839] [UN] PID: 8857 TASK: ffff8c6914270000 CPU: 7
> COMMAND: "reopen_file"
> #0 [ffff9965006a7cc8] __schedule at ffffffff9b6e6095
> #1 [ffff9965006a7d58] schedule at ffffffff9b6e64df
> #2 [ffff9965006a7d68] io_schedule at ffffffff9b6e68e2
> #3 [ffff9965006a7d78] wait_on_page_bit at ffffffff9b03cac6
> #4 [ffff9965006a7e10] __filemap_fdatawait_range at ffffffff9b03b028
> #5 [ffff9965006a7ed8] filemap_write_and_wait at ffffffff9b040165
> #6 [ffff9965006a7ef0] cifs_flush at ffffffffc0a0c2fa [cifs]
> #7 [ffff9965006a7f10] filp_close at ffffffff9b0e93f1
> #8 [ffff9965006a7f30] __x64_sys_close at ffffffff9b0e9a0e
> #9 [ffff9965006a7f38] do_syscall_64 at ffffffff9ae04315
>
> in __filemap_fdatawait_range
> wait_on_page_writeback(page);
> for the same page of the file
>
>
>
> [0 00:48:22.718] [UN] PID: 8855 TASK: ffff8c69142745c0 CPU: 7
> COMMAND: "reopen_file"
> #0 [ffff9965005dfc98] __schedule at ffffffff9b6e6095
> #1 [ffff9965005dfd28] schedule at ffffffff9b6e64df
> #2 [ffff9965005dfd38] rwsem_down_write_slowpath at ffffffff9af283d7
> #3 [ffff9965005dfdf0] cifs_strict_writev at ffffffffc0a0c40a [cifs]
> #4 [ffff9965005dfe48] new_sync_write at ffffffff9b0ec1dd
> #5 [ffff9965005dfed0] vfs_write at ffffffff9b0eed35
> #6 [ffff9965005dff00] ksys_write at ffffffff9b0eefd9
> #7 [ffff9965005dff38] do_syscall_64 at ffffffff9ae04315
>
> inode_lock(inode);
>
>
> and one 'ls' later on, to see whether the rest of the mount is available
> (the test file is in the root, so we get blocked up on the directory
> ->i_rwsem), so the entire mount is unavailable
>
> [0 00:36:26.473] [UN] PID: 9802 TASK: ffff8c691436ae80 CPU: 4
> COMMAND: "ls"
> #0 [ffff996500393d28] __schedule at ffffffff9b6e6095
> #1 [ffff996500393db8] schedule at ffffffff9b6e64df
> #2 [ffff996500393dc8] rwsem_down_read_slowpath at ffffffff9b6e9421
> #3 [ffff996500393e78] down_read_killable at ffffffff9b6e95e2
> #4 [ffff996500393e88] iterate_dir at ffffffff9b103c56
> #5 [ffff996500393ec8] ksys_getdents64 at ffffffff9b104b0c
> #6 [ffff996500393f30] __x64_sys_getdents64 at ffffffff9b104bb6
> #7 [ffff996500393f38] do_syscall_64 at ffffffff9ae04315
>
> in iterate_dir:
> if (shared)
> res = down_read_killable(&inode->i_rwsem); <<<<
> else
> res = down_write_killable(&inode->i_rwsem);
>
Reported-by: Frank Sorenson <sorenson@redhat.com>
Reviewed-by: Pavel Shilovsky <pshilov@microsoft.com>
Signed-off-by: Ronnie Sahlberg <lsahlber@redhat.com>
Signed-off-by: Steve French <stfrench@microsoft.com>
2019-11-03 06:06:37 +03:00
out_destroy_fileinfo_put_wq :
destroy_workqueue ( fileinfo_put_wq ) ;
2019-09-07 09:09:49 +03:00
out_destroy_decrypt_wq :
destroy_workqueue ( decrypt_wq ) ;
2017-05-03 18:54:01 +03:00
out_destroy_cifsiod_wq :
2012-03-23 22:40:53 +04:00
destroy_workqueue ( cifsiod_wq ) ;
2010-09-22 23:15:36 +04:00
out_clean_proc :
cifs_proc_clean ( ) ;
2005-04-17 02:20:36 +04:00
return rc ;
}
static void __exit
exit_cifs ( void )
{
2018-05-24 05:44:53 +03:00
cifs_dbg ( NOISY , " exit_smb3 \n " ) ;
2012-03-21 14:27:55 +04:00
unregister_filesystem ( & cifs_fs_type ) ;
2018-05-24 05:44:53 +03:00
unregister_filesystem ( & smb3_fs_type ) ;
2008-04-24 12:56:07 +04:00
cifs_dfs_release_automount_timer ( ) ;
2011-04-28 08:34:35 +04:00
exit_cifs_idmap ( ) ;
2020-11-30 21:02:49 +03:00
# ifdef CONFIG_CIFS_SWN_UPCALL
cifs_genl_exit ( ) ;
# endif
2007-11-03 08:02:24 +03:00
# ifdef CONFIG_CIFS_UPCALL
2017-09-07 11:03:27 +03:00
exit_cifs_spnego ( ) ;
2018-11-14 21:24:03 +03:00
# endif
# ifdef CONFIG_CIFS_DFS_UPCALL
dfs_cache_destroy ( ) ;
2005-04-17 02:20:36 +04:00
# endif
cifs_destroy_request_bufs ( ) ;
2022-08-05 17:47:39 +03:00
destroy_mids ( ) ;
2012-03-21 14:27:55 +04:00
cifs_destroy_inodecache ( ) ;
2021-04-13 08:26:42 +03:00
destroy_workqueue ( deferredclose_wq ) ;
2017-05-03 18:54:01 +03:00
destroy_workqueue ( cifsoplockd_wq ) ;
2019-09-07 09:09:49 +03:00
destroy_workqueue ( decrypt_wq ) ;
cifs: move cifsFileInfo_put logic into a work-queue
This patch moves the final part of the cifsFileInfo_put() logic where we
need a write lock on lock_sem to be processed in a separate thread that
holds no other locks.
This is to prevent deadlocks like the one below:
> there are 6 processes looping while trying to down_write
> cinode->lock_sem, 5 of them from _cifsFileInfo_put, and one from
> cifs_new_fileinfo
>
> and there are 5 other processes which are blocked, several of them
> waiting on either PG_writeback or PG_locked (which are both set), all
> for the same page of the file
>
> 2 inode_lock() (inode->i_rwsem) for the file
> 1 wait_on_page_writeback() for the page
> 1 down_read(inode->i_rwsem) for the inode of the directory
> 1 inode_lock()(inode->i_rwsem) for the inode of the directory
> 1 __lock_page
>
>
> so processes are blocked waiting on:
> page flags PG_locked and PG_writeback for one specific page
> inode->i_rwsem for the directory
> inode->i_rwsem for the file
> cifsInodeInfo->lock_sem
>
>
>
> here are the more gory details (let me know if I need to provide
> anything more/better):
>
> [0 00:48:22.765] [UN] PID: 8863 TASK: ffff8c691547c5c0 CPU: 3
> COMMAND: "reopen_file"
> #0 [ffff9965007e3ba8] __schedule at ffffffff9b6e6095
> #1 [ffff9965007e3c38] schedule at ffffffff9b6e64df
> #2 [ffff9965007e3c48] rwsem_down_write_slowpath at ffffffff9af283d7
> #3 [ffff9965007e3cb8] legitimize_path at ffffffff9b0f975d
> #4 [ffff9965007e3d08] path_openat at ffffffff9b0fe55d
> #5 [ffff9965007e3dd8] do_filp_open at ffffffff9b100a33
> #6 [ffff9965007e3ee0] do_sys_open at ffffffff9b0eb2d6
> #7 [ffff9965007e3f38] do_syscall_64 at ffffffff9ae04315
> * (I think legitimize_path is bogus)
>
> in path_openat
> } else {
> const char *s = path_init(nd, flags);
> while (!(error = link_path_walk(s, nd)) &&
> (error = do_last(nd, file, op)) > 0) { <<<<
>
> do_last:
> if (open_flag & O_CREAT)
> inode_lock(dir->d_inode); <<<<
> else
> so it's trying to take inode->i_rwsem for the directory
>
> DENTRY INODE SUPERBLK TYPE PATH
> ffff8c68bb8e79c0 ffff8c691158ef20 ffff8c6915bf9000 DIR /mnt/vm1_smb/
> inode.i_rwsem is ffff8c691158efc0
>
> <struct rw_semaphore 0xffff8c691158efc0>:
> owner: <struct task_struct 0xffff8c6914275d00> (UN - 8856 -
> reopen_file), counter: 0x0000000000000003
> waitlist: 2
> 0xffff9965007e3c90 8863 reopen_file UN 0 1:29:22.926
> RWSEM_WAITING_FOR_WRITE
> 0xffff996500393e00 9802 ls UN 0 1:17:26.700
> RWSEM_WAITING_FOR_READ
>
>
> the owner of the inode.i_rwsem of the directory is:
>
> [0 00:00:00.109] [UN] PID: 8856 TASK: ffff8c6914275d00 CPU: 3
> COMMAND: "reopen_file"
> #0 [ffff99650065b828] __schedule at ffffffff9b6e6095
> #1 [ffff99650065b8b8] schedule at ffffffff9b6e64df
> #2 [ffff99650065b8c8] schedule_timeout at ffffffff9b6e9f89
> #3 [ffff99650065b940] msleep at ffffffff9af573a9
> #4 [ffff99650065b948] _cifsFileInfo_put.cold.63 at ffffffffc0a42dd6 [cifs]
> #5 [ffff99650065ba38] cifs_writepage_locked at ffffffffc0a0b8f3 [cifs]
> #6 [ffff99650065bab0] cifs_launder_page at ffffffffc0a0bb72 [cifs]
> #7 [ffff99650065bb30] invalidate_inode_pages2_range at ffffffff9b04d4bd
> #8 [ffff99650065bcb8] cifs_invalidate_mapping at ffffffffc0a11339 [cifs]
> #9 [ffff99650065bcd0] cifs_revalidate_mapping at ffffffffc0a1139a [cifs]
> #10 [ffff99650065bcf0] cifs_d_revalidate at ffffffffc0a014f6 [cifs]
> #11 [ffff99650065bd08] path_openat at ffffffff9b0fe7f7
> #12 [ffff99650065bdd8] do_filp_open at ffffffff9b100a33
> #13 [ffff99650065bee0] do_sys_open at ffffffff9b0eb2d6
> #14 [ffff99650065bf38] do_syscall_64 at ffffffff9ae04315
>
> cifs_launder_page is for page 0xffffd1e2c07d2480
>
> crash> page.index,mapping,flags 0xffffd1e2c07d2480
> index = 0x8
> mapping = 0xffff8c68f3cd0db0
> flags = 0xfffffc0008095
>
> PAGE-FLAG BIT VALUE
> PG_locked 0 0000001
> PG_uptodate 2 0000004
> PG_lru 4 0000010
> PG_waiters 7 0000080
> PG_writeback 15 0008000
>
>
> inode is ffff8c68f3cd0c40
> inode.i_rwsem is ffff8c68f3cd0ce0
> DENTRY INODE SUPERBLK TYPE PATH
> ffff8c68a1f1b480 ffff8c68f3cd0c40 ffff8c6915bf9000 REG
> /mnt/vm1_smb/testfile.8853
>
>
> this process holds the inode->i_rwsem for the parent directory, is
> laundering a page attached to the inode of the file it's opening, and in
> _cifsFileInfo_put is trying to down_write the cifsInodeInfo->lock_sem
> for the file itself.
>
>
> <struct rw_semaphore 0xffff8c68f3cd0ce0>:
> owner: <struct task_struct 0xffff8c6914272e80> (UN - 8854 -
> reopen_file), counter: 0x0000000000000003
> waitlist: 1
> 0xffff9965005dfd80 8855 reopen_file UN 0 1:29:22.912
> RWSEM_WAITING_FOR_WRITE
>
> this is the inode.i_rwsem for the file
>
> the owner:
>
> [0 00:48:22.739] [UN] PID: 8854 TASK: ffff8c6914272e80 CPU: 2
> COMMAND: "reopen_file"
> #0 [ffff99650054fb38] __schedule at ffffffff9b6e6095
> #1 [ffff99650054fbc8] schedule at ffffffff9b6e64df
> #2 [ffff99650054fbd8] io_schedule at ffffffff9b6e68e2
> #3 [ffff99650054fbe8] __lock_page at ffffffff9b03c56f
> #4 [ffff99650054fc80] pagecache_get_page at ffffffff9b03dcdf
> #5 [ffff99650054fcc0] grab_cache_page_write_begin at ffffffff9b03ef4c
> #6 [ffff99650054fcd0] cifs_write_begin at ffffffffc0a064ec [cifs]
> #7 [ffff99650054fd30] generic_perform_write at ffffffff9b03bba4
> #8 [ffff99650054fda8] __generic_file_write_iter at ffffffff9b04060a
> #9 [ffff99650054fdf0] cifs_strict_writev.cold.70 at ffffffffc0a4469b [cifs]
> #10 [ffff99650054fe48] new_sync_write at ffffffff9b0ec1dd
> #11 [ffff99650054fed0] vfs_write at ffffffff9b0eed35
> #12 [ffff99650054ff00] ksys_write at ffffffff9b0eefd9
> #13 [ffff99650054ff38] do_syscall_64 at ffffffff9ae04315
>
> the process holds the inode->i_rwsem for the file to which it's writing,
> and is trying to __lock_page for the same page as in the other processes
>
>
> the other tasks:
> [0 00:00:00.028] [UN] PID: 8859 TASK: ffff8c6915479740 CPU: 2
> COMMAND: "reopen_file"
> #0 [ffff9965007b39d8] __schedule at ffffffff9b6e6095
> #1 [ffff9965007b3a68] schedule at ffffffff9b6e64df
> #2 [ffff9965007b3a78] schedule_timeout at ffffffff9b6e9f89
> #3 [ffff9965007b3af0] msleep at ffffffff9af573a9
> #4 [ffff9965007b3af8] cifs_new_fileinfo.cold.61 at ffffffffc0a42a07 [cifs]
> #5 [ffff9965007b3b78] cifs_open at ffffffffc0a0709d [cifs]
> #6 [ffff9965007b3cd8] do_dentry_open at ffffffff9b0e9b7a
> #7 [ffff9965007b3d08] path_openat at ffffffff9b0fe34f
> #8 [ffff9965007b3dd8] do_filp_open at ffffffff9b100a33
> #9 [ffff9965007b3ee0] do_sys_open at ffffffff9b0eb2d6
> #10 [ffff9965007b3f38] do_syscall_64 at ffffffff9ae04315
>
> this is opening the file, and is trying to down_write cinode->lock_sem
>
>
> [0 00:00:00.041] [UN] PID: 8860 TASK: ffff8c691547ae80 CPU: 2
> COMMAND: "reopen_file"
> [0 00:00:00.057] [UN] PID: 8861 TASK: ffff8c6915478000 CPU: 3
> COMMAND: "reopen_file"
> [0 00:00:00.059] [UN] PID: 8858 TASK: ffff8c6914271740 CPU: 2
> COMMAND: "reopen_file"
> [0 00:00:00.109] [UN] PID: 8862 TASK: ffff8c691547dd00 CPU: 6
> COMMAND: "reopen_file"
> #0 [ffff9965007c3c78] __schedule at ffffffff9b6e6095
> #1 [ffff9965007c3d08] schedule at ffffffff9b6e64df
> #2 [ffff9965007c3d18] schedule_timeout at ffffffff9b6e9f89
> #3 [ffff9965007c3d90] msleep at ffffffff9af573a9
> #4 [ffff9965007c3d98] _cifsFileInfo_put.cold.63 at ffffffffc0a42dd6 [cifs]
> #5 [ffff9965007c3e88] cifs_close at ffffffffc0a07aaf [cifs]
> #6 [ffff9965007c3ea0] __fput at ffffffff9b0efa6e
> #7 [ffff9965007c3ee8] task_work_run at ffffffff9aef1614
> #8 [ffff9965007c3f20] exit_to_usermode_loop at ffffffff9ae03d6f
> #9 [ffff9965007c3f38] do_syscall_64 at ffffffff9ae0444c
>
> closing the file, and trying to down_write cifsi->lock_sem
>
>
> [0 00:48:22.839] [UN] PID: 8857 TASK: ffff8c6914270000 CPU: 7
> COMMAND: "reopen_file"
> #0 [ffff9965006a7cc8] __schedule at ffffffff9b6e6095
> #1 [ffff9965006a7d58] schedule at ffffffff9b6e64df
> #2 [ffff9965006a7d68] io_schedule at ffffffff9b6e68e2
> #3 [ffff9965006a7d78] wait_on_page_bit at ffffffff9b03cac6
> #4 [ffff9965006a7e10] __filemap_fdatawait_range at ffffffff9b03b028
> #5 [ffff9965006a7ed8] filemap_write_and_wait at ffffffff9b040165
> #6 [ffff9965006a7ef0] cifs_flush at ffffffffc0a0c2fa [cifs]
> #7 [ffff9965006a7f10] filp_close at ffffffff9b0e93f1
> #8 [ffff9965006a7f30] __x64_sys_close at ffffffff9b0e9a0e
> #9 [ffff9965006a7f38] do_syscall_64 at ffffffff9ae04315
>
> in __filemap_fdatawait_range
> wait_on_page_writeback(page);
> for the same page of the file
>
>
>
> [0 00:48:22.718] [UN] PID: 8855 TASK: ffff8c69142745c0 CPU: 7
> COMMAND: "reopen_file"
> #0 [ffff9965005dfc98] __schedule at ffffffff9b6e6095
> #1 [ffff9965005dfd28] schedule at ffffffff9b6e64df
> #2 [ffff9965005dfd38] rwsem_down_write_slowpath at ffffffff9af283d7
> #3 [ffff9965005dfdf0] cifs_strict_writev at ffffffffc0a0c40a [cifs]
> #4 [ffff9965005dfe48] new_sync_write at ffffffff9b0ec1dd
> #5 [ffff9965005dfed0] vfs_write at ffffffff9b0eed35
> #6 [ffff9965005dff00] ksys_write at ffffffff9b0eefd9
> #7 [ffff9965005dff38] do_syscall_64 at ffffffff9ae04315
>
> inode_lock(inode);
>
>
> and one 'ls' later on, to see whether the rest of the mount is available
> (the test file is in the root, so we get blocked up on the directory
> ->i_rwsem), so the entire mount is unavailable
>
> [0 00:36:26.473] [UN] PID: 9802 TASK: ffff8c691436ae80 CPU: 4
> COMMAND: "ls"
> #0 [ffff996500393d28] __schedule at ffffffff9b6e6095
> #1 [ffff996500393db8] schedule at ffffffff9b6e64df
> #2 [ffff996500393dc8] rwsem_down_read_slowpath at ffffffff9b6e9421
> #3 [ffff996500393e78] down_read_killable at ffffffff9b6e95e2
> #4 [ffff996500393e88] iterate_dir at ffffffff9b103c56
> #5 [ffff996500393ec8] ksys_getdents64 at ffffffff9b104b0c
> #6 [ffff996500393f30] __x64_sys_getdents64 at ffffffff9b104bb6
> #7 [ffff996500393f38] do_syscall_64 at ffffffff9ae04315
>
> in iterate_dir:
> if (shared)
> res = down_read_killable(&inode->i_rwsem); <<<<
> else
> res = down_write_killable(&inode->i_rwsem);
>
Reported-by: Frank Sorenson <sorenson@redhat.com>
Reviewed-by: Pavel Shilovsky <pshilov@microsoft.com>
Signed-off-by: Ronnie Sahlberg <lsahlber@redhat.com>
Signed-off-by: Steve French <stfrench@microsoft.com>
2019-11-03 06:06:37 +03:00
destroy_workqueue ( fileinfo_put_wq ) ;
2012-03-23 22:40:53 +04:00
destroy_workqueue ( cifsiod_wq ) ;
2012-03-21 14:27:55 +04:00
cifs_proc_clean ( ) ;
2005-04-17 02:20:36 +04:00
}
2018-09-18 12:07:45 +03:00
/* Module metadata: author, license, description and version strings. */
MODULE_AUTHOR ( " Steve French " ) ;
2007-07-07 03:13:06 +04:00
MODULE_LICENSE ( " GPL " ) ; /* combination of LGPL + GPL source behaves as GPL */
2005-04-17 02:20:36 +04:00
MODULE_DESCRIPTION
2018-09-18 12:07:45 +03:00
( " VFS to access SMB3 servers e.g. Samba, Macs, Azure and Windows (and "
" also older servers complying with the SNIA CIFS Specification) " ) ;
2005-04-17 02:20:36 +04:00
/* Version string comes from the CIFS_VERSION macro (defined outside this chunk). */
MODULE_VERSION ( CIFS_VERSION ) ;
2019-10-31 06:55:14 +03:00
/*
 * Soft dependencies declared for this module, one MODULE_SOFTDEP per name.
 * NOTE(review): the names look like crypto algorithm modules plus NLS
 * support -- confirm against the code that actually uses them.
 */
MODULE_SOFTDEP ( " ecb " ) ;
MODULE_SOFTDEP ( " hmac " ) ;
MODULE_SOFTDEP ( " md5 " ) ;
MODULE_SOFTDEP ( " nls " ) ;
MODULE_SOFTDEP ( " aes " ) ;
MODULE_SOFTDEP ( " cmac " ) ;
MODULE_SOFTDEP ( " sha256 " ) ;
MODULE_SOFTDEP ( " sha512 " ) ;
MODULE_SOFTDEP ( " aead2 " ) ;
MODULE_SOFTDEP ( " ccm " ) ;
MODULE_SOFTDEP ( " gcm " ) ;
2005-04-17 02:20:36 +04:00
/* Register init_cifs and exit_cifs as the module's init and exit handlers. */
module_init ( init_cifs )
module_exit ( exit_cifs )