2000-06-10 18:29:31 +04:00
/*
2002-01-30 09:08:46 +03:00
Unix SMB / CIFS implementation .
2000-06-10 18:29:31 +04:00
kernel oplock processing for Linux
Copyright ( C ) Andrew Tridgell 2000
2011-09-12 19:19:27 +04:00
2000-06-10 18:29:31 +04:00
This program is free software ; you can redistribute it and / or modify
it under the terms of the GNU General Public License as published by
2007-07-09 23:25:36 +04:00
the Free Software Foundation ; either version 3 of the License , or
2000-06-10 18:29:31 +04:00
( at your option ) any later version .
2011-09-12 19:19:27 +04:00
2000-06-10 18:29:31 +04:00
This program is distributed in the hope that it will be useful ,
but WITHOUT ANY WARRANTY ; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE . See the
GNU General Public License for more details .
2011-09-12 19:19:27 +04:00
2000-06-10 18:29:31 +04:00
You should have received a copy of the GNU General Public License
2007-07-10 04:52:41 +04:00
along with this program . If not , see < http : //www.gnu.org/licenses/>.
2000-06-10 18:29:31 +04:00
*/
2006-05-05 06:06:37 +04:00
# define DBGC_CLASS DBGC_LOCKING
2000-06-10 18:29:31 +04:00
# include "includes.h"
2011-02-26 01:20:06 +03:00
# include "system/filesys.h"
2011-03-22 18:57:01 +03:00
# include "smbd/smbd.h"
2009-01-08 14:03:45 +03:00
# include "smbd/globals.h"
2000-06-10 18:29:31 +04:00
2000-06-11 09:57:58 +04:00
# if HAVE_KERNEL_OPLOCKS_LINUX
2000-06-11 10:24:54 +04:00
/*
 * Fallback definitions, used only when the included headers do not
 * already provide these names (each one is guarded by #ifndef).
 * F_SETLEASE and F_GETLEASE are the fcntl() commands this file uses to
 * set and query leases.
 * NOTE(review): the numeric values presumably mirror the Linux kernel
 * ABI - confirm against the kernel headers.
 * NOTE(review): CAP_LEASE is not referenced by the code visible in this
 * file, which passes LEASE_CAPABILITY to set_effective_capability().
 */
# ifndef F_SETLEASE
# define F_SETLEASE 1024
# endif
# ifndef F_GETLEASE
# define F_GETLEASE 1025
# endif
# ifndef CAP_LEASE
# define CAP_LEASE 28
# endif
2000-06-11 09:57:58 +04:00
totally rewrote the async signal, notification and oplock notification
handling in Samba. This was needed due to several limitations and
races in the previous code - as a side effect the new code is much
cleaner :)
in summary:
- changed sys_select() to avoid a signal/select race condition. It is a
rare race but once we have signals doing notification and oplocks it
is important.
- changed our main processing loop to take advantage of the new
sys_select semantics
- split the notify code into implementation dependent and general
parts. Added the following structure that defines an implementation:
struct cnotify_fns {
void * (*register_notify)(connection_struct *conn, char *path, uint32 flags);
BOOL (*check_notify)(connection_struct *conn, uint16 vuid, char *path, uint32 flags, void *data, time_t t);
void (*remove_notify)(void *data);
};
then I wrote two implementations, one using hash/poll (like our old
code) and the other using the new Linux kernel change notify. It
should be easy to add other change notify implementations by creating
a structure of the above type.
- fixed a bug in change notify where we were returning the wrong error
code.
- rewrote the core change notify code to be much simpler
- moved to real-time signals for leases and change notify
Amazingly, it all seems to work. I was very surprised!
(This used to be commit 44766c39e0027c762bee8b33b12c621c109a3267)
2000-06-12 19:53:31 +04:00
# ifndef RT_SIGNAL_LEASE
2003-03-28 04:07:05 +03:00
# define RT_SIGNAL_LEASE (SIGRTMIN+1)
totally rewrote the async signal, notification and oplock notification
handling in Samba. This was needed due to several limitations and
races in the previous code - as a side effect the new code is much
cleaner :)
in summary:
- changed sys_select() to avoid a signal/select race condition. It is a
rare race but once we have signals doing notification and oplocks it
is important.
- changed our main processing loop to take advantage of the new
sys_select semantics
- split the notify code into implementation dependent and general
parts. Added the following structure that defines an implementation:
struct cnotify_fns {
void * (*register_notify)(connection_struct *conn, char *path, uint32 flags);
BOOL (*check_notify)(connection_struct *conn, uint16 vuid, char *path, uint32 flags, void *data, time_t t);
void (*remove_notify)(void *data);
};
then I wrote two implementations, one using hash/poll (like our old
code) and the other using the new Linux kernel change notify. It
should be easy to add other change notify implementations by creating
a structure of the above type.
- fixed a bug in change notify where we were returning the wrong error
code.
- rewrote the core change notify code to be much simpler
- moved to real-time signals for leases and change notify
Amazingly, it all seems to work. I was very surprised!
(This used to be commit 44766c39e0027c762bee8b33b12c621c109a3267)
2000-06-12 19:53:31 +04:00
# endif
2000-06-12 21:06:00 +04:00
/*
 * Fallback for F_SETSIG when the headers do not define it. This is the
 * fcntl() command linux_set_lease_sighandler() uses to select
 * RT_SIGNAL_LEASE as the signal for a file descriptor.
 * NOTE(review): value presumably matches the Linux kernel ABI - confirm.
 */
# ifndef F_SETSIG
# define F_SETSIG 10
# endif
2007-02-14 05:37:14 +03:00
/*
2008-01-16 12:09:48 +03:00
* public function to get linux lease capability . Needed by some VFS modules ( eg . gpfs . c )
*/
void linux_set_lease_capability ( void )
{
2008-05-28 15:20:16 +04:00
set_effective_capability ( LEASE_CAPABILITY ) ;
2008-01-16 12:09:48 +03:00
}
/*
* Call to set the kernel lease signal handler
*/
2007-02-14 05:37:14 +03:00
int linux_set_lease_sighandler ( int fd )
{
if ( fcntl ( fd , F_SETSIG , RT_SIGNAL_LEASE ) = = - 1 ) {
DEBUG ( 3 , ( " Failed to set signal handler for kernel lease \n " ) ) ;
return - 1 ;
}
return 0 ;
}
2000-06-11 09:57:58 +04:00
/****************************************************************************
2006-01-06 13:27:12 +03:00
Call SETLEASE . If we get EACCES then we try setting up the right capability and
2007-02-14 05:37:14 +03:00
try again .
Use the SMB_VFS_LINUX_SETLEASE instead of this call directly .
2000-06-11 09:57:58 +04:00
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
2001-10-21 01:59:34 +04:00
2007-02-14 05:37:14 +03:00
int linux_setlease ( int fd , int leasetype )
2000-06-11 09:57:58 +04:00
{
int ret ;
2000-06-12 21:06:00 +04:00
2006-01-06 13:27:12 +03:00
ret = fcntl ( fd , F_SETLEASE , leasetype ) ;
2000-06-11 09:57:58 +04:00
if ( ret = = - 1 & & errno = = EACCES ) {
2008-05-28 15:20:16 +04:00
set_effective_capability ( LEASE_CAPABILITY ) ;
2000-06-11 09:57:58 +04:00
ret = fcntl ( fd , F_SETLEASE , leasetype ) ;
}
return ret ;
}
2000-06-10 18:29:31 +04:00
/****************************************************************************
* Deal with the Linux kernel < - - > smbd
* oplock break protocol .
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
2001-10-21 01:59:34 +04:00
2009-01-23 12:08:44 +03:00
static void linux_oplock_signal_handler ( struct tevent_context * ev_ctx ,
struct tevent_signal * se ,
int signum , int count ,
void * _info , void * private_data )
2000-06-10 18:29:31 +04:00
{
2009-01-23 12:08:44 +03:00
siginfo_t * info = ( siginfo_t * ) _info ;
int fd = info - > si_fd ;
2005-09-30 21:13:37 +04:00
files_struct * fsp ;
2000-06-10 18:29:31 +04:00
2010-09-27 06:54:29 +04:00
fsp = file_find_fd ( smbd_server_conn , fd ) ;
2009-08-25 08:14:52 +04:00
if ( fsp = = NULL ) {
Hopefully last part of the fix for bug 6651 - smbd SIGSEGV when breaking oplocks.
This one is subtle. There is a race condition where a signal can be
queued for oplock break, and then the file can be closed by the client
before the signal can be processed. Currently if this occurs we panic
(we can't match an incoming signal fd with a fsp pointer). Simply log
the error (at debug level 10 right now, might be too much) and then
return without processing the break request. It looks like there is
another race condition with this fix, but here's why it won't happen.
If the signal was pending (caused by a kernel oplock break from a
local file open), and the client closed the file and then re-opened
another file which happened to use the same file descriptor as the
file just closed, then theoretically the oplock break requests could
be processed on the wrong fd. Here's why this should be very rare..
Processing a pending signal always take precedence over an incoming
network request, so as long as the client close request is non-chained
then the break signal should always be harmlessly processed *before*
the open can be called. If the open is chained onto the close, and
the fd on the new open is the same as the old closed fd, then it's
possible this race will occur. However, all that will happen is that
we'll lose the oplock on this file. A shame, but not a fatal event.
Jeremy.
2009-09-03 18:40:48 +04:00
DEBUG ( 0 , ( " linux_oplock_signal_handler: failed to find fsp for file fd=%d (file was closed ?) \n " , fd ) ) ;
return ;
2009-08-25 08:14:52 +04:00
}
2010-07-04 21:44:57 +04:00
break_kernel_oplock ( fsp - > conn - > sconn - > msg_ctx , fsp ) ;
2000-06-10 18:29:31 +04:00
}
/****************************************************************************
Attempt to set an kernel oplock on a file .
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
2001-10-21 01:59:34 +04:00
2009-01-09 16:02:18 +03:00
static bool linux_set_kernel_oplock ( struct kernel_oplocks * ctx ,
files_struct * fsp , int oplock_type )
2000-06-10 18:29:31 +04:00
{
2008-01-07 23:47:53 +03:00
if ( SMB_VFS_LINUX_SETLEASE ( fsp , F_WRLCK ) = = - 1 ) {
2006-08-28 22:25:55 +04:00
DEBUG ( 3 , ( " linux_set_kernel_oplock: Refused oplock on file %s, "
2007-05-29 13:30:34 +04:00
" fd = %d, file_id = %s. (%s) \n " ,
2009-07-11 01:50:37 +04:00
fsp_str_dbg ( fsp ) , fsp - > fh - > fd ,
2007-09-10 14:56:07 +04:00
file_id_string_tos ( & fsp - > file_id ) ,
2006-08-28 22:25:55 +04:00
strerror ( errno ) ) ) ;
2000-06-10 18:29:31 +04:00
return False ;
}
2006-08-28 22:25:55 +04:00
DEBUG ( 3 , ( " linux_set_kernel_oplock: got kernel oplock on file %s, "
2007-05-29 13:30:34 +04:00
" file_id = %s gen_id = %lu \n " ,
2009-07-11 01:50:37 +04:00
fsp_str_dbg ( fsp ) , file_id_string_tos ( & fsp - > file_id ) ,
2007-05-29 13:30:34 +04:00
fsp - > fh - > gen_id ) ) ;
2000-06-10 18:29:31 +04:00
return True ;
}
/****************************************************************************
Release a kernel oplock on a file .
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
2001-10-21 01:59:34 +04:00
2009-01-09 16:02:18 +03:00
static void linux_release_kernel_oplock ( struct kernel_oplocks * ctx ,
2009-01-10 00:07:58 +03:00
files_struct * fsp , int oplock_type )
2000-06-10 18:29:31 +04:00
{
if ( DEBUGLVL ( 10 ) ) {
/*
* Check and print out the current kernel
* oplock state of this file .
*/
2005-07-08 08:51:27 +04:00
int state = fcntl ( fsp - > fh - > fd , F_GETLEASE , 0 ) ;
2007-05-29 13:30:34 +04:00
dbgtext ( " linux_release_kernel_oplock: file %s, file_id = %s "
" gen_id = %lu has kernel oplock state "
2009-07-11 01:50:37 +04:00
" of %x. \n " , fsp_str_dbg ( fsp ) ,
file_id_string_tos ( & fsp - > file_id ) ,
2007-05-29 13:30:34 +04:00
fsp - > fh - > gen_id , state ) ;
2000-06-10 18:29:31 +04:00
}
/*
* Remove the kernel oplock on this file .
*/
2008-01-07 23:47:53 +03:00
if ( SMB_VFS_LINUX_SETLEASE ( fsp , F_UNLCK ) = = - 1 ) {
2000-06-10 18:29:31 +04:00
if ( DEBUGLVL ( 0 ) ) {
2006-08-28 22:25:55 +04:00
dbgtext ( " linux_release_kernel_oplock: Error when "
" removing kernel oplock on file " ) ;
2007-05-29 13:30:34 +04:00
dbgtext ( " %s, file_id = %s, gen_id = %lu. "
2009-07-11 01:50:37 +04:00
" Error was %s \n " , fsp_str_dbg ( fsp ) ,
2007-09-10 14:56:07 +04:00
file_id_string_tos ( & fsp - > file_id ) ,
2007-05-29 13:30:34 +04:00
fsp - > fh - > gen_id , strerror ( errno ) ) ;
2000-06-10 18:29:31 +04:00
}
}
}
2000-06-11 10:46:05 +04:00
/****************************************************************************
2001-10-21 01:59:34 +04:00
See if the kernel supports oplocks .
2000-06-11 10:46:05 +04:00
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
2001-10-21 01:59:34 +04:00
2007-10-19 04:40:25 +04:00
static bool linux_oplocks_available ( void )
2000-06-11 10:46:05 +04:00
{
int fd , ret ;
fd = open ( " /dev/null " , O_RDONLY ) ;
2001-10-21 01:59:34 +04:00
if ( fd = = - 1 )
return False ; /* uggh! */
2000-06-11 10:46:05 +04:00
ret = fcntl ( fd , F_GETLEASE , 0 ) ;
close ( fd ) ;
return ret = = F_UNLCK ;
}
2000-06-10 18:29:31 +04:00
/****************************************************************************
2001-10-21 01:59:34 +04:00
Setup kernel oplocks .
2000-06-10 18:29:31 +04:00
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
2001-10-21 01:59:34 +04:00
2009-01-09 16:02:18 +03:00
static const struct kernel_oplocks_ops linux_koplocks = {
2009-02-03 22:56:35 +03:00
. set_oplock = linux_set_kernel_oplock ,
. release_oplock = linux_release_kernel_oplock ,
. contend_level2_oplocks_begin = NULL ,
. contend_level2_oplocks_end = NULL ,
2009-01-09 16:02:18 +03:00
} ;
/*
 * Create and initialise the Linux kernel oplock context.
 *
 * Steps: check that the kernel supports leases, allocate a zeroed
 * struct kernel_oplocks under mem_ctx, attach the linux_koplocks
 * operations table, then register linux_oplock_signal_handler for
 * RT_SIGNAL_LEASE (with SA_SIGINFO) on the server event context.
 *
 * mem_ctx: talloc parent for the returned context.
 *
 * Returns the new context, or NULL when leases are unavailable, the
 * allocation fails, or the signal handler cannot be registered. On
 * handler registration failure the context is freed before returning.
 */
struct kernel_oplocks *linux_init_kernel_oplocks(TALLOC_CTX *mem_ctx)
{
	struct kernel_oplocks *ctx;
	struct tevent_signal *se;

	if (!linux_oplocks_available()) {
		DEBUG(3,(" Linux kernel oplocks not available \n "));
		return NULL;
	}

	ctx = talloc_zero(mem_ctx, struct kernel_oplocks);
	if (!ctx) {
		DEBUG(0,(" Linux Kernel oplocks talloc_Zero failed \n "));
		return NULL;
	}

	ctx->ops = &linux_koplocks;

	/* The signal event is allocated under ctx, so it goes away with it. */
	se = tevent_add_signal(server_event_context(),
			       ctx,
			       RT_SIGNAL_LEASE, SA_SIGINFO,
			       linux_oplock_signal_handler,
			       ctx);
	if (!se) {
		/*
		 * Terminate the message with a newline like every other
		 * DEBUG call in this file, so it does not run into the
		 * next log entry.
		 */
		DEBUG(0,(" Failed to setup RT_SIGNAL_LEASE handler \n "));
		TALLOC_FREE(ctx);
		return NULL;
	}

	ctx->private_data = se;

	DEBUG(3,(" Linux kernel oplocks enabled \n "));

	return ctx;
}
# else
2005-05-02 21:49:43 +04:00
/*
 * Compiled only in the #else branch, i.e. when
 * HAVE_KERNEL_OPLOCKS_LINUX is not set. Declares and then defines an
 * empty function that does nothing.
 * NOTE(review): presumably exists so this translation unit is not
 * empty on builds without Linux kernel oplocks - confirm.
 */
void oplock_linux_dummy ( void ) ;
2000-06-10 18:29:31 +04:00
void oplock_linux_dummy ( void ) { }
# endif /* HAVE_KERNEL_OPLOCKS_LINUX */