2000-06-10 18:29:31 +04:00
/*
Unix SMB / Netbios implementation .
Version 3.0
kernel oplock processing for Linux
Copyright ( C ) Andrew Tridgell 2000
This program is free software ; you can redistribute it and / or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation ; either version 2 of the License , or
( at your option ) any later version .
This program is distributed in the hope that it will be useful ,
but WITHOUT ANY WARRANTY ; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE . See the
GNU General Public License for more details .
You should have received a copy of the GNU General Public License
along with this program ; if not , write to the Free Software
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
# include "includes.h"
2000-06-11 09:57:58 +04:00
# if HAVE_KERNEL_OPLOCKS_LINUX
2001-06-25 06:53:13 +04:00
/*
 * State shared between the lease signal handler and the rest of this file.
 * signal_handler() increments signals_received; linux_oplock_receive_message()
 * increments signals_processed once it has dealt with a signal. The two
 * counters differ while a kernel oplock break is waiting to be handled.
 */
static VOLATILE sig_atomic_t signals_received ;
static VOLATILE sig_atomic_t signals_processed ;
/* Set from si_fd in signal_handler(); reset to -1 after the break is consumed. */
static VOLATILE sig_atomic_t fd_pending ; /* the fd of the current pending signal */
2000-06-10 18:29:31 +04:00
2000-06-11 10:24:54 +04:00
/*
 * Fallback definitions for the lease interface, used only when the system
 * headers do not already provide them.
 *
 * F_SETLEASE / F_GETLEASE are the fcntl() commands used to set and query a
 * lease, CAP_LEASE is the capability requested when F_SETLEASE is refused
 * with EACCES, F_SETSIG selects which signal announces a lease break, and
 * RT_SIGNAL_LEASE is the signal number this file asks for.
 */
#ifndef F_SETLEASE
#define F_SETLEASE 1024
#endif

#ifndef F_GETLEASE
#define F_GETLEASE 1025
#endif

#ifndef CAP_LEASE
#define CAP_LEASE 28
#endif

#ifndef RT_SIGNAL_LEASE
#define RT_SIGNAL_LEASE 33
#endif

#ifndef F_SETSIG
#define F_SETSIG 10
#endif
2000-06-10 18:29:31 +04:00
/****************************************************************************
totally rewrote the async signal, notification and oplock notification
handling in Samba. This was needed due to several limitations and
races in the previous code - as a side effect the new code is much
cleaner :)
in summary:
- changed sys_select() to avoid a signal/select race condition. It is a
rare race but once we have signals doing notification and oplocks it
is important.
- changed our main processing loop to take advantage of the new
sys_select semantics
- split the notify code into implementaion dependent and general
parts. Added the following structure that defines an implementation:
struct cnotify_fns {
void * (*register_notify)(connection_struct *conn, char *path, uint32 flags);
BOOL (*check_notify)(connection_struct *conn, uint16 vuid, char *path, uint32 flags, void *data, time_t t);
void (*remove_notify)(void *data);
};
then I wrote two implementations, one using hash/poll (like our old
code) and the other using the new Linux kernel change notify. It
should be easy to add other change notify implementations by creating
a sructure of the above type.
- fixed a bug in change notify where we were returning the wrong error
code.
- rewrote the core change notify code to be much simpler
- moved to real-time signals for leases and change notify
Amazingly, it all seems to work. I was very surprised!
(This used to be commit 44766c39e0027c762bee8b33b12c621c109a3267)
2000-06-12 19:53:31 +04:00
handle a LEASE signal , incrementing the signals_received and blocking the signal
2000-06-10 18:29:31 +04:00
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
2001-05-08 10:14:08 +04:00
static void signal_handler ( int sig , siginfo_t * info , void * unused )
2000-06-10 18:29:31 +04:00
{
2001-05-08 10:14:08 +04:00
BlockSignals ( True , sig ) ;
2001-06-25 06:53:13 +04:00
fd_pending = ( sig_atomic_t ) info - > si_fd ;
2000-06-10 18:29:31 +04:00
signals_received + + ;
totally rewrote the async signal, notification and oplock notification
handling in Samba. This was needed due to several limitations and
races in the previous code - as a side effect the new code is much
cleaner :)
in summary:
- changed sys_select() to avoid a signal/select race condition. It is a
rare race but once we have signals doing notification and oplocks it
is important.
- changed our main processing loop to take advantage of the new
sys_select semantics
- split the notify code into implementaion dependent and general
parts. Added the following structure that defines an implementation:
struct cnotify_fns {
void * (*register_notify)(connection_struct *conn, char *path, uint32 flags);
BOOL (*check_notify)(connection_struct *conn, uint16 vuid, char *path, uint32 flags, void *data, time_t t);
void (*remove_notify)(void *data);
};
then I wrote two implementations, one using hash/poll (like our old
code) and the other using the new Linux kernel change notify. It
should be easy to add other change notify implementations by creating
a sructure of the above type.
- fixed a bug in change notify where we were returning the wrong error
code.
- rewrote the core change notify code to be much simpler
- moved to real-time signals for leases and change notify
Amazingly, it all seems to work. I was very surprised!
(This used to be commit 44766c39e0027c762bee8b33b12c621c109a3267)
2000-06-12 19:53:31 +04:00
sys_select_signal ( ) ;
2000-06-10 18:29:31 +04:00
}
2000-06-11 09:57:58 +04:00
/****************************************************************************
2000-06-11 10:24:54 +04:00
try to gain a linux capability
2001-09-08 06:59:23 +04:00
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
static void set_capability ( unsigned capability )
2000-06-11 09:57:58 +04:00
{
2000-06-11 10:24:54 +04:00
# ifndef _LINUX_CAPABILITY_VERSION
# define _LINUX_CAPABILITY_VERSION 0x19980330
# endif
/* these can be removed when they are in glibc headers */
struct {
uint32 version ;
int pid ;
} header ;
struct {
uint32 effective ;
uint32 permitted ;
uint32 inheritable ;
} data ;
header . version = _LINUX_CAPABILITY_VERSION ;
header . pid = 0 ;
if ( capget ( & header , & data ) = = - 1 ) {
DEBUG ( 3 , ( " Unable to get kernel capabilities (%s) \n " , strerror ( errno ) ) ) ;
2000-06-11 09:57:58 +04:00
return ;
}
2000-06-11 10:24:54 +04:00
data . effective | = ( 1 < < capability ) ;
if ( capset ( & header , & data ) = = - 1 ) {
DEBUG ( 3 , ( " Unable to set %d capability (%s) \n " ,
capability , strerror ( errno ) ) ) ;
2000-06-11 09:57:58 +04:00
}
}
/****************************************************************************
call SETLEASE . If we get EACCES then we try setting up the right capability and
try again
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
static int linux_setlease ( int fd , int leasetype )
{
int ret ;
2000-06-12 21:06:00 +04:00
if ( fcntl ( fd , F_SETSIG , RT_SIGNAL_LEASE ) = = - 1 ) {
DEBUG ( 3 , ( " Failed to set signal handler for kernel lease \n " ) ) ;
return - 1 ;
}
2000-06-11 09:57:58 +04:00
ret = fcntl ( fd , F_SETLEASE , leasetype ) ;
if ( ret = = - 1 & & errno = = EACCES ) {
2000-06-11 10:24:54 +04:00
set_capability ( CAP_LEASE ) ;
2000-06-11 09:57:58 +04:00
ret = fcntl ( fd , F_SETLEASE , leasetype ) ;
}
return ret ;
}
2000-06-10 18:29:31 +04:00
/****************************************************************************
* Deal with the Linux kernel < - - > smbd
* oplock break protocol .
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
static BOOL linux_oplock_receive_message ( fd_set * fds , char * buffer , int buffer_len )
{
SMB_DEV_T dev ;
SMB_INO_T inode ;
SMB_STRUCT_STAT sbuf ;
2001-04-27 22:03:12 +04:00
BOOL ret = True ;
2000-06-10 18:29:31 +04:00
if ( signals_received = = signals_processed ) return False ;
2000-06-20 01:30:27 +04:00
if ( sys_fstat ( ( int ) fd_pending , & sbuf ) = = - 1 ) {
DEBUG ( 0 , ( " Invalid file descriptor %d in kernel oplock break! \n " , ( int ) fd_pending ) ) ;
2000-06-10 18:29:31 +04:00
ret = False ;
goto out ;
}
dev = sbuf . st_dev ;
inode = sbuf . st_ino ;
2000-06-12 21:06:00 +04:00
DEBUG ( 3 , ( " receive_local_message: kernel oplock break request received for \
2000-06-10 18:29:31 +04:00
dev = % x , inode = % .0f \ n " , (unsigned int)dev, (double)inode ));
/*
* Create a kernel oplock break message .
*/
/* Setup the message header */
SIVAL ( buffer , OPBRK_CMD_LEN_OFFSET , KERNEL_OPLOCK_BREAK_MSG_LEN ) ;
SSVAL ( buffer , OPBRK_CMD_PORT_OFFSET , 0 ) ;
buffer + = OPBRK_CMD_HEADER_LEN ;
SSVAL ( buffer , OPBRK_MESSAGE_CMD_OFFSET , KERNEL_OPLOCK_BREAK_CMD ) ;
memcpy ( buffer + KERNEL_OPLOCK_BREAK_DEV_OFFSET , ( char * ) & dev , sizeof ( dev ) ) ;
memcpy ( buffer + KERNEL_OPLOCK_BREAK_INODE_OFFSET , ( char * ) & inode , sizeof ( inode ) ) ;
out :
/* now we can receive more signals */
2001-06-25 06:53:13 +04:00
fd_pending = ( sig_atomic_t ) - 1 ;
2000-06-10 18:29:31 +04:00
signals_processed + + ;
totally rewrote the async signal, notification and oplock notification
handling in Samba. This was needed due to several limitations and
races in the previous code - as a side effect the new code is much
cleaner :)
in summary:
- changed sys_select() to avoid a signal/select race condition. It is a
rare race but once we have signals doing notification and oplocks it
is important.
- changed our main processing loop to take advantage of the new
sys_select semantics
- split the notify code into implementaion dependent and general
parts. Added the following structure that defines an implementation:
struct cnotify_fns {
void * (*register_notify)(connection_struct *conn, char *path, uint32 flags);
BOOL (*check_notify)(connection_struct *conn, uint16 vuid, char *path, uint32 flags, void *data, time_t t);
void (*remove_notify)(void *data);
};
then I wrote two implementations, one using hash/poll (like our old
code) and the other using the new Linux kernel change notify. It
should be easy to add other change notify implementations by creating
a sructure of the above type.
- fixed a bug in change notify where we were returning the wrong error
code.
- rewrote the core change notify code to be much simpler
- moved to real-time signals for leases and change notify
Amazingly, it all seems to work. I was very surprised!
(This used to be commit 44766c39e0027c762bee8b33b12c621c109a3267)
2000-06-12 19:53:31 +04:00
BlockSignals ( False , RT_SIGNAL_LEASE ) ;
2000-06-10 18:29:31 +04:00
2001-04-27 22:03:12 +04:00
return ret ;
2000-06-10 18:29:31 +04:00
}
/****************************************************************************
 Attempt to set a kernel oplock on a file.

 fsp         - the open file; its fd is leased with F_WRLCK
 oplock_type - not used by this implementation

 Returns True if the lease was granted, False (after logging the reason at
 debug level 3) otherwise.
****************************************************************************/

static BOOL linux_set_kernel_oplock(files_struct *fsp, int oplock_type)
{
	(void)oplock_type;

	if (linux_setlease(fsp->fd, F_WRLCK) == -1) {
		DEBUG(3, ("set_file_oplock: Refused oplock on file %s, fd = %d, dev = %x, "
			  "inode = %.0f. (%s)\n",
			  fsp->fsp_name, fsp->fd,
			  (unsigned int)fsp->dev, (double)fsp->inode, strerror(errno)));
		return False;
	}

	DEBUG(3, ("set_file_oplock: got kernel oplock on file %s, dev = %x, inode = %.0f\n",
		  fsp->fsp_name, (unsigned int)fsp->dev, (double)fsp->inode));

	return True;
}
/****************************************************************************
 Release a kernel oplock on a file.

 fsp - the open file whose lease is dropped via F_UNLCK

 Nothing is returned; a failure to remove the lease is reported at debug
 level 0.
****************************************************************************/

static void linux_release_kernel_oplock(files_struct *fsp)
{
	if (DEBUGLVL(10)) {
		/*
		 * Check and print out the current kernel
		 * oplock state of this file.
		 */
		int state = fcntl(fsp->fd, F_GETLEASE, 0);
		dbgtext("release_kernel_oplock: file %s, dev = %x, inode = %.0f has kernel "
			"oplock state of %x.\n", fsp->fsp_name, (unsigned int)fsp->dev,
			(double)fsp->inode, state);
	}

	/*
	 * Remove the kernel oplock on this file.
	 */
	if (linux_setlease(fsp->fd, F_UNLCK) == -1) {
		if (DEBUGLVL(0)) {
			dbgtext("release_kernel_oplock: Error when removing kernel oplock on file ");
			dbgtext("%s, dev = %x, inode = %.0f. Error was %s\n",
				fsp->fsp_name, (unsigned int)fsp->dev,
				(double)fsp->inode, strerror(errno));
		}
	}
}
/****************************************************************************
 Parse a kernel oplock message.

 msg_start - start of the message body (the part after the command header)
 msg_len   - length of the message; must equal KERNEL_OPLOCK_BREAK_MSG_LEN
 inode     - receives the inode number stored in the message
 dev       - receives the device number stored in the message

 Returns False (and logs at debug level 0) if the length is wrong, True
 once both values have been extracted.
****************************************************************************/

static BOOL linux_kernel_oplock_parse(char *msg_start, int msg_len, SMB_INO_T *inode, SMB_DEV_T *dev)
{
	/* Ensure that the msg length is correct. */
	if (msg_len != KERNEL_OPLOCK_BREAK_MSG_LEN) {
		DEBUG(0, ("incorrect length for KERNEL_OPLOCK_BREAK_CMD (was %d, should be %d).\n",
			  msg_len, KERNEL_OPLOCK_BREAK_MSG_LEN));
		return False;
	}

	/* memcpy rather than a cast: the fields may not be aligned in the message. */
	memcpy((char *)inode, msg_start + KERNEL_OPLOCK_BREAK_INODE_OFFSET, sizeof(*inode));
	memcpy((char *)dev, msg_start + KERNEL_OPLOCK_BREAK_DEV_OFFSET, sizeof(*dev));

	DEBUG(3, ("kernel oplock break request for file dev = %x, inode = %.0f\n",
		  (unsigned int)*dev, (double)*inode));

	return True;
}
/****************************************************************************
see if a oplock message is waiting
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
static BOOL linux_oplock_msg_waiting ( fd_set * fds )
{
return signals_processed ! = signals_received ;
}
2000-06-11 10:46:05 +04:00
/****************************************************************************
 See if the kernel supports oplocks.

 Probes by opening /dev/null and issuing fcntl(F_GETLEASE) on it.

 Returns True only if that call reports F_UNLCK; False if /dev/null cannot
 be opened or the fcntl returns anything else.
****************************************************************************/

static BOOL linux_oplocks_available(void)
{
	int fd, ret;

	fd = open("/dev/null", O_RDONLY);
	if (fd == -1) {
		return False; /* uggh! */
	}

	ret = fcntl(fd, F_GETLEASE, 0);
	close(fd);

	return ret == F_UNLCK;
}
2000-06-10 18:29:31 +04:00
/****************************************************************************
setup kernel oplocks
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
struct kernel_oplocks * linux_init_kernel_oplocks ( void )
{
static struct kernel_oplocks koplocks ;
struct sigaction act ;
2000-06-11 10:46:05 +04:00
if ( ! linux_oplocks_available ( ) ) {
DEBUG ( 3 , ( " Linux kernel oplocks not available \n " ) ) ;
return NULL ;
}
2000-06-10 18:29:31 +04:00
act . sa_handler = NULL ;
totally rewrote the async signal, notification and oplock notification
handling in Samba. This was needed due to several limitations and
races in the previous code - as a side effect the new code is much
cleaner :)
in summary:
- changed sys_select() to avoid a signal/select race condition. It is a
rare race but once we have signals doing notification and oplocks it
is important.
- changed our main processing loop to take advantage of the new
sys_select semantics
- split the notify code into implementaion dependent and general
parts. Added the following structure that defines an implementation:
struct cnotify_fns {
void * (*register_notify)(connection_struct *conn, char *path, uint32 flags);
BOOL (*check_notify)(connection_struct *conn, uint16 vuid, char *path, uint32 flags, void *data, time_t t);
void (*remove_notify)(void *data);
};
then I wrote two implementations, one using hash/poll (like our old
code) and the other using the new Linux kernel change notify. It
should be easy to add other change notify implementations by creating
a sructure of the above type.
- fixed a bug in change notify where we were returning the wrong error
code.
- rewrote the core change notify code to be much simpler
- moved to real-time signals for leases and change notify
Amazingly, it all seems to work. I was very surprised!
(This used to be commit 44766c39e0027c762bee8b33b12c621c109a3267)
2000-06-12 19:53:31 +04:00
act . sa_sigaction = signal_handler ;
2000-06-10 18:29:31 +04:00
act . sa_flags = SA_SIGINFO ;
totally rewrote the async signal, notification and oplock notification
handling in Samba. This was needed due to several limitations and
races in the previous code - as a side effect the new code is much
cleaner :)
in summary:
- changed sys_select() to avoid a signal/select race condition. It is a
rare race but once we have signals doing notification and oplocks it
is important.
- changed our main processing loop to take advantage of the new
sys_select semantics
- split the notify code into implementaion dependent and general
parts. Added the following structure that defines an implementation:
struct cnotify_fns {
void * (*register_notify)(connection_struct *conn, char *path, uint32 flags);
BOOL (*check_notify)(connection_struct *conn, uint16 vuid, char *path, uint32 flags, void *data, time_t t);
void (*remove_notify)(void *data);
};
then I wrote two implementations, one using hash/poll (like our old
code) and the other using the new Linux kernel change notify. It
should be easy to add other change notify implementations by creating
a sructure of the above type.
- fixed a bug in change notify where we were returning the wrong error
code.
- rewrote the core change notify code to be much simpler
- moved to real-time signals for leases and change notify
Amazingly, it all seems to work. I was very surprised!
(This used to be commit 44766c39e0027c762bee8b33b12c621c109a3267)
2000-06-12 19:53:31 +04:00
if ( sigaction ( RT_SIGNAL_LEASE , & act , NULL ) ! = 0 ) {
DEBUG ( 0 , ( " Failed to setup RT_SIGNAL_LEASE handler \n " ) ) ;
2000-06-10 18:29:31 +04:00
return NULL ;
}
koplocks . receive_message = linux_oplock_receive_message ;
koplocks . set_oplock = linux_set_kernel_oplock ;
koplocks . release_oplock = linux_release_kernel_oplock ;
koplocks . parse_message = linux_kernel_oplock_parse ;
koplocks . msg_waiting = linux_oplock_msg_waiting ;
koplocks . notification_fd = - 1 ;
2000-06-12 21:06:00 +04:00
DEBUG ( 3 , ( " Linux kernel oplocks enabled \n " ) ) ;
2000-06-10 18:29:31 +04:00
return & koplocks ;
}
# else
/* Placeholder defined when HAVE_KERNEL_OPLOCKS_LINUX is not set; does nothing. */
void oplock_linux_dummy(void)
{
}
# endif /* HAVE_KERNEL_OPLOCKS_LINUX */