2000-06-10 14:29:31 +00:00
/*
2002-01-30 06:08:46 +00:00
Unix SMB / CIFS implementation .
2000-06-10 14:29:31 +00:00
kernel oplock processing for Linux
Copyright ( C ) Andrew Tridgell 2000
This program is free software ; you can redistribute it and / or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation ; either version 2 of the License , or
( at your option ) any later version .
This program is distributed in the hope that it will be useful ,
but WITHOUT ANY WARRANTY ; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE . See the
GNU General Public License for more details .
You should have received a copy of the GNU General Public License
along with this program ; if not , write to the Free Software
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
# include "includes.h"
2000-06-11 05:57:58 +00:00
# if HAVE_KERNEL_OPLOCKS_LINUX
2001-06-25 02:53:13 +00:00
/*
 * State shared between signal_handler() and the oplock message routines.
 * signal_handler() increments signals_received and records the signalled
 * descriptor in fd_pending; linux_oplock_receive_message() increments
 * signals_processed and resets fd_pending to -1. A break is considered
 * pending while the two counters differ.
 */
static VOLATILE sig_atomic_t signals_received ;
static VOLATILE sig_atomic_t signals_processed ;
static VOLATILE sig_atomic_t fd_pending ; /* the fd of the current pending signal */
2000-06-10 14:29:31 +00:00
2000-06-11 06:24:54 +00:00
/*
 * Fallback definitions used only when the system headers do not already
 * provide these names.
 * NOTE(review): the numeric values are assumed to match the running
 * kernel's fcntl commands and capability index - confirm against the
 * target kernel headers.
 */
# ifndef F_SETLEASE
# define F_SETLEASE 1024
# endif
# ifndef F_GETLEASE
# define F_GETLEASE 1025
# endif
# ifndef CAP_LEASE
# define CAP_LEASE 28
# endif
2000-06-11 05:57:58 +00:00
totally rewrote the async signal, notification and oplock notification
handling in Samba. This was needed due to several limitations and
races in the previous code - as a side effect the new code is much
cleaner :)
in summary:
- changed sys_select() to avoid a signal/select race condition. It is a
rare race but once we have signals doing notification and oplocks it
is important.
- changed our main processing loop to take advantage of the new
sys_select semantics
- split the notify code into implementation dependent and general
parts. Added the following structure that defines an implementation:
struct cnotify_fns {
void * (*register_notify)(connection_struct *conn, char *path, uint32 flags);
BOOL (*check_notify)(connection_struct *conn, uint16 vuid, char *path, uint32 flags, void *data, time_t t);
void (*remove_notify)(void *data);
};
then I wrote two implementations, one using hash/poll (like our old
code) and the other using the new Linux kernel change notify. It
should be easy to add other change notify implementations by creating
a structure of the above type.
- fixed a bug in change notify where we were returning the wrong error
code.
- rewrote the core change notify code to be much simpler
- moved to real-time signals for leases and change notify
Amazingly, it all seems to work. I was very surprised!
(This used to be commit 44766c39e0027c762bee8b33b12c621c109a3267)
2000-06-12 15:53:31 +00:00
/*
 * Signal number used for lease notifications: it is passed to
 * fcntl(F_SETSIG) in linux_setlease() and is the signal that
 * signal_handler() is installed for. 33 is the default when the build
 * does not supply its own value.
 */
# ifndef RT_SIGNAL_LEASE
# define RT_SIGNAL_LEASE 33
# endif
2000-06-12 17:06:00 +00:00
/*
 * Fallback for system headers that do not define F_SETSIG.
 * NOTE(review): value assumed to match the kernel's fcntl command - confirm.
 */
# ifndef F_SETSIG
# define F_SETSIG 10
# endif
2000-06-10 14:29:31 +00:00
/****************************************************************************
2001-10-20 21:59:34 +00:00
Handle a LEASE signal , incrementing the signals_received and blocking the signal .
2000-06-10 14:29:31 +00:00
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
2001-10-20 21:59:34 +00:00
2001-05-08 06:14:08 +00:00
static void signal_handler ( int sig , siginfo_t * info , void * unused )
2000-06-10 14:29:31 +00:00
{
2001-05-08 06:14:08 +00:00
BlockSignals ( True , sig ) ;
2001-06-25 02:53:13 +00:00
fd_pending = ( sig_atomic_t ) info - > si_fd ;
2000-06-10 14:29:31 +00:00
signals_received + + ;
totally rewrote the async signal, notification and oplock notification
handling in Samba. This was needed due to several limitations and
races in the previous code - as a side effect the new code is much
cleaner :)
in summary:
- changed sys_select() to avoid a signal/select race condition. It is a
rare race but once we have signals doing notification and oplocks it
is important.
- changed our main processing loop to take advantage of the new
sys_select semantics
- split the notify code into implementaion dependent and general
parts. Added the following structure that defines an implementation:
struct cnotify_fns {
void * (*register_notify)(connection_struct *conn, char *path, uint32 flags);
BOOL (*check_notify)(connection_struct *conn, uint16 vuid, char *path, uint32 flags, void *data, time_t t);
void (*remove_notify)(void *data);
};
then I wrote two implementations, one using hash/poll (like our old
code) and the other using the new Linux kernel change notify. It
should be easy to add other change notify implementations by creating
a sructure of the above type.
- fixed a bug in change notify where we were returning the wrong error
code.
- rewrote the core change notify code to be much simpler
- moved to real-time signals for leases and change notify
Amazingly, it all seems to work. I was very surprised!
(This used to be commit 44766c39e0027c762bee8b33b12c621c109a3267)
2000-06-12 15:53:31 +00:00
sys_select_signal ( ) ;
2000-06-10 14:29:31 +00:00
}
2000-06-11 05:57:58 +00:00
/****************************************************************************
2001-10-20 21:59:34 +00:00
Try to gain a linux capability .
2001-09-08 02:59:23 +00:00
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
2001-10-20 21:59:34 +00:00
2001-09-08 02:59:23 +00:00
static void set_capability ( unsigned capability )
2000-06-11 05:57:58 +00:00
{
2000-06-11 06:24:54 +00:00
# ifndef _LINUX_CAPABILITY_VERSION
# define _LINUX_CAPABILITY_VERSION 0x19980330
# endif
/* these can be removed when they are in glibc headers */
struct {
uint32 version ;
int pid ;
} header ;
struct {
uint32 effective ;
uint32 permitted ;
uint32 inheritable ;
} data ;
header . version = _LINUX_CAPABILITY_VERSION ;
header . pid = 0 ;
if ( capget ( & header , & data ) = = - 1 ) {
DEBUG ( 3 , ( " Unable to get kernel capabilities (%s) \n " , strerror ( errno ) ) ) ;
2000-06-11 05:57:58 +00:00
return ;
}
2000-06-11 06:24:54 +00:00
data . effective | = ( 1 < < capability ) ;
if ( capset ( & header , & data ) = = - 1 ) {
DEBUG ( 3 , ( " Unable to set %d capability (%s) \n " ,
capability , strerror ( errno ) ) ) ;
2000-06-11 05:57:58 +00:00
}
}
/****************************************************************************
2001-10-20 21:59:34 +00:00
Call SETLEASE . If we get EACCES then we try setting up the right capability and
try again
2000-06-11 05:57:58 +00:00
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
2001-10-20 21:59:34 +00:00
2000-06-11 05:57:58 +00:00
static int linux_setlease ( int fd , int leasetype )
{
int ret ;
2000-06-12 17:06:00 +00:00
if ( fcntl ( fd , F_SETSIG , RT_SIGNAL_LEASE ) = = - 1 ) {
DEBUG ( 3 , ( " Failed to set signal handler for kernel lease \n " ) ) ;
return - 1 ;
}
2000-06-11 05:57:58 +00:00
ret = fcntl ( fd , F_SETLEASE , leasetype ) ;
if ( ret = = - 1 & & errno = = EACCES ) {
2000-06-11 06:24:54 +00:00
set_capability ( CAP_LEASE ) ;
2000-06-11 05:57:58 +00:00
ret = fcntl ( fd , F_SETLEASE , leasetype ) ;
}
return ret ;
}
2000-06-10 14:29:31 +00:00
/****************************************************************************
* Deal with the Linux kernel < - - > smbd
* oplock break protocol .
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
2001-10-20 21:59:34 +00:00
2000-06-10 14:29:31 +00:00
static BOOL linux_oplock_receive_message ( fd_set * fds , char * buffer , int buffer_len )
{
2001-04-27 18:03:12 +00:00
BOOL ret = True ;
2001-10-20 21:59:34 +00:00
struct files_struct * fsp ;
2000-06-10 14:29:31 +00:00
2001-10-20 21:59:34 +00:00
if ( signals_received = = signals_processed )
return False ;
2000-06-10 14:29:31 +00:00
2001-10-20 21:59:34 +00:00
if ( ( fsp = file_find_fd ( fd_pending ) ) = = NULL ) {
2000-06-19 21:30:27 +00:00
DEBUG ( 0 , ( " Invalid file descriptor %d in kernel oplock break! \n " , ( int ) fd_pending ) ) ;
2000-06-10 14:29:31 +00:00
ret = False ;
goto out ;
}
2000-06-12 17:06:00 +00:00
DEBUG ( 3 , ( " receive_local_message: kernel oplock break request received for \
2001-10-20 21:59:34 +00:00
dev = % x , inode = % .0f \ n " , (unsigned int)fsp->dev, (double)fsp->inode ));
2000-06-10 14:29:31 +00:00
/*
* Create a kernel oplock break message .
*/
/* Setup the message header */
SIVAL ( buffer , OPBRK_CMD_LEN_OFFSET , KERNEL_OPLOCK_BREAK_MSG_LEN ) ;
SSVAL ( buffer , OPBRK_CMD_PORT_OFFSET , 0 ) ;
buffer + = OPBRK_CMD_HEADER_LEN ;
SSVAL ( buffer , OPBRK_MESSAGE_CMD_OFFSET , KERNEL_OPLOCK_BREAK_CMD ) ;
2001-10-20 21:59:34 +00:00
memcpy ( buffer + KERNEL_OPLOCK_BREAK_DEV_OFFSET , ( char * ) & fsp - > dev , sizeof ( fsp - > dev ) ) ;
memcpy ( buffer + KERNEL_OPLOCK_BREAK_INODE_OFFSET , ( char * ) & fsp - > inode , sizeof ( fsp - > inode ) ) ;
memcpy ( buffer + KERNEL_OPLOCK_BREAK_FILEID_OFFSET , ( char * ) & fsp - > file_id , sizeof ( fsp - > file_id ) ) ;
2000-06-10 14:29:31 +00:00
out :
/* now we can receive more signals */
2001-06-25 02:53:13 +00:00
fd_pending = ( sig_atomic_t ) - 1 ;
2000-06-10 14:29:31 +00:00
signals_processed + + ;
totally rewrote the async signal, notification and oplock notification
handling in Samba. This was needed due to several limitations and
races in the previous code - as a side effect the new code is much
cleaner :)
in summary:
- changed sys_select() to avoid a signal/select race condition. It is a
rare race but once we have signals doing notification and oplocks it
is important.
- changed our main processing loop to take advantage of the new
sys_select semantics
- split the notify code into implementaion dependent and general
parts. Added the following structure that defines an implementation:
struct cnotify_fns {
void * (*register_notify)(connection_struct *conn, char *path, uint32 flags);
BOOL (*check_notify)(connection_struct *conn, uint16 vuid, char *path, uint32 flags, void *data, time_t t);
void (*remove_notify)(void *data);
};
then I wrote two implementations, one using hash/poll (like our old
code) and the other using the new Linux kernel change notify. It
should be easy to add other change notify implementations by creating
a sructure of the above type.
- fixed a bug in change notify where we were returning the wrong error
code.
- rewrote the core change notify code to be much simpler
- moved to real-time signals for leases and change notify
Amazingly, it all seems to work. I was very surprised!
(This used to be commit 44766c39e0027c762bee8b33b12c621c109a3267)
2000-06-12 15:53:31 +00:00
BlockSignals ( False , RT_SIGNAL_LEASE ) ;
2000-06-10 14:29:31 +00:00
2001-04-27 18:03:12 +00:00
return ret ;
2000-06-10 14:29:31 +00:00
}
/****************************************************************************
Attempt to set an kernel oplock on a file .
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
2001-10-20 21:59:34 +00:00
2000-06-10 14:29:31 +00:00
static BOOL linux_set_kernel_oplock ( files_struct * fsp , int oplock_type )
{
2000-06-11 05:57:58 +00:00
if ( linux_setlease ( fsp - > fd , F_WRLCK ) = = - 1 ) {
2000-06-12 17:06:00 +00:00
DEBUG ( 3 , ( " set_file_oplock: Refused oplock on file %s, fd = %d, dev = %x, \
2000-06-11 05:57:58 +00:00
inode = % .0f . ( % s ) \ n " ,
fsp - > fsp_name , fsp - > fd ,
( unsigned int ) fsp - > dev , ( double ) fsp - > inode , strerror ( errno ) ) ) ;
2000-06-10 14:29:31 +00:00
return False ;
}
2001-11-06 22:07:04 +00:00
DEBUG ( 3 , ( " set_file_oplock: got kernel oplock on file %s, dev = %x, inode = %.0f, file_id = %lu \n " ,
2001-10-20 21:59:34 +00:00
fsp - > fsp_name , ( unsigned int ) fsp - > dev , ( double ) fsp - > inode , fsp - > file_id ) ) ;
2000-06-10 14:29:31 +00:00
return True ;
}
/****************************************************************************
Release a kernel oplock on a file .
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
2001-10-20 21:59:34 +00:00
2000-06-10 14:29:31 +00:00
static void linux_release_kernel_oplock ( files_struct * fsp )
{
if ( DEBUGLVL ( 10 ) ) {
/*
* Check and print out the current kernel
* oplock state of this file .
*/
int state = fcntl ( fsp - > fd , F_GETLEASE , 0 ) ;
2001-11-06 22:07:04 +00:00
dbgtext ( " release_kernel_oplock: file %s, dev = %x, inode = %.0f file_id = %lu has kernel \
2000-06-10 14:29:31 +00:00
oplock state of % x . \ n " , fsp->fsp_name, (unsigned int)fsp->dev,
2001-10-20 21:59:34 +00:00
( double ) fsp - > inode , fsp - > file_id , state ) ;
2000-06-10 14:29:31 +00:00
}
/*
* Remove the kernel oplock on this file .
*/
2000-06-11 05:57:58 +00:00
if ( linux_setlease ( fsp - > fd , F_UNLCK ) = = - 1 ) {
2000-06-10 14:29:31 +00:00
if ( DEBUGLVL ( 0 ) ) {
dbgtext ( " release_kernel_oplock: Error when removing kernel oplock on file " ) ;
2001-11-06 22:07:04 +00:00
dbgtext ( " %s, dev = %x, inode = %.0f, file_id = %lu. Error was %s \n " ,
2000-06-10 14:29:31 +00:00
fsp - > fsp_name , ( unsigned int ) fsp - > dev ,
2001-10-20 21:59:34 +00:00
( double ) fsp - > inode , fsp - > file_id , strerror ( errno ) ) ;
2000-06-10 14:29:31 +00:00
}
}
}
/****************************************************************************
2001-10-20 21:59:34 +00:00
Parse a kernel oplock message .
2000-06-10 14:29:31 +00:00
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
2001-10-20 21:59:34 +00:00
static BOOL linux_kernel_oplock_parse ( char * msg_start , int msg_len , SMB_INO_T * inode ,
SMB_DEV_T * dev , unsigned long * file_id )
2000-06-10 14:29:31 +00:00
{
/* Ensure that the msg length is correct. */
if ( msg_len ! = KERNEL_OPLOCK_BREAK_MSG_LEN ) {
totally rewrote the async signal, notification and oplock notification
handling in Samba. This was needed due to several limitations and
races in the previous code - as a side effect the new code is much
cleaner :)
in summary:
- changed sys_select() to avoid a signal/select race condition. It is a
rare race but once we have signals doing notification and oplocks it
is important.
- changed our main processing loop to take advantage of the new
sys_select semantics
- split the notify code into implementaion dependent and general
parts. Added the following structure that defines an implementation:
struct cnotify_fns {
void * (*register_notify)(connection_struct *conn, char *path, uint32 flags);
BOOL (*check_notify)(connection_struct *conn, uint16 vuid, char *path, uint32 flags, void *data, time_t t);
void (*remove_notify)(void *data);
};
then I wrote two implementations, one using hash/poll (like our old
code) and the other using the new Linux kernel change notify. It
should be easy to add other change notify implementations by creating
a sructure of the above type.
- fixed a bug in change notify where we were returning the wrong error
code.
- rewrote the core change notify code to be much simpler
- moved to real-time signals for leases and change notify
Amazingly, it all seems to work. I was very surprised!
(This used to be commit 44766c39e0027c762bee8b33b12c621c109a3267)
2000-06-12 15:53:31 +00:00
DEBUG ( 0 , ( " incorrect length for KERNEL_OPLOCK_BREAK_CMD (was %d, should be %d). \n " ,
msg_len , KERNEL_OPLOCK_BREAK_MSG_LEN ) ) ;
2000-06-10 14:29:31 +00:00
return False ;
}
2001-10-20 21:59:34 +00:00
memcpy ( ( char * ) inode , msg_start + KERNEL_OPLOCK_BREAK_INODE_OFFSET , sizeof ( * inode ) ) ;
memcpy ( ( char * ) dev , msg_start + KERNEL_OPLOCK_BREAK_DEV_OFFSET , sizeof ( * dev ) ) ;
memcpy ( ( char * ) file_id , msg_start + KERNEL_OPLOCK_BREAK_FILEID_OFFSET , sizeof ( * file_id ) ) ;
2000-06-10 14:29:31 +00:00
2001-11-06 22:07:04 +00:00
DEBUG ( 3 , ( " kernel oplock break request for file dev = %x, inode = %.0f, file_id = %lu \n " ,
2001-10-20 21:59:34 +00:00
( unsigned int ) * dev , ( double ) * inode , * file_id ) ) ;
2000-06-10 14:29:31 +00:00
return True ;
}
/****************************************************************************
2001-10-20 21:59:34 +00:00
See if a oplock message is waiting .
2000-06-10 14:29:31 +00:00
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
2001-10-20 21:59:34 +00:00
2000-06-10 14:29:31 +00:00
static BOOL linux_oplock_msg_waiting ( fd_set * fds )
{
return signals_processed ! = signals_received ;
}
2000-06-11 06:46:05 +00:00
/****************************************************************************
2001-10-20 21:59:34 +00:00
See if the kernel supports oplocks .
2000-06-11 06:46:05 +00:00
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
2001-10-20 21:59:34 +00:00
2000-06-11 06:46:05 +00:00
static BOOL linux_oplocks_available ( void )
{
int fd , ret ;
fd = open ( " /dev/null " , O_RDONLY ) ;
2001-10-20 21:59:34 +00:00
if ( fd = = - 1 )
return False ; /* uggh! */
2000-06-11 06:46:05 +00:00
ret = fcntl ( fd , F_GETLEASE , 0 ) ;
close ( fd ) ;
return ret = = F_UNLCK ;
}
2000-06-10 14:29:31 +00:00
/****************************************************************************
2001-10-20 21:59:34 +00:00
Setup kernel oplocks .
2000-06-10 14:29:31 +00:00
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
2001-10-20 21:59:34 +00:00
2000-06-10 14:29:31 +00:00
struct kernel_oplocks * linux_init_kernel_oplocks ( void )
{
static struct kernel_oplocks koplocks ;
struct sigaction act ;
2000-06-11 06:46:05 +00:00
if ( ! linux_oplocks_available ( ) ) {
DEBUG ( 3 , ( " Linux kernel oplocks not available \n " ) ) ;
return NULL ;
}
2001-10-20 21:59:34 +00:00
act . sa_handler = NULL ;
act . sa_sigaction = signal_handler ;
act . sa_flags = SA_SIGINFO ;
if ( sigaction ( RT_SIGNAL_LEASE , & act , NULL ) ! = 0 ) {
totally rewrote the async signal, notification and oplock notification
handling in Samba. This was needed due to several limitations and
races in the previous code - as a side effect the new code is much
cleaner :)
in summary:
- changed sys_select() to avoid a signal/select race condition. It is a
rare race but once we have signals doing notification and oplocks it
is important.
- changed our main processing loop to take advantage of the new
sys_select semantics
- split the notify code into implementaion dependent and general
parts. Added the following structure that defines an implementation:
struct cnotify_fns {
void * (*register_notify)(connection_struct *conn, char *path, uint32 flags);
BOOL (*check_notify)(connection_struct *conn, uint16 vuid, char *path, uint32 flags, void *data, time_t t);
void (*remove_notify)(void *data);
};
then I wrote two implementations, one using hash/poll (like our old
code) and the other using the new Linux kernel change notify. It
should be easy to add other change notify implementations by creating
a sructure of the above type.
- fixed a bug in change notify where we were returning the wrong error
code.
- rewrote the core change notify code to be much simpler
- moved to real-time signals for leases and change notify
Amazingly, it all seems to work. I was very surprised!
(This used to be commit 44766c39e0027c762bee8b33b12c621c109a3267)
2000-06-12 15:53:31 +00:00
DEBUG ( 0 , ( " Failed to setup RT_SIGNAL_LEASE handler \n " ) ) ;
2000-06-10 14:29:31 +00:00
return NULL ;
2001-10-20 21:59:34 +00:00
}
2000-06-10 14:29:31 +00:00
koplocks . receive_message = linux_oplock_receive_message ;
koplocks . set_oplock = linux_set_kernel_oplock ;
koplocks . release_oplock = linux_release_kernel_oplock ;
koplocks . parse_message = linux_kernel_oplock_parse ;
koplocks . msg_waiting = linux_oplock_msg_waiting ;
koplocks . notification_fd = - 1 ;
2000-06-12 17:06:00 +00:00
DEBUG ( 3 , ( " Linux kernel oplocks enabled \n " ) ) ;
2000-06-10 14:29:31 +00:00
return & koplocks ;
}
# else
/*
 * Empty function compiled when HAVE_KERNEL_OPLOCKS_LINUX is not set.
 * NOTE(review): presumably present so this file still defines a symbol - confirm.
 */
void oplock_linux_dummy(void)
{
}
# endif /* HAVE_KERNEL_OPLOCKS_LINUX */