2000-06-10 18:29:31 +04:00
/*
2002-01-30 09:08:46 +03:00
Unix SMB / CIFS implementation .
2000-06-10 18:29:31 +04:00
kernel oplock processing for Linux
Copyright ( C ) Andrew Tridgell 2000
This program is free software ; you can redistribute it and / or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation ; either version 2 of the License , or
( at your option ) any later version .
This program is distributed in the hope that it will be useful ,
but WITHOUT ANY WARRANTY ; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE . See the
GNU General Public License for more details .
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
# include "includes.h"
2000-06-11 09:57:58 +04:00
# if HAVE_KERNEL_OPLOCKS_LINUX
2002-07-15 14:35:28 +04:00
/* Number of lease-break notifications queued by signal_handler() and not
   yet consumed by linux_oplock_receive_message(). */
static SIG_ATOMIC_T signals_received ;
/* Size of the pending-fd queue. signal_handler() stops queueing once
   FD_PENDING_SIZE - 1 entries are pending; later notifications are lost. */
# define FD_PENDING_SIZE 100
/* Queue of file descriptors reported by the lease signal. Entry 0 is the
   oldest; the consumer takes entry 0 and shifts the rest down. */
static SIG_ATOMIC_T fd_pending_array [ FD_PENDING_SIZE ] ;
2000-06-10 18:29:31 +04:00
2000-06-11 10:24:54 +04:00
/* Fallback definitions, used only when the system headers do not provide
   these names. NOTE(review): the numbers presumably mirror the Linux kernel's
   fcntl/capability values - confirm against the target kernel headers. */
/* fcntl() command used by linux_setlease() to take or drop a lease. */
# ifndef F_SETLEASE
# define F_SETLEASE 1024
# endif
/* fcntl() command used to query the current lease state of an fd. */
# ifndef F_GETLEASE
# define F_GETLEASE 1025
# endif
/* Capability bit requested via set_capability() when F_SETLEASE fails
   with EACCES. */
# ifndef CAP_LEASE
# define CAP_LEASE 28
# endif
2000-06-11 09:57:58 +04:00
totally rewrote the async signal, notification and oplock notification
handling in Samba. This was needed due to several limitations and
races in the previous code - as a side effect the new code is much
cleaner :)
in summary:
- changed sys_select() to avoid a signal/select race condition. It is a
rare race but once we have signals doing notification and oplocks it
is important.
- changed our main processing loop to take advantage of the new
sys_select semantics
- split the notify code into implementation-dependent and general
parts. Added the following structure that defines an implementation:
struct cnotify_fns {
void * (*register_notify)(connection_struct *conn, char *path, uint32 flags);
BOOL (*check_notify)(connection_struct *conn, uint16 vuid, char *path, uint32 flags, void *data, time_t t);
void (*remove_notify)(void *data);
};
then I wrote two implementations, one using hash/poll (like our old
code) and the other using the new Linux kernel change notify. It
should be easy to add other change notify implementations by creating
a structure of the above type.
- fixed a bug in change notify where we were returning the wrong error
code.
- rewrote the core change notify code to be much simpler
- moved to real-time signals for leases and change notify
Amazingly, it all seems to work. I was very surprised!
(This used to be commit 44766c39e0027c762bee8b33b12c621c109a3267)
2000-06-12 19:53:31 +04:00
/* Signal the kernel is asked (via F_SETSIG in linux_setlease()) to raise
   when a lease has to be broken. A real-time signal is used; the value can
   be overridden by defining RT_SIGNAL_LEASE before this point. */
#ifndef RT_SIGNAL_LEASE
#define RT_SIGNAL_LEASE (SIGRTMIN+1)
#endif
2000-06-12 21:06:00 +04:00
/* Fallback for the fcntl() command that linux_setlease() uses to choose
   which signal is delivered for an fd. NOTE(review): value presumably
   matches the Linux kernel definition - confirm against kernel headers. */
# ifndef F_SETSIG
# define F_SETSIG 10
# endif
2000-06-10 18:29:31 +04:00
/****************************************************************************
2001-10-21 01:59:34 +04:00
Handle a LEASE signal , incrementing the signals_received and blocking the signal .
2000-06-10 18:29:31 +04:00
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
2001-10-21 01:59:34 +04:00
2001-05-08 10:14:08 +04:00
static void signal_handler ( int sig , siginfo_t * info , void * unused )
2000-06-10 18:29:31 +04:00
{
2002-07-15 14:35:28 +04:00
if ( signals_received < FD_PENDING_SIZE - 1 ) {
fd_pending_array [ signals_received ] = ( SIG_ATOMIC_T ) info - > si_fd ;
signals_received + + ;
} /* Else signal is lost. */
totally rewrote the async signal, notification and oplock notification
handling in Samba. This was needed due to several limitations and
races in the previous code - as a side effect the new code is much
cleaner :)
in summary:
- changed sys_select() to avoid a signal/select race condition. It is a
rare race but once we have signals doing notification and oplocks it
is important.
- changed our main processing loop to take advantage of the new
sys_select semantics
- split the notify code into implementaion dependent and general
parts. Added the following structure that defines an implementation:
struct cnotify_fns {
void * (*register_notify)(connection_struct *conn, char *path, uint32 flags);
BOOL (*check_notify)(connection_struct *conn, uint16 vuid, char *path, uint32 flags, void *data, time_t t);
void (*remove_notify)(void *data);
};
then I wrote two implementations, one using hash/poll (like our old
code) and the other using the new Linux kernel change notify. It
should be easy to add other change notify implementations by creating
a sructure of the above type.
- fixed a bug in change notify where we were returning the wrong error
code.
- rewrote the core change notify code to be much simpler
- moved to real-time signals for leases and change notify
Amazingly, it all seems to work. I was very surprised!
(This used to be commit 44766c39e0027c762bee8b33b12c621c109a3267)
2000-06-12 19:53:31 +04:00
sys_select_signal ( ) ;
2000-06-10 18:29:31 +04:00
}
2000-06-11 09:57:58 +04:00
/****************************************************************************
2001-10-21 01:59:34 +04:00
Try to gain a linux capability .
2001-09-08 06:59:23 +04:00
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
2001-10-21 01:59:34 +04:00
2001-09-08 06:59:23 +04:00
static void set_capability ( unsigned capability )
2000-06-11 09:57:58 +04:00
{
2000-06-11 10:24:54 +04:00
# ifndef _LINUX_CAPABILITY_VERSION
# define _LINUX_CAPABILITY_VERSION 0x19980330
# endif
/* these can be removed when they are in glibc headers */
struct {
uint32 version ;
int pid ;
} header ;
struct {
uint32 effective ;
uint32 permitted ;
uint32 inheritable ;
} data ;
header . version = _LINUX_CAPABILITY_VERSION ;
header . pid = 0 ;
if ( capget ( & header , & data ) = = - 1 ) {
DEBUG ( 3 , ( " Unable to get kernel capabilities (%s) \n " , strerror ( errno ) ) ) ;
2000-06-11 09:57:58 +04:00
return ;
}
2000-06-11 10:24:54 +04:00
data . effective | = ( 1 < < capability ) ;
if ( capset ( & header , & data ) = = - 1 ) {
DEBUG ( 3 , ( " Unable to set %d capability (%s) \n " ,
capability , strerror ( errno ) ) ) ;
2000-06-11 09:57:58 +04:00
}
}
/****************************************************************************
2001-10-21 01:59:34 +04:00
Call SETLEASE . If we get EACCES then we try setting up the right capability and
try again
2000-06-11 09:57:58 +04:00
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
2001-10-21 01:59:34 +04:00
2000-06-11 09:57:58 +04:00
static int linux_setlease ( int fd , int leasetype )
{
int ret ;
2000-06-12 21:06:00 +04:00
if ( fcntl ( fd , F_SETSIG , RT_SIGNAL_LEASE ) = = - 1 ) {
DEBUG ( 3 , ( " Failed to set signal handler for kernel lease \n " ) ) ;
return - 1 ;
}
2000-06-11 09:57:58 +04:00
ret = fcntl ( fd , F_SETLEASE , leasetype ) ;
if ( ret = = - 1 & & errno = = EACCES ) {
2000-06-11 10:24:54 +04:00
set_capability ( CAP_LEASE ) ;
2000-06-11 09:57:58 +04:00
ret = fcntl ( fd , F_SETLEASE , leasetype ) ;
}
return ret ;
}
2000-06-10 18:29:31 +04:00
/****************************************************************************
* Deal with the Linux kernel < - - > smbd
* oplock break protocol .
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
2001-10-21 01:59:34 +04:00
2000-06-10 18:29:31 +04:00
static BOOL linux_oplock_receive_message ( fd_set * fds , char * buffer , int buffer_len )
{
2002-07-15 14:35:28 +04:00
int fd ;
2001-10-21 01:59:34 +04:00
struct files_struct * fsp ;
2000-06-10 18:29:31 +04:00
2002-07-15 14:35:28 +04:00
BlockSignals ( True , RT_SIGNAL_LEASE ) ;
fd = fd_pending_array [ 0 ] ;
fsp = file_find_fd ( fd ) ;
fd_pending_array [ 0 ] = ( SIG_ATOMIC_T ) - 1 ;
if ( signals_received > 1 )
memmove ( ( void * ) & fd_pending_array [ 0 ] , ( void * ) & fd_pending_array [ 1 ] ,
sizeof ( SIG_ATOMIC_T ) * ( signals_received - 1 ) ) ;
signals_received - - ;
/* now we can receive more signals */
BlockSignals ( False , RT_SIGNAL_LEASE ) ;
2000-06-10 18:29:31 +04:00
2002-07-15 14:35:28 +04:00
if ( fsp = = NULL ) {
DEBUG ( 0 , ( " Invalid file descriptor %d in kernel oplock break! \n " , ( int ) fd ) ) ;
return False ;
2000-06-10 18:29:31 +04:00
}
2002-07-15 14:35:28 +04:00
DEBUG ( 3 , ( " linux_oplock_receive_message: kernel oplock break request received for \
dev = % x , inode = % .0f fd = % d , fileid = % lu \ n " , (unsigned int)fsp->dev, (double)fsp->inode,
fd , fsp - > file_id ) ) ;
2000-06-10 18:29:31 +04:00
/*
* Create a kernel oplock break message .
*/
/* Setup the message header */
SIVAL ( buffer , OPBRK_CMD_LEN_OFFSET , KERNEL_OPLOCK_BREAK_MSG_LEN ) ;
SSVAL ( buffer , OPBRK_CMD_PORT_OFFSET , 0 ) ;
buffer + = OPBRK_CMD_HEADER_LEN ;
SSVAL ( buffer , OPBRK_MESSAGE_CMD_OFFSET , KERNEL_OPLOCK_BREAK_CMD ) ;
2001-10-21 01:59:34 +04:00
memcpy ( buffer + KERNEL_OPLOCK_BREAK_DEV_OFFSET , ( char * ) & fsp - > dev , sizeof ( fsp - > dev ) ) ;
memcpy ( buffer + KERNEL_OPLOCK_BREAK_INODE_OFFSET , ( char * ) & fsp - > inode , sizeof ( fsp - > inode ) ) ;
memcpy ( buffer + KERNEL_OPLOCK_BREAK_FILEID_OFFSET , ( char * ) & fsp - > file_id , sizeof ( fsp - > file_id ) ) ;
2000-06-10 18:29:31 +04:00
2002-07-15 14:35:28 +04:00
return True ;
2000-06-10 18:29:31 +04:00
}
/****************************************************************************
Attempt to set an kernel oplock on a file .
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
2001-10-21 01:59:34 +04:00
2000-06-10 18:29:31 +04:00
static BOOL linux_set_kernel_oplock ( files_struct * fsp , int oplock_type )
{
2000-06-11 09:57:58 +04:00
if ( linux_setlease ( fsp - > fd , F_WRLCK ) = = - 1 ) {
2002-07-15 14:35:28 +04:00
DEBUG ( 3 , ( " linux_set_kernel_oplock: Refused oplock on file %s, fd = %d, dev = %x, \
2000-06-11 09:57:58 +04:00
inode = % .0f . ( % s ) \ n " ,
fsp - > fsp_name , fsp - > fd ,
( unsigned int ) fsp - > dev , ( double ) fsp - > inode , strerror ( errno ) ) ) ;
2000-06-10 18:29:31 +04:00
return False ;
}
2002-07-15 14:35:28 +04:00
DEBUG ( 3 , ( " linux_set_kernel_oplock: got kernel oplock on file %s, dev = %x, inode = %.0f, file_id = %lu \n " ,
2001-10-21 01:59:34 +04:00
fsp - > fsp_name , ( unsigned int ) fsp - > dev , ( double ) fsp - > inode , fsp - > file_id ) ) ;
2000-06-10 18:29:31 +04:00
return True ;
}
/****************************************************************************
Release a kernel oplock on a file .
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
2001-10-21 01:59:34 +04:00
2000-06-10 18:29:31 +04:00
static void linux_release_kernel_oplock ( files_struct * fsp )
{
if ( DEBUGLVL ( 10 ) ) {
/*
* Check and print out the current kernel
* oplock state of this file .
*/
int state = fcntl ( fsp - > fd , F_GETLEASE , 0 ) ;
2002-07-15 14:35:28 +04:00
dbgtext ( " linux_release_kernel_oplock: file %s, dev = %x, inode = %.0f file_id = %lu has kernel \
2000-06-10 18:29:31 +04:00
oplock state of % x . \ n " , fsp->fsp_name, (unsigned int)fsp->dev,
2001-10-21 01:59:34 +04:00
( double ) fsp - > inode , fsp - > file_id , state ) ;
2000-06-10 18:29:31 +04:00
}
/*
* Remove the kernel oplock on this file .
*/
2000-06-11 09:57:58 +04:00
if ( linux_setlease ( fsp - > fd , F_UNLCK ) = = - 1 ) {
2000-06-10 18:29:31 +04:00
if ( DEBUGLVL ( 0 ) ) {
2002-07-15 14:35:28 +04:00
dbgtext ( " linux_release_kernel_oplock: Error when removing kernel oplock on file " ) ;
2001-11-07 01:07:04 +03:00
dbgtext ( " %s, dev = %x, inode = %.0f, file_id = %lu. Error was %s \n " ,
2000-06-10 18:29:31 +04:00
fsp - > fsp_name , ( unsigned int ) fsp - > dev ,
2001-10-21 01:59:34 +04:00
( double ) fsp - > inode , fsp - > file_id , strerror ( errno ) ) ;
2000-06-10 18:29:31 +04:00
}
}
}
/****************************************************************************
2001-10-21 01:59:34 +04:00
Parse a kernel oplock message .
2000-06-10 18:29:31 +04:00
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
2001-10-21 01:59:34 +04:00
static BOOL linux_kernel_oplock_parse ( char * msg_start , int msg_len , SMB_INO_T * inode ,
SMB_DEV_T * dev , unsigned long * file_id )
2000-06-10 18:29:31 +04:00
{
/* Ensure that the msg length is correct. */
if ( msg_len ! = KERNEL_OPLOCK_BREAK_MSG_LEN ) {
2003-11-03 17:34:25 +03:00
DEBUG ( 0 , ( " incorrect length for KERNEL_OPLOCK_BREAK_CMD (was %d, should be %lu). \n " ,
msg_len , ( unsigned long ) KERNEL_OPLOCK_BREAK_MSG_LEN ) ) ;
2000-06-10 18:29:31 +04:00
return False ;
}
2001-10-21 01:59:34 +04:00
memcpy ( ( char * ) inode , msg_start + KERNEL_OPLOCK_BREAK_INODE_OFFSET , sizeof ( * inode ) ) ;
memcpy ( ( char * ) dev , msg_start + KERNEL_OPLOCK_BREAK_DEV_OFFSET , sizeof ( * dev ) ) ;
memcpy ( ( char * ) file_id , msg_start + KERNEL_OPLOCK_BREAK_FILEID_OFFSET , sizeof ( * file_id ) ) ;
2000-06-10 18:29:31 +04:00
2001-11-07 01:07:04 +03:00
DEBUG ( 3 , ( " kernel oplock break request for file dev = %x, inode = %.0f, file_id = %lu \n " ,
2001-10-21 01:59:34 +04:00
( unsigned int ) * dev , ( double ) * inode , * file_id ) ) ;
2000-06-10 18:29:31 +04:00
return True ;
}
/****************************************************************************
2001-10-21 01:59:34 +04:00
See if a oplock message is waiting .
2000-06-10 18:29:31 +04:00
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
2001-10-21 01:59:34 +04:00
2000-06-10 18:29:31 +04:00
static BOOL linux_oplock_msg_waiting ( fd_set * fds )
{
2002-07-15 14:35:28 +04:00
return signals_received ! = 0 ;
2000-06-10 18:29:31 +04:00
}
2000-06-11 10:46:05 +04:00
/****************************************************************************
2001-10-21 01:59:34 +04:00
See if the kernel supports oplocks .
2000-06-11 10:46:05 +04:00
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
2001-10-21 01:59:34 +04:00
2000-06-11 10:46:05 +04:00
static BOOL linux_oplocks_available ( void )
{
int fd , ret ;
fd = open ( " /dev/null " , O_RDONLY ) ;
2001-10-21 01:59:34 +04:00
if ( fd = = - 1 )
return False ; /* uggh! */
2000-06-11 10:46:05 +04:00
ret = fcntl ( fd , F_GETLEASE , 0 ) ;
close ( fd ) ;
return ret = = F_UNLCK ;
}
2000-06-10 18:29:31 +04:00
/****************************************************************************
2001-10-21 01:59:34 +04:00
Setup kernel oplocks .
2000-06-10 18:29:31 +04:00
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
2001-10-21 01:59:34 +04:00
2000-06-10 18:29:31 +04:00
struct kernel_oplocks * linux_init_kernel_oplocks ( void )
{
static struct kernel_oplocks koplocks ;
struct sigaction act ;
2000-06-11 10:46:05 +04:00
if ( ! linux_oplocks_available ( ) ) {
DEBUG ( 3 , ( " Linux kernel oplocks not available \n " ) ) ;
return NULL ;
}
2003-01-14 10:26:12 +03:00
ZERO_STRUCT ( act ) ;
2001-10-21 01:59:34 +04:00
act . sa_handler = NULL ;
act . sa_sigaction = signal_handler ;
act . sa_flags = SA_SIGINFO ;
2003-05-15 21:37:52 +04:00
sigemptyset ( & act . sa_mask ) ;
2001-10-21 01:59:34 +04:00
if ( sigaction ( RT_SIGNAL_LEASE , & act , NULL ) ! = 0 ) {
totally rewrote the async signal, notification and oplock notification
handling in Samba. This was needed due to several limitations and
races in the previous code - as a side effect the new code is much
cleaner :)
in summary:
- changed sys_select() to avoid a signal/select race condition. It is a
rare race but once we have signals doing notification and oplocks it
is important.
- changed our main processing loop to take advantage of the new
sys_select semantics
- split the notify code into implementaion dependent and general
parts. Added the following structure that defines an implementation:
struct cnotify_fns {
void * (*register_notify)(connection_struct *conn, char *path, uint32 flags);
BOOL (*check_notify)(connection_struct *conn, uint16 vuid, char *path, uint32 flags, void *data, time_t t);
void (*remove_notify)(void *data);
};
then I wrote two implementations, one using hash/poll (like our old
code) and the other using the new Linux kernel change notify. It
should be easy to add other change notify implementations by creating
a sructure of the above type.
- fixed a bug in change notify where we were returning the wrong error
code.
- rewrote the core change notify code to be much simpler
- moved to real-time signals for leases and change notify
Amazingly, it all seems to work. I was very surprised!
(This used to be commit 44766c39e0027c762bee8b33b12c621c109a3267)
2000-06-12 19:53:31 +04:00
DEBUG ( 0 , ( " Failed to setup RT_SIGNAL_LEASE handler \n " ) ) ;
2000-06-10 18:29:31 +04:00
return NULL ;
2001-10-21 01:59:34 +04:00
}
2000-06-10 18:29:31 +04:00
koplocks . receive_message = linux_oplock_receive_message ;
koplocks . set_oplock = linux_set_kernel_oplock ;
koplocks . release_oplock = linux_release_kernel_oplock ;
koplocks . parse_message = linux_kernel_oplock_parse ;
koplocks . msg_waiting = linux_oplock_msg_waiting ;
koplocks . notification_fd = - 1 ;
2003-03-28 04:07:05 +03:00
/* the signal can start off blocked due to a bug in bash */
BlockSignals ( False , RT_SIGNAL_LEASE ) ;
2000-06-12 21:06:00 +04:00
DEBUG ( 3 , ( " Linux kernel oplocks enabled \n " ) ) ;
2000-06-10 18:29:31 +04:00
return & koplocks ;
}
# else
/* Empty function compiled only when HAVE_KERNEL_OPLOCKS_LINUX is off.
   NOTE(review): presumably exists so this file still defines a symbol in
   that configuration - confirm. */
void oplock_linux_dummy ( void ) { }
# endif /* HAVE_KERNEL_OPLOCKS_LINUX */