2008-02-21 18:14:08 +03:00
/*
* Simulate the Posix AIO using mmap / fork
*
* Copyright ( C ) Volker Lendecke 2008
2010-06-05 08:00:24 +04:00
* Copyright ( C ) Jeremy Allison 2010
2008-02-21 18:14:08 +03:00
*
* This program is free software ; you can redistribute it and / or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation ; either version 2 of the License , or
* ( at your option ) any later version .
*
* This program is distributed in the hope that it will be useful ,
* but WITHOUT ANY WARRANTY ; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE . See the
* GNU General Public License for more details .
*
* You should have received a copy of the GNU General Public License
* along with this program ; if not , write to the Free Software
* Foundation , Inc . , 675 Mass Ave , Cambridge , MA 02139 , USA .
*/
# include "includes.h"
2011-03-31 00:24:18 +04:00
# include "system/filesys.h"
2011-02-25 19:03:27 +03:00
# include "system/shmem.h"
2011-03-31 00:24:18 +04:00
# include "smbd/smbd.h"
2012-04-08 23:49:59 +04:00
# include "smbd/globals.h"
2012-07-09 13:10:30 +04:00
# include "lib/async_req/async_sock.h"
# include "lib/util/tevent_unix.h"
2008-02-21 18:14:08 +03:00
2014-05-31 13:58:01 +04:00
# if !defined(HAVE_MSGHDR_MSG_CONTROL) && !defined(HAVE_MSGHDR_MSG_ACCTRIGHTS)
# error Can not pass file descriptors
# endif
2012-07-16 14:44:42 +04:00
# undef recvmsg
2011-04-20 20:21:25 +04:00
# ifndef MAP_FILE
# define MAP_FILE 0
# endif
2012-08-21 13:22:37 +04:00
/*
 * Module settings created in aio_fork_connect() and copied into every
 * command that is sent to a helper child.
 */
struct aio_fork_config {
	/* When set, the child sleeps a random time before each request. */
	bool erratic_testing_mode;
};
2008-02-21 18:14:08 +03:00
/*
 * A memory mapping together with the length needed to unmap it again.
 */
struct mmap_area {
	size_t size;		/* length of the mapping in bytes */
	volatile void *ptr;	/* start address as returned by mmap() */
};
static int mmap_area_destructor ( struct mmap_area * area )
{
munmap ( ( void * ) area - > ptr , area - > size ) ;
return 0 ;
}
/*
 * Allocate a struct mmap_area on mem_ctx and back it with a MAP_SHARED
 * mapping of /dev/zero.
 *
 * mem_ctx: talloc parent of the returned object
 * size:    length of the mapping in bytes
 *
 * Returns the new area (its talloc destructor unmaps the memory) or NULL on
 * failure. On failure errno describes what went wrong: the value is saved
 * right after the failing call and restored before returning, because
 * close() and DEBUG() run in between and callers such as create_aio_child()
 * read errno afterwards.
 */
static struct mmap_area *mmap_area_init(TALLOC_CTX *mem_ctx, size_t size)
{
	struct mmap_area *result;
	int saved_errno;
	int fd;

	result = talloc(mem_ctx, struct mmap_area);
	if (result == NULL) {
		DEBUG(0, ("talloc failed\n"));
		errno = ENOMEM;
		return NULL;
	}

	fd = open("/dev/zero", O_RDWR);
	if (fd == -1) {
		saved_errno = errno;
		DEBUG(3, ("open(\"/dev/zero\") failed: %s\n",
			  strerror(saved_errno)));
		goto fail;
	}

	result->ptr = mmap(NULL, size, PROT_READ|PROT_WRITE,
			   MAP_SHARED|MAP_FILE, fd, 0);
	/* Remember the mmap() error before close() can replace it. */
	saved_errno = errno;
	close(fd);

	if (result->ptr == MAP_FAILED) {
		DEBUG(1, ("mmap failed: %s\n", strerror(saved_errno)));
		goto fail;
	}

	result->size = size;
	talloc_set_destructor(result, mmap_area_destructor);

	return result;

fail:
	/* No destructor is installed yet, so this only frees the struct. */
	TALLOC_FREE(result);
	errno = saved_errno;
	return NULL;
}
2012-07-13 15:59:52 +04:00
/* Operations a helper child can be asked to perform. */
enum cmd_type {
	READ_CMD,
	WRITE_CMD,
	FSYNC_CMD
};

/*
 * Map a command code to a short name for debug output. Values that are not
 * one of the enum constants yield "<UNKNOWN>".
 */
static const char *cmd_type_str(enum cmd_type cmd)
{
	if (cmd == READ_CMD) {
		return "READ";
	}
	if (cmd == WRITE_CMD) {
		return "WRITE";
	}
	if (cmd == FSYNC_CMD) {
		return "FSYNC";
	}
	return "<UNKNOWN>";
}
2008-02-21 18:14:08 +03:00
struct rw_cmd {
size_t n ;
2012-04-05 08:53:08 +04:00
off_t offset ;
2012-07-13 15:59:52 +04:00
enum cmd_type cmd ;
2012-08-21 13:22:37 +04:00
bool erratic_testing_mode ;
2008-02-21 18:14:08 +03:00
} ;
/* Reply a helper child writes back once the requested call has finished. */
struct rw_ret {
	ssize_t size;	/* return value of the call performed by the child */
	int ret_errno;	/* errno of that call; only filled in when size is -1 */
};
struct aio_child_list;

/*
 * Parent-side bookkeeping for one forked helper process.
 */
struct aio_child {
	struct aio_child *prev, *next;	/* links within list->children */
	struct aio_child_list *list;	/* list this child belongs to */
	pid_t pid;			/* value returned by fork() */
	int sockfd;			/* our end of the socketpair */
	struct mmap_area *map;		/* buffer the child reads/writes */
	bool dont_delete;		/* Marked as in use since last cleanup */
	bool busy;			/* a request is outstanding */
};
/*
 * All helper children of one VFS handle plus the timer that reaps idle ones.
 */
struct aio_child_list {
	struct aio_child *children;		/* head of the child list */
	struct tevent_timer *cleanup_event;	/* pending aio_child_cleanup() */
};
/*
 * Free callback registered together with the child list as VFS handle data
 * in init_aio_children(): frees the talloc object that *p refers to.
 */
static void free_aio_children ( void * * p )
{
TALLOC_FREE ( * p ) ;
}
static ssize_t read_fd ( int fd , void * ptr , size_t nbytes , int * recvfd )
{
struct msghdr msg ;
struct iovec iov [ 1 ] ;
ssize_t n ;
# ifndef HAVE_MSGHDR_MSG_CONTROL
int newfd ;
2014-05-31 13:58:01 +04:00
msg . msg_accrights = ( caddr_t ) & newfd ;
msg . msg_accrightslen = sizeof ( int ) ;
# else
2008-02-21 18:14:08 +03:00
union {
struct cmsghdr cm ;
char control [ CMSG_SPACE ( sizeof ( int ) ) ] ;
} control_un ;
struct cmsghdr * cmptr ;
msg . msg_control = control_un . control ;
msg . msg_controllen = sizeof ( control_un . control ) ;
# endif
msg . msg_name = NULL ;
msg . msg_namelen = 0 ;
2012-06-11 13:16:12 +04:00
msg . msg_flags = 0 ;
2008-02-21 18:14:08 +03:00
2009-05-12 22:45:37 +04:00
iov [ 0 ] . iov_base = ( void * ) ptr ;
2008-02-21 18:14:08 +03:00
iov [ 0 ] . iov_len = nbytes ;
msg . msg_iov = iov ;
msg . msg_iovlen = 1 ;
if ( ( n = recvmsg ( fd , & msg , 0 ) ) < = 0 ) {
return ( n ) ;
}
# ifdef HAVE_MSGHDR_MSG_CONTROL
if ( ( cmptr = CMSG_FIRSTHDR ( & msg ) ) ! = NULL
& & cmptr - > cmsg_len = = CMSG_LEN ( sizeof ( int ) ) ) {
if ( cmptr - > cmsg_level ! = SOL_SOCKET ) {
DEBUG ( 10 , ( " control level != SOL_SOCKET " ) ) ;
errno = EINVAL ;
return - 1 ;
}
if ( cmptr - > cmsg_type ! = SCM_RIGHTS ) {
DEBUG ( 10 , ( " control type != SCM_RIGHTS " ) ) ;
errno = EINVAL ;
return - 1 ;
}
2012-03-31 12:37:15 +04:00
memcpy ( recvfd , CMSG_DATA ( cmptr ) , sizeof ( * recvfd ) ) ;
2008-02-21 18:14:08 +03:00
} else {
* recvfd = - 1 ; /* descriptor was not passed */
}
# else
if ( msg . msg_accrightslen = = sizeof ( int ) ) {
* recvfd = newfd ;
}
else {
* recvfd = - 1 ; /* descriptor was not passed */
}
# endif
return ( n ) ;
}
/*
 * Send nbytes of payload from ptr over socket fd and pass the descriptor
 * sendfd along with it.
 *
 * Returns the result of sendmsg().
 */
static ssize_t write_fd(int fd, void *ptr, size_t nbytes, int sendfd)
{
	struct msghdr hdr;
	struct iovec vec[1];
#ifdef HAVE_MSGHDR_MSG_CONTROL
	union {
		struct cmsghdr cm;
		char control[CMSG_SPACE(sizeof(int))];
	} anc;
	struct cmsghdr *cmsg;
#endif

	ZERO_STRUCT(hdr);

#ifdef HAVE_MSGHDR_MSG_CONTROL
	ZERO_STRUCT(anc);

	hdr.msg_control = anc.control;
	hdr.msg_controllen = sizeof(anc.control);

	/* One SCM_RIGHTS control message carrying exactly one descriptor. */
	cmsg = CMSG_FIRSTHDR(&hdr);
	cmsg->cmsg_len = CMSG_LEN(sizeof(int));
	cmsg->cmsg_level = SOL_SOCKET;
	cmsg->cmsg_type = SCM_RIGHTS;
	memcpy(CMSG_DATA(cmsg), &sendfd, sizeof(sendfd));
#else
	hdr.msg_accrights = (caddr_t) &sendfd;
	hdr.msg_accrightslen = sizeof(int);
#endif

	hdr.msg_name = NULL;
	hdr.msg_namelen = 0;

	ZERO_STRUCT(vec);
	vec[0].iov_base = (void *)ptr;
	vec[0].iov_len = nbytes;

	hdr.msg_iov = vec;
	hdr.msg_iovlen = 1;

	return sendmsg(fd, &hdr, 0);
}
2013-02-18 12:59:08 +04:00
static void aio_child_cleanup ( struct tevent_context * event_ctx ,
2013-02-18 13:18:29 +04:00
struct tevent_timer * te ,
2009-01-05 12:22:50 +03:00
struct timeval now ,
2008-02-21 18:14:08 +03:00
void * private_data )
{
struct aio_child_list * list = talloc_get_type_abort (
private_data , struct aio_child_list ) ;
struct aio_child * child , * next ;
TALLOC_FREE ( list - > cleanup_event ) ;
for ( child = list - > children ; child ! = NULL ; child = next ) {
next = child - > next ;
2012-07-09 13:10:30 +04:00
if ( child - > busy ) {
2008-02-21 18:14:08 +03:00
DEBUG ( 10 , ( " child %d currently active \n " ,
( int ) child - > pid ) ) ;
continue ;
}
if ( child - > dont_delete ) {
DEBUG ( 10 , ( " Child %d was active since last cleanup \n " ,
( int ) child - > pid ) ) ;
child - > dont_delete = false ;
continue ;
}
DEBUG ( 10 , ( " Child %d idle for more than 30 seconds, "
" deleting \n " , ( int ) child - > pid ) ) ;
TALLOC_FREE ( child ) ;
2011-01-13 17:59:18 +03:00
child = next ;
2008-02-21 18:14:08 +03:00
}
if ( list - > children ! = NULL ) {
/*
* Re - schedule the next cleanup round
*/
2013-02-18 13:57:54 +04:00
list - > cleanup_event = tevent_add_timer ( server_event_context ( ) , list ,
2009-01-05 12:22:50 +03:00
timeval_add ( & now , 30 , 0 ) ,
2008-02-21 18:14:08 +03:00
aio_child_cleanup , list ) ;
}
}
static struct aio_child_list * init_aio_children ( struct vfs_handle_struct * handle )
{
struct aio_child_list * data = NULL ;
if ( SMB_VFS_HANDLE_TEST_DATA ( handle ) ) {
SMB_VFS_HANDLE_GET_DATA ( handle , data , struct aio_child_list ,
return NULL ) ;
}
if ( data = = NULL ) {
2011-06-07 05:44:43 +04:00
data = talloc_zero ( NULL , struct aio_child_list ) ;
2008-02-21 18:14:08 +03:00
if ( data = = NULL ) {
return NULL ;
}
}
/*
* Regardless of whether the child_list had been around or not , make
* sure that we have a cleanup timed event . This timed event will
* delete itself when it finds that no children are around anymore .
*/
if ( data - > cleanup_event = = NULL ) {
2013-02-18 13:57:54 +04:00
data - > cleanup_event = tevent_add_timer ( server_event_context ( ) , data ,
2008-02-21 18:14:08 +03:00
timeval_current_ofs ( 30 , 0 ) ,
aio_child_cleanup , data ) ;
if ( data - > cleanup_event = = NULL ) {
TALLOC_FREE ( data ) ;
return NULL ;
}
}
if ( ! SMB_VFS_HANDLE_TEST_DATA ( handle ) ) {
SMB_VFS_HANDLE_SET_DATA ( handle , data , free_aio_children ,
struct aio_child_list , return False ) ;
}
return data ;
}
static void aio_child_loop ( int sockfd , struct mmap_area * map )
{
while ( true ) {
int fd = - 1 ;
ssize_t ret ;
struct rw_cmd cmd_struct ;
struct rw_ret ret_struct ;
ret = read_fd ( sockfd , & cmd_struct , sizeof ( cmd_struct ) , & fd ) ;
if ( ret ! = sizeof ( cmd_struct ) ) {
DEBUG ( 10 , ( " read_fd returned %d: %s \n " , ( int ) ret ,
strerror ( errno ) ) ) ;
exit ( 1 ) ;
}
DEBUG ( 10 , ( " aio_child_loop: %s %d bytes at %d from fd %d \n " ,
2012-07-13 15:59:52 +04:00
cmd_type_str ( cmd_struct . cmd ) ,
2008-02-21 18:14:08 +03:00
( int ) cmd_struct . n , ( int ) cmd_struct . offset , fd ) ) ;
2012-08-21 13:22:37 +04:00
if ( cmd_struct . erratic_testing_mode ) {
2008-02-21 18:14:08 +03:00
/*
2012-08-21 02:16:24 +04:00
* For developer testing , we want erratic behaviour for
2008-02-21 18:14:08 +03:00
* async I / O times
*/
uint8_t randval ;
unsigned msecs ;
/*
* use generate_random_buffer , we just forked from a
* common parent state
*/
generate_random_buffer ( & randval , sizeof ( randval ) ) ;
msecs = randval + 20 ;
DEBUG ( 10 , ( " delaying for %u msecs \n " , msecs ) ) ;
smb_msleep ( msecs ) ;
}
ZERO_STRUCT ( ret_struct ) ;
2012-07-13 15:59:52 +04:00
switch ( cmd_struct . cmd ) {
case READ_CMD :
2008-02-21 18:14:08 +03:00
ret_struct . size = sys_pread (
fd , ( void * ) map - > ptr , cmd_struct . n ,
cmd_struct . offset ) ;
2010-06-05 08:00:24 +04:00
#if 0
/* This breaks "make test" when run with aio_fork module. */
2012-08-21 02:16:24 +04:00
# ifdef DEVELOPER
2009-09-14 05:21:30 +04:00
ret_struct . size = MAX ( 1 , ret_struct . size * 0.9 ) ;
2010-06-05 08:00:24 +04:00
# endif
2009-09-14 05:21:30 +04:00
# endif
2012-07-13 15:59:52 +04:00
break ;
case WRITE_CMD :
2008-02-21 18:14:08 +03:00
ret_struct . size = sys_pwrite (
fd , ( void * ) map - > ptr , cmd_struct . n ,
cmd_struct . offset ) ;
2012-07-13 15:59:52 +04:00
break ;
2012-07-13 16:05:11 +04:00
case FSYNC_CMD :
ret_struct . size = fsync ( fd ) ;
break ;
2012-07-13 15:59:52 +04:00
default :
ret_struct . size = - 1 ;
errno = EINVAL ;
2008-02-21 18:14:08 +03:00
}
DEBUG ( 10 , ( " aio_child_loop: syscall returned %d \n " ,
( int ) ret_struct . size ) ) ;
if ( ret_struct . size = = - 1 ) {
ret_struct . ret_errno = errno ;
}
2009-05-18 11:36:16 +04:00
/*
* Close the fd before telling our parent we ' re done . The
* parent might close and re - open the file very quickly , and
* with system - level share modes ( GPFS ) we would get an
* unjustified SHARING_VIOLATION .
*/
close ( fd ) ;
2008-02-21 18:14:08 +03:00
ret = write_data ( sockfd , ( char * ) & ret_struct ,
sizeof ( ret_struct ) ) ;
if ( ret ! = sizeof ( ret_struct ) ) {
DEBUG ( 10 , ( " could not write ret_struct: %s \n " ,
strerror ( errno ) ) ) ;
exit ( 2 ) ;
}
}
}
static int aio_child_destructor ( struct aio_child * child )
{
2011-01-13 18:04:36 +03:00
char c = 0 ;
2012-07-09 13:10:30 +04:00
SMB_ASSERT ( ! child - > busy ) ;
2011-01-13 18:04:36 +03:00
DEBUG ( 10 , ( " aio_child_destructor: removing child %d on fd %d \n " ,
child - > pid , child - > sockfd ) ) ;
/*
* closing the sockfd makes the child not return from recvmsg ( ) on RHEL
* 5.5 so instead force the child to exit by writing bad data to it
*/
write ( child - > sockfd , & c , sizeof ( c ) ) ;
2008-02-21 18:14:08 +03:00
close ( child - > sockfd ) ;
DLIST_REMOVE ( child - > list - > children , child ) ;
return 0 ;
}
2009-05-18 11:49:23 +04:00
/*
* We have to close all fd ' s in open files , we might incorrectly hold a system
* level share mode on a file .
*/
static struct files_struct * close_fsp_fd ( struct files_struct * fsp ,
void * private_data )
{
if ( ( fsp - > fh ! = NULL ) & & ( fsp - > fh - > fd ! = - 1 ) ) {
close ( fsp - > fh - > fd ) ;
fsp - > fh - > fd = - 1 ;
}
return NULL ;
}
2012-07-09 11:00:55 +04:00
static int create_aio_child ( struct smbd_server_connection * sconn ,
struct aio_child_list * children ,
size_t map_size ,
struct aio_child * * presult )
2008-02-21 18:14:08 +03:00
{
struct aio_child * result ;
int fdpair [ 2 ] ;
2012-07-09 11:00:55 +04:00
int ret ;
2008-02-21 18:14:08 +03:00
fdpair [ 0 ] = fdpair [ 1 ] = - 1 ;
2011-06-07 05:44:43 +04:00
result = talloc_zero ( children , struct aio_child ) ;
2012-07-09 11:00:55 +04:00
if ( result = = NULL ) {
return ENOMEM ;
}
2008-02-21 18:14:08 +03:00
if ( socketpair ( AF_UNIX , SOCK_STREAM , 0 , fdpair ) = = - 1 ) {
2012-07-09 11:00:55 +04:00
ret = errno ;
2008-02-21 18:14:08 +03:00
DEBUG ( 10 , ( " socketpair() failed: %s \n " , strerror ( errno ) ) ) ;
goto fail ;
}
DEBUG ( 10 , ( " fdpair = %d/%d \n " , fdpair [ 0 ] , fdpair [ 1 ] ) ) ;
result - > map = mmap_area_init ( result , map_size ) ;
if ( result - > map = = NULL ) {
2012-07-09 11:00:55 +04:00
ret = errno ;
2008-02-21 18:14:08 +03:00
DEBUG ( 0 , ( " Could not create mmap area \n " ) ) ;
goto fail ;
}
2012-03-24 23:17:08 +04:00
result - > pid = fork ( ) ;
2008-02-21 18:14:08 +03:00
if ( result - > pid = = - 1 ) {
2012-07-09 11:00:55 +04:00
ret = errno ;
2008-02-21 18:14:08 +03:00
DEBUG ( 0 , ( " fork failed: %s \n " , strerror ( errno ) ) ) ;
goto fail ;
}
if ( result - > pid = = 0 ) {
close ( fdpair [ 0 ] ) ;
result - > sockfd = fdpair [ 1 ] ;
2010-09-27 05:53:00 +04:00
files_forall ( sconn , close_fsp_fd , NULL ) ;
2008-02-21 18:14:08 +03:00
aio_child_loop ( result - > sockfd , result - > map ) ;
}
2011-01-13 18:04:36 +03:00
DEBUG ( 10 , ( " Child %d created with sockfd %d \n " ,
result - > pid , fdpair [ 0 ] ) ) ;
2008-02-21 18:14:08 +03:00
result - > sockfd = fdpair [ 0 ] ;
close ( fdpair [ 1 ] ) ;
result - > list = children ;
DLIST_ADD ( children - > children , result ) ;
talloc_set_destructor ( result , aio_child_destructor ) ;
* presult = result ;
2012-07-09 11:00:55 +04:00
return 0 ;
2008-02-21 18:14:08 +03:00
fail :
if ( fdpair [ 0 ] ! = - 1 ) close ( fdpair [ 0 ] ) ;
if ( fdpair [ 1 ] ! = - 1 ) close ( fdpair [ 1 ] ) ;
TALLOC_FREE ( result ) ;
2012-07-09 11:00:55 +04:00
return ret ;
2008-02-21 18:14:08 +03:00
}
2012-07-09 11:00:55 +04:00
static int get_idle_child ( struct vfs_handle_struct * handle ,
struct aio_child * * pchild )
2008-02-21 18:14:08 +03:00
{
struct aio_child_list * children ;
struct aio_child * child ;
children = init_aio_children ( handle ) ;
if ( children = = NULL ) {
2012-07-09 11:00:55 +04:00
return ENOMEM ;
2008-02-21 18:14:08 +03:00
}
for ( child = children - > children ; child ! = NULL ; child = child - > next ) {
2012-07-09 13:10:30 +04:00
if ( ! child - > busy ) {
2008-02-21 18:14:08 +03:00
break ;
}
}
if ( child = = NULL ) {
2012-07-09 11:00:55 +04:00
int ret ;
2008-02-21 18:14:08 +03:00
DEBUG ( 10 , ( " no idle child found, creating new one \n " ) ) ;
2012-07-09 11:00:55 +04:00
ret = create_aio_child ( handle - > conn - > sconn , children ,
2010-09-27 05:53:00 +04:00
128 * 1024 , & child ) ;
2012-07-09 11:00:55 +04:00
if ( ret ! = 0 ) {
2008-02-21 18:14:08 +03:00
DEBUG ( 10 , ( " create_aio_child failed: %s \n " ,
2012-07-09 11:00:55 +04:00
strerror ( errno ) ) ) ;
2012-07-09 11:00:55 +04:00
return ret ;
2008-02-21 18:14:08 +03:00
}
}
child - > dont_delete = true ;
2012-07-09 13:10:30 +04:00
child - > busy = true ;
2008-02-21 18:14:08 +03:00
* pchild = child ;
2012-07-09 11:00:55 +04:00
return 0 ;
2008-02-21 18:14:08 +03:00
}
2012-07-09 13:10:30 +04:00
struct aio_fork_pread_state {
2008-02-21 18:14:08 +03:00
struct aio_child * child ;
ssize_t ret ;
2012-07-09 11:00:55 +04:00
int err ;
2012-07-09 13:10:30 +04:00
} ;
2008-02-21 18:14:08 +03:00
2012-07-09 13:10:30 +04:00
static void aio_fork_pread_done ( struct tevent_req * subreq ) ;
2008-02-21 18:14:08 +03:00
2012-07-09 13:10:30 +04:00
static struct tevent_req * aio_fork_pread_send ( struct vfs_handle_struct * handle ,
TALLOC_CTX * mem_ctx ,
struct tevent_context * ev ,
struct files_struct * fsp ,
void * data ,
size_t n , off_t offset )
2008-02-21 18:14:08 +03:00
{
2012-07-09 13:10:30 +04:00
struct tevent_req * req , * subreq ;
struct aio_fork_pread_state * state ;
2008-02-21 18:14:08 +03:00
struct rw_cmd cmd ;
2012-07-09 13:10:30 +04:00
ssize_t written ;
2012-07-09 11:00:55 +04:00
int err ;
2012-08-21 13:22:37 +04:00
struct aio_fork_config * config ;
2012-08-31 16:45:08 +04:00
2012-08-21 13:22:37 +04:00
SMB_VFS_HANDLE_GET_DATA ( handle , config ,
struct aio_fork_config ,
2012-08-31 16:45:08 +04:00
return NULL ) ;
2008-02-21 18:14:08 +03:00
2012-07-09 13:10:30 +04:00
req = tevent_req_create ( mem_ctx , & state , struct aio_fork_pread_state ) ;
if ( req = = NULL ) {
return NULL ;
}
if ( n > 128 * 1024 ) {
2008-02-21 18:14:08 +03:00
/* TODO: support variable buffers */
2012-07-09 13:10:30 +04:00
tevent_req_error ( req , EINVAL ) ;
return tevent_req_post ( req , ev ) ;
2008-02-21 18:14:08 +03:00
}
2012-07-09 13:10:30 +04:00
err = get_idle_child ( handle , & state - > child ) ;
2012-07-09 11:00:55 +04:00
if ( err ! = 0 ) {
2012-07-09 13:10:30 +04:00
tevent_req_error ( req , err ) ;
return tevent_req_post ( req , ev ) ;
2008-02-21 18:14:08 +03:00
}
ZERO_STRUCT ( cmd ) ;
2012-07-09 13:10:30 +04:00
cmd . n = n ;
cmd . offset = offset ;
2012-07-13 15:59:52 +04:00
cmd . cmd = READ_CMD ;
2012-08-21 13:22:37 +04:00
cmd . erratic_testing_mode = config - > erratic_testing_mode ;
2008-02-21 18:14:08 +03:00
DEBUG ( 10 , ( " sending fd %d to child %d \n " , fsp - > fh - > fd ,
2012-07-09 13:10:30 +04:00
( int ) state - > child - > pid ) ) ;
2008-02-21 18:14:08 +03:00
2012-07-09 13:10:30 +04:00
/*
* Not making this async . We ' re writing into an empty unix
* domain socket . This should never block .
*/
written = write_fd ( state - > child - > sockfd , & cmd , sizeof ( cmd ) ,
fsp - > fh - > fd ) ;
if ( written = = - 1 ) {
err = errno ;
TALLOC_FREE ( state - > child ) ;
DEBUG ( 10 , ( " write_fd failed: %s \n " , strerror ( err ) ) ) ;
tevent_req_error ( req , err ) ;
return tevent_req_post ( req , ev ) ;
2008-02-21 18:14:08 +03:00
}
2012-07-09 13:10:30 +04:00
subreq = read_packet_send ( state , ev , state - > child - > sockfd ,
sizeof ( struct rw_ret ) , NULL , NULL ) ;
if ( tevent_req_nomem ( subreq , req ) ) {
TALLOC_FREE ( state - > child ) ; /* we sent sth down */
return tevent_req_post ( req , ev ) ;
}
tevent_req_set_callback ( subreq , aio_fork_pread_done , req ) ;
return req ;
2008-02-21 18:14:08 +03:00
}
2012-07-09 13:10:30 +04:00
static void aio_fork_pread_done ( struct tevent_req * subreq )
2008-02-21 18:14:08 +03:00
{
2012-07-09 13:10:30 +04:00
struct tevent_req * req = tevent_req_callback_data (
subreq , struct tevent_req ) ;
struct aio_fork_pread_state * state = tevent_req_data (
req , struct aio_fork_pread_state ) ;
ssize_t nread ;
uint8_t * buf ;
int err ;
struct rw_ret * retbuf ;
2008-02-21 18:14:08 +03:00
2012-07-09 13:10:30 +04:00
nread = read_packet_recv ( subreq , talloc_tos ( ) , & buf , & err ) ;
TALLOC_FREE ( subreq ) ;
if ( nread = = - 1 ) {
TALLOC_FREE ( state - > child ) ;
tevent_req_error ( req , err ) ;
return ;
2008-02-21 18:14:08 +03:00
}
2012-07-09 13:10:30 +04:00
state - > child - > busy = false ;
2008-02-21 18:14:08 +03:00
2012-07-09 13:10:30 +04:00
retbuf = ( struct rw_ret * ) buf ;
state - > ret = retbuf - > size ;
state - > err = retbuf - > ret_errno ;
tevent_req_done ( req ) ;
2008-02-21 18:14:08 +03:00
}
2012-07-09 13:10:30 +04:00
static ssize_t aio_fork_pread_recv ( struct tevent_req * req , int * err )
2008-02-21 18:14:08 +03:00
{
2012-07-09 13:10:30 +04:00
struct aio_fork_pread_state * state = tevent_req_data (
req , struct aio_fork_pread_state ) ;
2008-02-21 18:14:08 +03:00
2012-07-09 13:10:30 +04:00
if ( tevent_req_is_unix_error ( req , err ) ) {
2008-02-21 18:14:08 +03:00
return - 1 ;
}
2012-07-09 13:10:30 +04:00
if ( state - > ret = = - 1 ) {
* err = state - > err ;
2012-04-13 02:04:08 +04:00
}
2012-07-09 13:10:30 +04:00
return state - > ret ;
}
2012-04-13 02:04:08 +04:00
2012-07-09 13:10:30 +04:00
/*
 * State of one asynchronous pwrite request.
 */
struct aio_fork_pwrite_state {
	struct aio_child *child;	/* helper child doing the write */
	ssize_t ret;			/* result reported by the child */
	int err;			/* errno reported by the child */
};
2008-02-21 18:14:08 +03:00
2012-07-09 13:10:30 +04:00
static void aio_fork_pwrite_done ( struct tevent_req * subreq ) ;
2008-02-21 18:14:08 +03:00
2012-07-09 13:10:30 +04:00
static struct tevent_req * aio_fork_pwrite_send (
struct vfs_handle_struct * handle , TALLOC_CTX * mem_ctx ,
struct tevent_context * ev , struct files_struct * fsp ,
const void * data , size_t n , off_t offset )
2008-02-21 18:14:08 +03:00
{
2012-07-09 13:10:30 +04:00
struct tevent_req * req , * subreq ;
struct aio_fork_pwrite_state * state ;
struct rw_cmd cmd ;
ssize_t written ;
int err ;
2012-08-21 13:22:37 +04:00
struct aio_fork_config * config ;
SMB_VFS_HANDLE_GET_DATA ( handle , config ,
struct aio_fork_config ,
return NULL ) ;
2008-02-21 18:14:08 +03:00
2012-07-09 13:10:30 +04:00
req = tevent_req_create ( mem_ctx , & state , struct aio_fork_pwrite_state ) ;
if ( req = = NULL ) {
return NULL ;
2008-02-21 18:14:08 +03:00
}
2012-07-09 13:10:30 +04:00
if ( n > 128 * 1024 ) {
/* TODO: support variable buffers */
tevent_req_error ( req , EINVAL ) ;
return tevent_req_post ( req , ev ) ;
}
2008-02-21 18:14:08 +03:00
2012-07-09 13:10:30 +04:00
err = get_idle_child ( handle , & state - > child ) ;
if ( err ! = 0 ) {
tevent_req_error ( req , err ) ;
return tevent_req_post ( req , ev ) ;
2008-02-21 18:14:08 +03:00
}
2012-07-09 13:10:30 +04:00
ZERO_STRUCT ( cmd ) ;
cmd . n = n ;
cmd . offset = offset ;
2012-07-13 15:59:52 +04:00
cmd . cmd = WRITE_CMD ;
2012-08-21 13:22:37 +04:00
cmd . erratic_testing_mode = config - > erratic_testing_mode ;
2008-02-21 18:14:08 +03:00
2012-07-09 13:10:30 +04:00
DEBUG ( 10 , ( " sending fd %d to child %d \n " , fsp - > fh - > fd ,
( int ) state - > child - > pid ) ) ;
2008-02-21 18:14:08 +03:00
2012-07-09 13:10:30 +04:00
/*
* Not making this async . We ' re writing into an empty unix
* domain socket . This should never block .
*/
written = write_fd ( state - > child - > sockfd , & cmd , sizeof ( cmd ) ,
fsp - > fh - > fd ) ;
if ( written = = - 1 ) {
err = errno ;
2008-02-21 18:14:08 +03:00
2012-07-09 13:10:30 +04:00
TALLOC_FREE ( state - > child ) ;
2008-02-21 18:14:08 +03:00
2012-07-09 13:10:30 +04:00
DEBUG ( 10 , ( " write_fd failed: %s \n " , strerror ( err ) ) ) ;
tevent_req_error ( req , err ) ;
return tevent_req_post ( req , ev ) ;
}
subreq = read_packet_send ( state , ev , state - > child - > sockfd ,
sizeof ( struct rw_ret ) , NULL , NULL ) ;
if ( tevent_req_nomem ( subreq , req ) ) {
TALLOC_FREE ( state - > child ) ; /* we sent sth down */
return tevent_req_post ( req , ev ) ;
}
tevent_req_set_callback ( subreq , aio_fork_pwrite_done , req ) ;
return req ;
2010-06-05 08:00:24 +04:00
}
2012-07-09 13:10:30 +04:00
static void aio_fork_pwrite_done ( struct tevent_req * subreq )
2010-06-05 08:00:24 +04:00
{
2012-07-09 13:10:30 +04:00
struct tevent_req * req = tevent_req_callback_data (
subreq , struct tevent_req ) ;
struct aio_fork_pwrite_state * state = tevent_req_data (
req , struct aio_fork_pwrite_state ) ;
ssize_t nread ;
uint8_t * buf ;
2012-06-11 13:00:27 +04:00
int err ;
2012-07-09 13:10:30 +04:00
struct rw_ret * retbuf ;
2010-06-05 08:00:24 +04:00
2012-07-09 13:10:30 +04:00
nread = read_packet_recv ( subreq , talloc_tos ( ) , & buf , & err ) ;
TALLOC_FREE ( subreq ) ;
if ( nread = = - 1 ) {
TALLOC_FREE ( state - > child ) ;
tevent_req_error ( req , err ) ;
return ;
2010-06-05 08:00:24 +04:00
}
2012-07-09 13:10:30 +04:00
state - > child - > busy = false ;
2010-06-05 08:00:24 +04:00
2012-07-09 13:10:30 +04:00
retbuf = ( struct rw_ret * ) buf ;
state - > ret = retbuf - > size ;
state - > err = retbuf - > ret_errno ;
tevent_req_done ( req ) ;
}
2010-06-05 08:00:24 +04:00
2012-07-09 13:10:30 +04:00
static ssize_t aio_fork_pwrite_recv ( struct tevent_req * req , int * err )
{
struct aio_fork_pwrite_state * state = tevent_req_data (
req , struct aio_fork_pwrite_state ) ;
2010-06-05 08:00:24 +04:00
2012-07-09 13:10:30 +04:00
if ( tevent_req_is_unix_error ( req , err ) ) {
return - 1 ;
2010-06-05 08:00:24 +04:00
}
2012-07-09 13:10:30 +04:00
if ( state - > ret = = - 1 ) {
* err = state - > err ;
}
return state - > ret ;
2010-06-05 08:00:24 +04:00
}
2012-07-13 16:05:11 +04:00
/*
 * State of one asynchronous fsync request.
 */
struct aio_fork_fsync_state {
	struct aio_child *child;	/* helper child doing the fsync */
	ssize_t ret;			/* result reported by the child */
	int err;			/* errno reported by the child */
};
static void aio_fork_fsync_done(struct tevent_req *subreq);

/*
 * Start an asynchronous fsync of fsp by handing its descriptor to an idle
 * helper child.
 *
 * Returns the tevent request, or NULL if the handle data is missing or the
 * request cannot be allocated. Other failures are reported through the
 * request.
 */
static struct tevent_req *aio_fork_fsync_send(
	struct vfs_handle_struct *handle, TALLOC_CTX *mem_ctx,
	struct tevent_context *ev, struct files_struct *fsp)
{
	struct tevent_req *req, *subreq;
	struct aio_fork_fsync_state *state;
	struct rw_cmd cmd;
	ssize_t written;
	int err;
	struct aio_fork_config *config;

	SMB_VFS_HANDLE_GET_DATA(handle, config,
				struct aio_fork_config,
				return NULL);

	req = tevent_req_create(mem_ctx, &state, struct aio_fork_fsync_state);
	if (req == NULL) {
		return NULL;
	}

	err = get_idle_child(handle, &state->child);
	if (err != 0) {
		tevent_req_error(req, err);
		return tevent_req_post(req, ev);
	}

	ZERO_STRUCT(cmd);
	cmd.cmd = FSYNC_CMD;
	cmd.erratic_testing_mode = config->erratic_testing_mode;

	DEBUG(10, ("sending fd %d to child %d\n", fsp->fh->fd,
		   (int)state->child->pid));

	/*
	 * Not making this async. We're writing into an empty unix
	 * domain socket. This should never block.
	 */
	written = write_fd(state->child->sockfd, &cmd, sizeof(cmd),
			   fsp->fh->fd);
	if (written == -1) {
		err = errno;

		/*
		 * get_idle_child() marked the child busy and its destructor
		 * asserts on that, so clear the mark before freeing.
		 */
		state->child->busy = false;
		TALLOC_FREE(state->child);

		DEBUG(10, ("write_fd failed: %s\n", strerror(err)));
		tevent_req_error(req, err);
		return tevent_req_post(req, ev);
	}

	subreq = read_packet_send(state, ev, state->child->sockfd,
				  sizeof(struct rw_ret), NULL, NULL);
	if (tevent_req_nomem(subreq, req)) {
		state->child->busy = false;
		TALLOC_FREE(state->child); /* we sent sth down */
		return tevent_req_post(req, ev);
	}
	tevent_req_set_callback(subreq, aio_fork_fsync_done, req);
	return req;
}
static void aio_fork_fsync_done ( struct tevent_req * subreq )
{
struct tevent_req * req = tevent_req_callback_data (
subreq , struct tevent_req ) ;
struct aio_fork_fsync_state * state = tevent_req_data (
req , struct aio_fork_fsync_state ) ;
ssize_t nread ;
uint8_t * buf ;
int err ;
struct rw_ret * retbuf ;
nread = read_packet_recv ( subreq , talloc_tos ( ) , & buf , & err ) ;
TALLOC_FREE ( subreq ) ;
if ( nread = = - 1 ) {
TALLOC_FREE ( state - > child ) ;
tevent_req_error ( req , err ) ;
return ;
}
state - > child - > busy = false ;
retbuf = ( struct rw_ret * ) buf ;
state - > ret = retbuf - > size ;
state - > err = retbuf - > ret_errno ;
tevent_req_done ( req ) ;
}
static int aio_fork_fsync_recv ( struct tevent_req * req , int * err )
{
struct aio_fork_fsync_state * state = tevent_req_data (
req , struct aio_fork_fsync_state ) ;
if ( tevent_req_is_unix_error ( req , err ) ) {
return - 1 ;
}
if ( state - > ret = = - 1 ) {
* err = state - > err ;
}
return state - > ret ;
}
2012-04-08 23:49:59 +04:00
/*
 * connect hook: chain to the next module, then read this module's settings
 * and attach them to the handle.
 *
 * Returns the next module's result if that is negative, -1 if the settings
 * cannot be allocated or attached, and 0 otherwise.
 */
static int aio_fork_connect(vfs_handle_struct *handle, const char *service,
			    const char *user)
{
	struct aio_fork_config *config;
	int ret = SMB_VFS_NEXT_CONNECT(handle, service, user);

	if (ret < 0) {
		return ret;
	}

	config = talloc_zero(handle->conn, struct aio_fork_config);
	if (config == NULL) {
		SMB_VFS_NEXT_DISCONNECT(handle);
		DEBUG(0, ("talloc_zero() failed\n"));
		return -1;
	}

	config->erratic_testing_mode = lp_parm_bool(
		SNUM(handle->conn), "vfs_aio_fork",
		"erratic_testing_mode", false);

	SMB_VFS_HANDLE_SET_DATA(handle, config,
				NULL, struct aio_fork_config,
				return -1);

	/*
	 * Set aio_pending_size to 100. The rationale recorded here for that
	 * value:
	 * (a) Throttling is done in SMB2 via the crediting algorithm.
	 * (b) SMB1 clients are limited to max_mux (50) outstanding
	 *     requests and Windows clients don't use this anyway.
	 * Essentially we want this to be unlimited unless smb.conf
	 * says different.
	 * NOTE(review): the earlier wording spoke of threads that terminate
	 * after 1 second when idle; this module forks children and reaps
	 * idle ones from a 30 second timer.
	 */
	aio_pending_size = 100;

	return 0;
}
2009-07-24 04:28:58 +04:00
static struct vfs_fn_pointers vfs_aio_fork_fns = {
2012-04-08 23:49:59 +04:00
. connect_fn = aio_fork_connect ,
2012-07-09 13:10:30 +04:00
. pread_send_fn = aio_fork_pread_send ,
. pread_recv_fn = aio_fork_pread_recv ,
. pwrite_send_fn = aio_fork_pwrite_send ,
. pwrite_recv_fn = aio_fork_pwrite_recv ,
2012-07-13 16:05:11 +04:00
. fsync_send_fn = aio_fork_fsync_send ,
. fsync_recv_fn = aio_fork_fsync_recv ,
2008-02-21 18:14:08 +03:00
} ;
NTSTATUS vfs_aio_fork_init ( void ) ;
NTSTATUS vfs_aio_fork_init ( void )
{
return smb_register_vfs ( SMB_VFS_INTERFACE_VERSION ,
2009-07-24 04:28:58 +04:00
" aio_fork " , & vfs_aio_fork_fns ) ;
2008-02-21 18:14:08 +03:00
}