2012-01-04 12:54:16 -08:00
/*
 * Simulate Posix AIO using pthreads.
 *
 * Based on the aio_fork work from Volker and Volker's pthreadpool library.
 *
 * Copyright (C) Volker Lendecke 2008
 * Copyright (C) Jeremy Allison 2012
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */
# include "includes.h"
# include "system/filesys.h"
# include "system/shmem.h"
# include "smbd/smbd.h"
2012-04-08 21:47:38 +02:00
# include "smbd/globals.h"
2018-03-09 15:02:04 +01:00
# include "../lib/pthreadpool/pthreadpool_tevent.h"
2012-07-12 10:10:32 -07:00
# ifdef HAVE_LINUX_FALLOC_H
# include <linux/falloc.h>
# endif
2012-01-04 12:54:16 -08:00
2018-06-28 14:28:34 +02:00
# if defined(HAVE_OPENAT) && defined(HAVE_LINUX_THREAD_CREDENTIALS)
2012-01-04 12:54:16 -08:00
2012-07-10 20:59:27 -07:00
/*
 * We must have openat() to do any thread-based
 * asynchronous opens. We also must be using
 * thread-specific credentials (Linux-only
 * for now).
 */
struct aio_open_private_data {
struct aio_open_private_data * prev , * next ;
/* Inputs. */
int dir_fd ;
2020-05-20 16:47:05 +02:00
bool opened_dir_fd ;
2012-07-10 20:59:27 -07:00
int flags ;
mode_t mode ;
uint64_t mid ;
bool in_progress ;
2020-05-14 17:08:15 +02:00
struct smb_filename * fsp_name ;
struct smb_filename * smb_fname ;
2019-01-08 10:34:11 +01:00
connection_struct * conn ;
2020-03-04 16:39:39 -08:00
struct smbXsrv_connection * xconn ;
2018-12-23 09:24:51 +01:00
const struct security_unix_token * ux_tok ;
2012-07-12 10:10:32 -07:00
uint64_t initial_allocation_size ;
2012-07-10 20:59:27 -07:00
/* Returns. */
int ret_fd ;
int ret_errno ;
} ;
/* List of outstanding requests we have. */
static struct aio_open_private_data *open_pd_list = NULL;

/*
 * Forward declarations: both helpers are called by the completion
 * handler, which is defined before them in this file.
 */
static void opd_free(struct aio_open_private_data *opd);
static void aio_open_do(struct aio_open_private_data *opd);
2019-01-08 10:39:56 +01:00
2012-07-10 20:59:27 -07:00
/************************************************************************
Find the open private data by mid .
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
static struct aio_open_private_data * find_open_private_data_by_mid ( uint64_t mid )
{
struct aio_open_private_data * opd ;
for ( opd = open_pd_list ; opd ! = NULL ; opd = opd - > next ) {
if ( opd - > mid = = mid ) {
return opd ;
}
}
return NULL ;
}
/************************************************************************
Callback when an open completes .
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
2018-03-09 15:02:04 +01:00
static void aio_open_handle_completion ( struct tevent_req * subreq )
2012-07-10 20:59:27 -07:00
{
2018-03-09 15:02:04 +01:00
struct aio_open_private_data * opd =
tevent_req_callback_data ( subreq ,
struct aio_open_private_data ) ;
2012-07-10 20:59:27 -07:00
int ret ;
2018-03-09 15:02:04 +01:00
ret = pthreadpool_tevent_job_recv ( subreq ) ;
TALLOC_FREE ( subreq ) ;
2020-03-05 10:22:00 -08:00
/*
* We ' re no longer in flight . Remove the
* destructor used to preserve opd so
* a talloc_free actually removes it .
*/
talloc_set_destructor ( opd , NULL ) ;
if ( opd - > conn = = NULL ) {
/*
* We were shutdown closed in flight . No one
* wants the result , and state has been reparented
* to the NULL context , so just free it so we
* don ' t leak memory .
*/
2020-05-14 17:08:15 +02:00
DBG_NOTICE ( " aio open request for %s abandoned in flight \n " ,
opd - > fsp_name - > base_name ) ;
2020-03-05 10:22:00 -08:00
if ( opd - > ret_fd ! = - 1 ) {
close ( opd - > ret_fd ) ;
opd - > ret_fd = - 1 ;
}
/*
* Find outstanding event and reschedule so the client
* gets an error message return from the open .
*/
schedule_deferred_open_message_smb ( opd - > xconn , opd - > mid ) ;
opd_free ( opd ) ;
return ;
}
2018-03-09 15:02:04 +01:00
if ( ret ! = 0 ) {
2019-01-08 10:39:56 +01:00
bool ok ;
if ( ret ! = EAGAIN ) {
smb_panic ( " aio_open_handle_completion " ) ;
/* notreached. */
return ;
}
/*
* Make sure we run as the user again
*/
2019-07-13 16:17:17 +02:00
ok = change_to_user_and_service ( opd - > conn , opd - > conn - > vuid ) ;
2019-01-08 10:39:56 +01:00
if ( ! ok ) {
smb_panic ( " Can't change to user " ) ;
return ;
}
/*
* If we get EAGAIN from pthreadpool_tevent_job_recv ( ) this
* means the lower level pthreadpool failed to create a new
* thread . Fallback to sync processing in that case to allow
* some progress for the client .
*/
aio_open_do ( opd ) ;
2012-07-10 20:59:27 -07:00
}
2018-03-09 15:02:04 +01:00
DEBUG ( 10 , ( " aio_open_handle_completion: mid %llu "
2020-05-14 17:08:15 +02:00
" for file %s completed \n " ,
2012-07-10 20:59:27 -07:00
( unsigned long long ) opd - > mid ,
2020-05-14 17:08:15 +02:00
opd - > fsp_name - > base_name ) ) ;
2012-07-10 20:59:27 -07:00
opd - > in_progress = false ;
2014-10-20 19:38:09 +00:00
/* Find outstanding event and reschedule. */
2020-03-04 16:39:39 -08:00
if ( ! schedule_deferred_open_message_smb ( opd - > xconn , opd - > mid ) ) {
2012-07-10 20:59:27 -07:00
/*
* Outstanding event didn ' t exist or was
* cancelled . Free up the fd and throw
* away the result .
*/
if ( opd - > ret_fd ! = - 1 ) {
close ( opd - > ret_fd ) ;
opd - > ret_fd = - 1 ;
}
2020-03-04 13:47:13 -08:00
opd_free ( opd ) ;
2012-07-10 20:59:27 -07:00
}
}
/*****************************************************************
The core of the async open code - the worker function . Note we
use the new openat ( ) system call to avoid any problems with
current working directory changes plus we change credentials
on the thread to prevent any security race conditions .
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
static void aio_open_worker ( void * private_data )
{
struct aio_open_private_data * opd =
( struct aio_open_private_data * ) private_data ;
2018-12-23 09:24:51 +01:00
/* Become the correct credential on this thread. */
if ( set_thread_credentials ( opd - > ux_tok - > uid ,
opd - > ux_tok - > gid ,
( size_t ) opd - > ux_tok - > ngroups ,
opd - > ux_tok - > groups ) ! = 0 ) {
opd - > ret_fd = - 1 ;
opd - > ret_errno = errno ;
return ;
}
2019-01-08 10:39:56 +01:00
aio_open_do ( opd ) ;
}
static void aio_open_do ( struct aio_open_private_data * opd )
{
2012-07-10 20:59:27 -07:00
opd - > ret_fd = openat ( opd - > dir_fd ,
2020-05-14 17:08:15 +02:00
opd - > smb_fname - > base_name ,
2012-07-10 20:59:27 -07:00
opd - > flags ,
opd - > mode ) ;
if ( opd - > ret_fd = = - 1 ) {
opd - > ret_errno = errno ;
} else {
/* Create was successful. */
opd - > ret_errno = 0 ;
2012-07-12 10:10:32 -07:00
# if defined(HAVE_LINUX_FALLOCATE)
/*
* See if we can set the initial
* allocation size . We don ' t record
* the return for this as it ' s an
* optimization - the upper layer
* will also do this for us once
* the open returns .
*/
if ( opd - > initial_allocation_size ) {
( void ) fallocate ( opd - > ret_fd ,
FALLOC_FL_KEEP_SIZE ,
0 ,
( off_t ) opd - > initial_allocation_size ) ;
}
# endif
2012-07-10 20:59:27 -07:00
}
}
/************************************************************************
2020-03-04 13:47:13 -08:00
Open private data teardown .
2012-07-10 20:59:27 -07:00
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
2020-03-04 13:47:13 -08:00
static void opd_free ( struct aio_open_private_data * opd )
2012-07-10 20:59:27 -07:00
{
2020-05-20 16:47:05 +02:00
if ( opd - > opened_dir_fd & & opd - > dir_fd ! = - 1 ) {
2012-07-10 20:59:27 -07:00
close ( opd - > dir_fd ) ;
}
DLIST_REMOVE ( open_pd_list , opd ) ;
2020-03-04 13:47:13 -08:00
TALLOC_FREE ( opd ) ;
2012-07-10 20:59:27 -07:00
}
/************************************************************************
Create and initialize a private data struct for async open .
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
2020-05-20 16:43:11 +02:00
static struct aio_open_private_data * create_private_open_data (
TALLOC_CTX * ctx ,
2020-05-20 16:47:05 +02:00
const struct files_struct * dirfsp ,
2020-05-20 16:43:11 +02:00
const struct smb_filename * smb_fname ,
const files_struct * fsp ,
int flags ,
mode_t mode )
2012-07-10 20:59:27 -07:00
{
2020-03-06 09:30:26 -08:00
struct aio_open_private_data * opd = talloc_zero ( ctx ,
2012-07-10 20:59:27 -07:00
struct aio_open_private_data ) ;
if ( ! opd ) {
return NULL ;
}
2019-01-08 10:32:16 +01:00
* opd = ( struct aio_open_private_data ) {
. dir_fd = - 1 ,
. ret_fd = - 1 ,
. ret_errno = EINPROGRESS ,
. flags = flags ,
. mode = mode ,
. mid = fsp - > mid ,
. in_progress = true ,
2019-01-08 10:34:11 +01:00
. conn = fsp - > conn ,
2020-03-04 16:39:39 -08:00
/*
* TODO : In future we need a proper algorithm
* to find the correct connection for a fsp .
* For now we only have one connection , so this is correct . . .
*/
. xconn = fsp - > conn - > sconn - > client - > connections ,
2019-01-08 10:32:16 +01:00
. initial_allocation_size = fsp - > initial_allocation_size ,
} ;
2012-07-10 20:59:27 -07:00
2018-12-23 09:24:51 +01:00
/* Copy our current credentials. */
opd - > ux_tok = copy_unix_token ( opd , get_current_utok ( fsp - > conn ) ) ;
if ( opd - > ux_tok = = NULL ) {
2020-03-04 13:47:13 -08:00
opd_free ( opd ) ;
2018-12-23 09:24:51 +01:00
return NULL ;
}
2012-07-10 20:59:27 -07:00
/*
2020-05-14 17:08:15 +02:00
* Copy the full fsp_name and smb_fname which is the basename .
2012-07-10 20:59:27 -07:00
*/
2020-05-14 17:08:15 +02:00
opd - > smb_fname = cp_smb_filename ( opd , smb_fname ) ;
if ( opd - > smb_fname = = NULL ) {
opd_free ( opd ) ;
return NULL ;
}
opd - > fsp_name = cp_smb_filename ( opd , fsp - > fsp_name ) ;
if ( opd - > fsp_name = = NULL ) {
2020-03-04 13:47:13 -08:00
opd_free ( opd ) ;
2012-07-10 20:59:27 -07:00
return NULL ;
}
2020-09-26 21:52:52 +02:00
if ( fsp_get_io_fd ( dirfsp ) ! = AT_FDCWD ) {
opd - > dir_fd = fsp_get_io_fd ( dirfsp ) ;
2020-05-20 16:47:05 +02:00
} else {
2012-07-10 20:59:27 -07:00
# if defined(O_DIRECTORY)
2020-05-20 16:47:05 +02:00
opd - > dir_fd = open ( " . " , O_RDONLY | O_DIRECTORY ) ;
2012-07-10 20:59:27 -07:00
# else
2020-05-20 16:47:05 +02:00
opd - > dir_fd = open ( " . " , O_RDONLY ) ;
2012-07-10 20:59:27 -07:00
# endif
2020-05-20 16:47:05 +02:00
opd - > opened_dir_fd = true ;
}
2012-07-10 20:59:27 -07:00
if ( opd - > dir_fd = = - 1 ) {
2020-03-04 13:47:13 -08:00
opd_free ( opd ) ;
2012-07-10 20:59:27 -07:00
return NULL ;
}
2016-02-05 11:32:18 +01:00
DLIST_ADD_END ( open_pd_list , opd ) ;
2012-07-10 20:59:27 -07:00
return opd ;
}
2020-03-05 10:22:00 -08:00
static int opd_inflight_destructor ( struct aio_open_private_data * opd )
{
/*
* Setting conn to NULL allows us to
* discover the connection was torn
* down which kills the fsp that owns
* opd .
*/
2020-05-14 17:08:15 +02:00
DBG_NOTICE ( " aio open request for %s cancelled \n " ,
opd - > fsp_name - > base_name ) ;
2020-03-05 10:22:00 -08:00
opd - > conn = NULL ;
/* Don't let opd go away. */
return - 1 ;
}
2012-07-10 20:59:27 -07:00
/*****************************************************************
Setup an async open .
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
2020-05-20 16:47:05 +02:00
static int open_async ( const struct files_struct * dirfsp ,
const struct smb_filename * smb_fname ,
2020-05-14 16:50:40 +02:00
const files_struct * fsp ,
int flags ,
mode_t mode )
2012-07-10 20:59:27 -07:00
{
struct aio_open_private_data * opd = NULL ;
2018-03-09 15:02:04 +01:00
struct tevent_req * subreq = NULL ;
2012-07-10 20:59:27 -07:00
2020-03-05 10:22:00 -08:00
/*
* Allocate off fsp - > conn , not NULL or fsp . As we ' re going
* async fsp will get talloc_free ' d when we return
* EINPROGRESS / NT_STATUS_MORE_PROCESSING_REQUIRED . A new fsp
* pointer gets allocated on every re - run of the
* open code path . Allocating on fsp - > conn instead
* of NULL allows use to get notified via destructor
* if the conn is force - closed or we shutdown .
* opd is always safely freed in all codepath so no
* memory leaks .
*/
2020-05-20 16:44:15 +02:00
opd = create_private_open_data ( fsp - > conn ,
2020-05-20 16:47:05 +02:00
dirfsp ,
2020-05-20 16:44:15 +02:00
smb_fname ,
fsp ,
flags ,
mode ) ;
2012-07-10 20:59:27 -07:00
if ( opd = = NULL ) {
DEBUG ( 10 , ( " open_async: Could not create private data. \n " ) ) ;
return - 1 ;
}
2018-12-23 09:24:51 +01:00
subreq = pthreadpool_tevent_job_send ( opd ,
2018-12-27 15:19:20 +01:00
fsp - > conn - > sconn - > ev_ctx ,
2018-12-23 09:34:20 +01:00
fsp - > conn - > sconn - > pool ,
2018-12-23 09:24:51 +01:00
aio_open_worker , opd ) ;
2018-03-09 15:02:04 +01:00
if ( subreq = = NULL ) {
2020-03-04 13:47:13 -08:00
opd_free ( opd ) ;
2012-07-10 20:59:27 -07:00
return - 1 ;
}
2018-03-09 15:02:04 +01:00
tevent_req_set_callback ( subreq , aio_open_handle_completion , opd ) ;
2012-07-10 20:59:27 -07:00
2020-05-14 17:08:15 +02:00
DEBUG ( 5 , ( " open_async: mid %llu created for file %s \n " ,
2012-07-10 20:59:27 -07:00
( unsigned long long ) opd - > mid ,
2020-05-14 17:08:15 +02:00
opd - > fsp_name - > base_name ) ) ;
2012-07-10 20:59:27 -07:00
2020-03-05 10:22:00 -08:00
/*
* Add a destructor to protect us from connection
* teardown whilst the open thread is in flight .
*/
talloc_set_destructor ( opd , opd_inflight_destructor ) ;
2012-07-10 20:59:27 -07:00
/* Cause the calling code to reschedule us. */
2020-02-20 14:13:35 +01:00
errno = EINPROGRESS ; /* Maps to NT_STATUS_MORE_PROCESSING_REQUIRED. */
2012-07-10 20:59:27 -07:00
return - 1 ;
}
/*****************************************************************
 Look for a matching SMB2 mid. If we find it we're rescheduled,
 just return the completed open.

 Returns false (leaving *p_fd and *p_errno untouched) when no
 request with fsp->mid exists. Otherwise stores the recorded fd and
 errno in *p_fd / *p_errno, frees the request and returns true.
 Panics if the matching request has not completed yet.
*****************************************************************/

static bool find_completed_open(files_struct *fsp,
				int *p_fd,
				int *p_errno)
{
	struct aio_open_private_data *opd =
		find_open_private_data_by_mid(fsp->mid);

	if (opd == NULL) {
		return false;
	}

	if (opd->in_progress) {
		DEBUG(0, ("find_completed_open: mid %llu "
			  "still in progress for "
			  "file %s. PANIC !\n",
			  (unsigned long long)opd->mid,
			  opd->fsp_name->base_name));
		/* Disaster ! This is an open timeout. Just panic. */
		smb_panic("find_completed_open - in_progress\n");
		/* notreached. */
		return false;
	}

	/* Hand the stored result back to the caller. */
	*p_fd = opd->ret_fd;
	*p_errno = opd->ret_errno;

	DEBUG(5, ("find_completed_open: mid %llu returning "
		  "fd = %d, errno = %d (%s) "
		  "for file %s\n",
		  (unsigned long long)opd->mid,
		  opd->ret_fd,
		  opd->ret_errno,
		  strerror(opd->ret_errno),
		  smb_fname_str_dbg(fsp->fsp_name)));

	/* Now we can free the opd. */
	opd_free(opd);
	return true;
}
/*****************************************************************
The core open function . Only go async on O_CREAT | O_EXCL
opens to prevent any race conditions .
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
2020-05-20 22:58:29 +02:00
static int aio_pthread_openat_fn ( vfs_handle_struct * handle ,
const struct files_struct * dirfsp ,
const struct smb_filename * smb_fname ,
struct files_struct * fsp ,
int flags ,
mode_t mode )
{
int my_errno = 0 ;
int fd = - 1 ;
bool aio_allow_open = lp_parm_bool (
SNUM ( handle - > conn ) , " aio_pthread " , " aio open " , false ) ;
if ( smb_fname - > stream_name ! = NULL ) {
/* Don't handle stream opens. */
errno = ENOENT ;
return - 1 ;
}
2021-02-26 12:31:29 +01:00
if ( fsp - > conn - > sconn - > client - > server_multi_channel_enabled ) {
/*
* This module is not compatible with multi channel yet .
*/
aio_allow_open = false ;
}
2020-05-20 22:58:29 +02:00
if ( ! aio_allow_open ) {
/* aio opens turned off. */
2020-09-26 21:52:52 +02:00
return openat ( fsp_get_io_fd ( dirfsp ) ,
2020-05-20 22:58:29 +02:00
smb_fname - > base_name ,
flags ,
mode ) ;
}
if ( ! ( flags & O_CREAT ) ) {
/* Only creates matter. */
2020-09-26 21:52:52 +02:00
return openat ( fsp_get_io_fd ( dirfsp ) ,
2020-05-20 22:58:29 +02:00
smb_fname - > base_name ,
flags ,
mode ) ;
}
if ( ! ( flags & O_EXCL ) ) {
/* Only creates with O_EXCL matter. */
2020-09-26 21:52:52 +02:00
return openat ( fsp_get_io_fd ( dirfsp ) ,
2020-05-20 22:58:29 +02:00
smb_fname - > base_name ,
flags ,
mode ) ;
}
/*
* See if this is a reentrant call - i . e . is this a
* restart of an existing open that just completed .
*/
if ( find_completed_open ( fsp ,
& fd ,
& my_errno ) ) {
errno = my_errno ;
return fd ;
}
/* Ok, it's a create exclusive call - pass it to a thread helper. */
return open_async ( dirfsp , smb_fname , fsp , flags , mode ) ;
}
2012-07-10 20:59:27 -07:00
# endif
2012-01-04 12:54:16 -08:00
/*
 * VFS operations provided by this module. The openat hook is only
 * compiled in when both openat() and per-thread credentials are
 * available; otherwise the table is empty.
 */
static struct vfs_fn_pointers vfs_aio_pthread_fns = {
#if defined(HAVE_OPENAT) && defined(HAVE_LINUX_THREAD_CREDENTIALS)
	.openat_fn = aio_pthread_openat_fn,
#endif
};
2017-12-15 15:32:12 -07:00
static_decl_vfs ;
2017-04-20 12:24:43 -07:00
NTSTATUS vfs_aio_pthread_init ( TALLOC_CTX * ctx )
2012-01-04 12:54:16 -08:00
{
return smb_register_vfs ( SMB_VFS_INTERFACE_VERSION ,
" aio_pthread " , & vfs_aio_pthread_fns ) ;
}