2007-01-05 12:35:49 +03:00
/*
Unix SMB / CIFS implementation .
main select loop and event handling - epoll implementation
Copyright ( C ) Andrew Tridgell 2003 - 2005
2013-02-15 01:50:56 +04:00
Copyright ( C ) Stefan Metzmacher 2005 - 2013
Copyright ( C ) Jeremy Allison 2013
2009-02-16 10:52:06 +03:00
* * NOTE ! The following LGPL license applies to the tevent
* * library . This does NOT imply that all of Samba is released
* * under the LGPL
This library is free software ; you can redistribute it and / or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation ; either
version 3 of the License , or ( at your option ) any later version .
This library is distributed in the hope that it will be useful ,
2007-01-05 12:35:49 +03:00
but WITHOUT ANY WARRANTY ; without even the implied warranty of
2009-02-16 10:52:06 +03:00
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE . See the GNU
Lesser General Public License for more details .
You should have received a copy of the GNU Lesser General Public
License along with this library ; if not , see < http : //www.gnu.org/licenses/>.
2007-01-05 12:35:49 +03:00
*/
2008-04-25 01:28:30 +04:00
# include "replace.h"
# include <limits.h>
2007-01-05 12:35:49 +03:00
# include "system/filesys.h"
2009-01-02 14:53:05 +03:00
# include "system/select.h"
2008-12-16 21:57:09 +03:00
# include "tevent.h"
# include "tevent_internal.h"
# include "tevent_util.h"
2007-01-05 12:35:49 +03:00
/*
 * Private state of the epoll backend. One instance is allocated per
 * tevent_context and stored in ev->additional_data.
 */
struct epoll_event_context {
	/* a pointer back to the generic event_context */
	struct tevent_context *ev;

	/* when using epoll this is the handle from epoll_create */
	int epoll_fd;

	/* pid recorded when epoll_fd was created; compared with getpid() */
	pid_t pid;

	/* when true, epoll_panic() forces its replay argument to true */
	bool panic_force_replay;

	/* optional flag owned by a caller; epoll_panic() sets it to true */
	bool *panic_state;

	/* optional handler called by epoll_panic() instead of abort() */
	bool (*panic_fallback)(struct tevent_context *ev, bool replay);
};
2013-02-20 14:26:00 +04:00
/*
 * Backend-private bits stored in tevent_fd->additional_flags.
 */
/* set after a successful EPOLL_CTL_ADD/EPOLL_CTL_MOD for the fde */
# define EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT (1<<0)
/* set when the fde waits for TEVENT_FD_READ: EPOLLHUP/EPOLLERR reach its handler */
# define EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR (1<<1)
/* set once EPOLLHUP/EPOLLERR has been seen for the fde */
# define EPOLL_ADDITIONAL_FD_FLAG_GOT_ERROR (1<<2)
2013-02-15 01:50:56 +04:00
/* set on both fdes of a pair that shares one fd; additional_data links them */
# define EPOLL_ADDITIONAL_FD_FLAG_HAS_MPX (1<<3)
2013-02-20 14:26:00 +04:00
2013-02-11 22:52:30 +04:00
# ifdef TEST_PANIC_FALLBACK
2013-02-15 15:10:26 +04:00
static int epoll_create_panic_fallback ( struct epoll_event_context * epoll_ev ,
int size )
{
if ( epoll_ev - > panic_fallback = = NULL ) {
return epoll_create ( size ) ;
}
/* 50% of the time, fail... */
if ( ( random ( ) % 2 ) = = 0 ) {
errno = EINVAL ;
return - 1 ;
}
return epoll_create ( size ) ;
}
static int epoll_ctl_panic_fallback ( struct epoll_event_context * epoll_ev ,
int epfd , int op , int fd ,
struct epoll_event * event )
{
if ( epoll_ev - > panic_fallback = = NULL ) {
return epoll_ctl ( epfd , op , fd , event ) ;
}
/* 50% of the time, fail... */
if ( ( random ( ) % 2 ) = = 0 ) {
errno = EINVAL ;
return - 1 ;
}
return epoll_ctl ( epfd , op , fd , event ) ;
}
static int epoll_wait_panic_fallback ( struct epoll_event_context * epoll_ev ,
int epfd ,
struct epoll_event * events ,
int maxevents ,
int timeout )
2013-02-11 22:52:30 +04:00
{
2013-02-15 15:10:26 +04:00
if ( epoll_ev - > panic_fallback = = NULL ) {
return epoll_wait ( epfd , events , maxevents , timeout ) ;
}
2013-02-11 22:52:30 +04:00
/* 50% of the time, fail... */
if ( ( random ( ) % 2 ) = = 0 ) {
errno = EINVAL ;
return - 1 ;
}
return epoll_wait ( epfd , events , maxevents , timeout ) ;
}
2013-02-15 15:10:26 +04:00
/*
 * From here on, route every epoll_create()/epoll_ctl()/epoll_wait() call
 * in this file through the failure-injecting wrappers defined above.
 * The macros come after the wrappers so that the wrappers themselves
 * still call the real functions. Each call site must have a variable
 * named epoll_ev in scope, because the expansions reference it.
 */
# define epoll_create(_size) \
epoll_create_panic_fallback ( epoll_ev , _size )
# define epoll_ctl(_epfd, _op, _fd, _event) \
epoll_ctl_panic_fallback ( epoll_ev , _epfd , _op , _fd , _event )
# define epoll_wait(_epfd, _events, _maxevents, _timeout) \
epoll_wait_panic_fallback ( epoll_ev , _epfd , _events , _maxevents , _timeout )
2013-02-11 22:52:30 +04:00
# endif
2013-02-11 22:43:39 +04:00
/*
called to set the panic fallback function .
*/
2013-12-11 21:58:41 +04:00
_PRIVATE_ void tevent_epoll_set_panic_fallback ( struct tevent_context * ev ,
2013-02-11 22:43:39 +04:00
bool ( * panic_fallback ) ( struct tevent_context * ev ,
bool replay ) )
{
2013-12-05 11:47:27 +04:00
struct epoll_event_context * epoll_ev =
talloc_get_type_abort ( ev - > additional_data ,
struct epoll_event_context ) ;
2013-02-11 22:43:39 +04:00
epoll_ev - > panic_fallback = panic_fallback ;
}
2007-01-05 12:35:49 +03:00
/*
2011-02-17 18:45:46 +03:00
called when a epoll call fails
2007-01-05 12:35:49 +03:00
*/
2013-02-14 13:58:55 +04:00
static void epoll_panic ( struct epoll_event_context * epoll_ev ,
const char * reason , bool replay )
2007-01-05 12:35:49 +03:00
{
2013-02-11 22:48:02 +04:00
struct tevent_context * ev = epoll_ev - > ev ;
2013-02-15 19:33:56 +04:00
bool ( * panic_fallback ) ( struct tevent_context * ev , bool replay ) ;
2013-02-11 22:48:02 +04:00
2013-02-15 19:33:56 +04:00
panic_fallback = epoll_ev - > panic_fallback ;
if ( epoll_ev - > panic_state ! = NULL ) {
* epoll_ev - > panic_state = true ;
}
if ( epoll_ev - > panic_force_replay ) {
replay = true ;
}
TALLOC_FREE ( ev - > additional_data ) ;
if ( panic_fallback = = NULL ) {
2013-02-11 22:48:02 +04:00
tevent_debug ( ev , TEVENT_DEBUG_FATAL ,
" %s (%s) replay[%u] - calling abort() \n " ,
reason , strerror ( errno ) , ( unsigned ) replay ) ;
abort ( ) ;
}
2013-03-01 17:30:54 +04:00
tevent_debug ( ev , TEVENT_DEBUG_ERROR ,
2013-02-11 22:48:02 +04:00
" %s (%s) replay[%u] - calling panic_fallback \n " ,
reason , strerror ( errno ) , ( unsigned ) replay ) ;
2013-02-15 19:33:56 +04:00
if ( ! panic_fallback ( ev , replay ) ) {
2013-02-11 22:48:02 +04:00
/* Fallback failed. */
tevent_debug ( ev , TEVENT_DEBUG_FATAL ,
" %s (%s) replay[%u] - calling abort() \n " ,
reason , strerror ( errno ) , ( unsigned ) replay ) ;
abort ( ) ;
}
2007-01-05 12:35:49 +03:00
}
/*
2009-01-02 18:25:29 +03:00
map from TEVENT_FD_ * to EPOLLIN / EPOLLOUT
2007-01-05 12:35:49 +03:00
*/
static uint32_t epoll_map_flags ( uint16_t flags )
{
uint32_t ret = 0 ;
2009-01-02 18:25:29 +03:00
if ( flags & TEVENT_FD_READ ) ret | = ( EPOLLIN | EPOLLERR | EPOLLHUP ) ;
if ( flags & TEVENT_FD_WRITE ) ret | = ( EPOLLOUT | EPOLLERR | EPOLLHUP ) ;
2007-01-05 12:35:49 +03:00
return ret ;
}
/*
free the epoll fd
*/
static int epoll_ctx_destructor ( struct epoll_event_context * epoll_ev )
{
close ( epoll_ev - > epoll_fd ) ;
epoll_ev - > epoll_fd = - 1 ;
return 0 ;
}
/*
init the epoll fd
*/
2007-08-17 09:21:05 +04:00
static int epoll_init_ctx ( struct epoll_event_context * epoll_ev )
2007-01-05 12:35:49 +03:00
{
epoll_ev - > epoll_fd = epoll_create ( 64 ) ;
2007-08-17 09:21:05 +04:00
if ( epoll_ev - > epoll_fd = = - 1 ) {
2011-08-11 14:30:48 +04:00
tevent_debug ( epoll_ev - > ev , TEVENT_DEBUG_FATAL ,
" Failed to create epoll handle. \n " ) ;
2007-08-17 09:21:05 +04:00
return - 1 ;
}
2011-08-11 14:30:48 +04:00
if ( ! ev_set_close_on_exec ( epoll_ev - > epoll_fd ) ) {
tevent_debug ( epoll_ev - > ev , TEVENT_DEBUG_WARNING ,
" Failed to set close-on-exec, file descriptor may be leaked to children. \n " ) ;
}
epoll_ev - > pid = getpid ( ) ;
talloc_set_destructor ( epoll_ev , epoll_ctx_destructor ) ;
2007-08-17 09:21:05 +04:00
return 0 ;
2007-01-05 12:35:49 +03:00
}
2013-02-20 14:26:00 +04:00
/* forward declaration: epoll_check_reopen() below calls it before its definition */
static void epoll_update_event ( struct epoll_event_context * epoll_ev , struct tevent_fd * fde ) ;
2007-05-17 06:38:07 +04:00
/*
reopen the epoll handle when our pid changes
2015-11-17 20:13:41 +03:00
see http : //junkcode.samba.org/ftp/unpacked/junkcode/epoll_fork.c for an
2007-05-17 06:38:07 +04:00
demonstration of why this is needed
*/
2007-05-17 11:52:33 +04:00
static void epoll_check_reopen ( struct epoll_event_context * epoll_ev )
2007-05-17 06:38:07 +04:00
{
2008-12-29 22:24:57 +03:00
struct tevent_fd * fde ;
2013-02-15 19:33:56 +04:00
bool * caller_panic_state = epoll_ev - > panic_state ;
bool panic_triggered = false ;
2007-05-17 06:38:07 +04:00
2007-05-17 11:52:33 +04:00
if ( epoll_ev - > pid = = getpid ( ) ) {
return ;
}
2007-05-17 06:38:07 +04:00
close ( epoll_ev - > epoll_fd ) ;
epoll_ev - > epoll_fd = epoll_create ( 64 ) ;
if ( epoll_ev - > epoll_fd = = - 1 ) {
2013-02-15 19:33:56 +04:00
epoll_panic ( epoll_ev , " epoll_create() failed " , false ) ;
2007-05-17 06:38:07 +04:00
return ;
}
2011-08-11 14:30:48 +04:00
if ( ! ev_set_close_on_exec ( epoll_ev - > epoll_fd ) ) {
tevent_debug ( epoll_ev - > ev , TEVENT_DEBUG_WARNING ,
" Failed to set close-on-exec, file descriptor may be leaked to children. \n " ) ;
}
2007-05-17 06:38:07 +04:00
epoll_ev - > pid = getpid ( ) ;
2013-02-15 19:33:56 +04:00
epoll_ev - > panic_state = & panic_triggered ;
2009-01-05 19:36:50 +03:00
for ( fde = epoll_ev - > ev - > fd_events ; fde ; fde = fde - > next ) {
2013-02-20 14:26:00 +04:00
fde - > additional_flags & = ~ EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT ;
epoll_update_event ( epoll_ev , fde ) ;
2013-02-15 19:33:56 +04:00
if ( panic_triggered ) {
if ( caller_panic_state ! = NULL ) {
* caller_panic_state = true ;
}
return ;
}
2007-05-17 06:38:07 +04:00
}
2013-02-15 19:33:56 +04:00
epoll_ev - > panic_state = NULL ;
2007-05-17 06:38:07 +04:00
}
2013-02-15 01:50:56 +04:00
/*
epoll cannot add the same file descriptor twice , once
with read , once with write which is allowed by the
tevent backend . Multiplex the existing fde , flag it
as such so we can search for the correct fde on
event triggering .
*/
static int epoll_add_multiplex_fd ( struct epoll_event_context * epoll_ev ,
struct tevent_fd * add_fde )
{
struct epoll_event event ;
struct tevent_fd * mpx_fde ;
int ret ;
/* Find the existing fde that caused the EEXIST error. */
for ( mpx_fde = epoll_ev - > ev - > fd_events ; mpx_fde ; mpx_fde = mpx_fde - > next ) {
if ( mpx_fde - > fd ! = add_fde - > fd ) {
continue ;
}
if ( mpx_fde = = add_fde ) {
continue ;
}
break ;
}
if ( mpx_fde = = NULL ) {
tevent_debug ( epoll_ev - > ev , TEVENT_DEBUG_FATAL ,
" can't find multiplex fde for fd[%d] " ,
add_fde - > fd ) ;
return - 1 ;
}
if ( mpx_fde - > additional_flags & EPOLL_ADDITIONAL_FD_FLAG_HAS_MPX ) {
/* Logic error. Can't have more than 2 multiplexed fde's. */
tevent_debug ( epoll_ev - > ev , TEVENT_DEBUG_FATAL ,
" multiplex fde for fd[%d] is already multiplexed \n " ,
mpx_fde - > fd ) ;
return - 1 ;
}
/*
* The multiplex fde must have the same fd , and also
* already have an epoll event attached .
*/
if ( ! ( mpx_fde - > additional_flags & EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT ) ) {
/* Logic error. Can't have more than 2 multiplexed fde's. */
tevent_debug ( epoll_ev - > ev , TEVENT_DEBUG_FATAL ,
" multiplex fde for fd[%d] has no event \n " ,
mpx_fde - > fd ) ;
return - 1 ;
}
/* Modify the mpx_fde to add in the new flags. */
ZERO_STRUCT ( event ) ;
event . events = epoll_map_flags ( mpx_fde - > flags ) ;
event . events | = epoll_map_flags ( add_fde - > flags ) ;
event . data . ptr = mpx_fde ;
ret = epoll_ctl ( epoll_ev - > epoll_fd , EPOLL_CTL_MOD , mpx_fde - > fd , & event ) ;
if ( ret ! = 0 & & errno = = EBADF ) {
tevent_debug ( epoll_ev - > ev , TEVENT_DEBUG_ERROR ,
" EPOLL_CTL_MOD EBADF for "
" add_fde[%p] mpx_fde[%p] fd[%d] - disabling \n " ,
add_fde , mpx_fde , add_fde - > fd ) ;
DLIST_REMOVE ( epoll_ev - > ev - > fd_events , mpx_fde ) ;
mpx_fde - > event_ctx = NULL ;
DLIST_REMOVE ( epoll_ev - > ev - > fd_events , add_fde ) ;
add_fde - > event_ctx = NULL ;
return 0 ;
} else if ( ret ! = 0 ) {
return ret ;
}
/*
* Make each fde - > additional_data pointers point at each other
* so we can look them up from each other . They are now paired .
*/
mpx_fde - > additional_data = ( struct tevent_fd * ) add_fde ;
add_fde - > additional_data = ( struct tevent_fd * ) mpx_fde ;
/* Now flag both fde's as being multiplexed. */
mpx_fde - > additional_flags | = EPOLL_ADDITIONAL_FD_FLAG_HAS_MPX ;
add_fde - > additional_flags | = EPOLL_ADDITIONAL_FD_FLAG_HAS_MPX ;
/* we need to keep the GOT_ERROR flag */
if ( mpx_fde - > additional_flags & EPOLL_ADDITIONAL_FD_FLAG_GOT_ERROR ) {
add_fde - > additional_flags | = EPOLL_ADDITIONAL_FD_FLAG_GOT_ERROR ;
}
return 0 ;
}
2007-01-05 12:35:49 +03:00
/*
add the epoll event to the given fd_event
*/
2008-12-29 22:24:57 +03:00
static void epoll_add_event ( struct epoll_event_context * epoll_ev , struct tevent_fd * fde )
2007-01-05 12:35:49 +03:00
{
struct epoll_event event ;
2013-02-20 16:12:27 +04:00
int ret ;
2013-02-20 20:14:52 +04:00
struct tevent_fd * mpx_fde = NULL ;
2007-05-17 06:38:07 +04:00
2013-02-20 20:38:10 +04:00
fde - > additional_flags & = ~ EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT ;
2007-01-05 12:35:49 +03:00
fde - > additional_flags & = ~ EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR ;
2013-02-20 20:14:52 +04:00
if ( fde - > additional_flags & EPOLL_ADDITIONAL_FD_FLAG_HAS_MPX ) {
/*
* This is a multiplexed fde , we need to include both
* flags in the modified event .
*/
mpx_fde = talloc_get_type_abort ( fde - > additional_data ,
struct tevent_fd ) ;
mpx_fde - > additional_flags & = ~ EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT ;
mpx_fde - > additional_flags & = ~ EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR ;
}
2007-01-05 12:35:49 +03:00
ZERO_STRUCT ( event ) ;
event . events = epoll_map_flags ( fde - > flags ) ;
2013-02-20 20:14:52 +04:00
if ( mpx_fde ! = NULL ) {
event . events | = epoll_map_flags ( mpx_fde - > flags ) ;
}
2007-01-05 12:35:49 +03:00
event . data . ptr = fde ;
2013-02-20 16:12:27 +04:00
ret = epoll_ctl ( epoll_ev - > epoll_fd , EPOLL_CTL_ADD , fde - > fd , & event ) ;
2013-02-20 16:33:19 +04:00
if ( ret ! = 0 & & errno = = EBADF ) {
tevent_debug ( epoll_ev - > ev , TEVENT_DEBUG_ERROR ,
" EPOLL_CTL_ADD EBADF for "
2013-02-20 20:14:52 +04:00
" fde[%p] mpx_fde[%p] fd[%d] - disabling \n " ,
fde , mpx_fde , fde - > fd ) ;
2013-02-20 16:33:19 +04:00
DLIST_REMOVE ( epoll_ev - > ev - > fd_events , fde ) ;
fde - > event_ctx = NULL ;
2013-02-20 20:14:52 +04:00
if ( mpx_fde ! = NULL ) {
DLIST_REMOVE ( epoll_ev - > ev - > fd_events , mpx_fde ) ;
mpx_fde - > event_ctx = NULL ;
}
2013-02-20 16:33:19 +04:00
return ;
2013-02-15 01:52:41 +04:00
} else if ( ret ! = 0 & & errno = = EEXIST & & mpx_fde = = NULL ) {
ret = epoll_add_multiplex_fd ( epoll_ev , fde ) ;
if ( ret ! = 0 ) {
epoll_panic ( epoll_ev , " epoll_add_multiplex_fd failed " ,
false ) ;
return ;
}
2013-02-20 16:33:19 +04:00
} else if ( ret ! = 0 ) {
2013-02-14 13:58:55 +04:00
epoll_panic ( epoll_ev , " EPOLL_CTL_ADD failed " , false ) ;
2013-02-11 22:38:01 +04:00
return ;
2007-01-05 12:35:49 +03:00
}
2013-02-20 20:38:10 +04:00
fde - > additional_flags | = EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT ;
2007-01-05 12:35:49 +03:00
/* only if we want to read we want to tell the event handler about errors */
2009-01-02 18:25:29 +03:00
if ( fde - > flags & TEVENT_FD_READ ) {
2007-01-05 12:35:49 +03:00
fde - > additional_flags | = EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR ;
}
2013-02-20 20:14:52 +04:00
if ( mpx_fde = = NULL ) {
return ;
}
mpx_fde - > additional_flags | = EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT ;
/* only if we want to read we want to tell the event handler about errors */
if ( mpx_fde - > flags & TEVENT_FD_READ ) {
mpx_fde - > additional_flags | = EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR ;
}
2007-01-05 12:35:49 +03:00
}
/*
delete the epoll event for given fd_event
*/
2008-12-29 22:24:57 +03:00
static void epoll_del_event ( struct epoll_event_context * epoll_ev , struct tevent_fd * fde )
2007-01-05 12:35:49 +03:00
{
struct epoll_event event ;
2013-02-20 16:12:27 +04:00
int ret ;
2013-02-15 22:16:44 +04:00
struct tevent_fd * mpx_fde = NULL ;
2007-05-17 06:38:07 +04:00
2013-02-20 20:38:10 +04:00
fde - > additional_flags & = ~ EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT ;
2007-01-05 12:35:49 +03:00
fde - > additional_flags & = ~ EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR ;
2013-02-15 22:16:44 +04:00
if ( fde - > additional_flags & EPOLL_ADDITIONAL_FD_FLAG_HAS_MPX ) {
/*
* This is a multiplexed fde , we need to modify both events .
*/
mpx_fde = talloc_get_type_abort ( fde - > additional_data ,
struct tevent_fd ) ;
mpx_fde - > additional_flags & = ~ EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT ;
mpx_fde - > additional_flags & = ~ EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR ;
}
2007-01-05 12:35:49 +03:00
ZERO_STRUCT ( event ) ;
2013-02-20 16:12:27 +04:00
ret = epoll_ctl ( epoll_ev - > epoll_fd , EPOLL_CTL_DEL , fde - > fd , & event ) ;
2013-02-15 13:31:36 +04:00
if ( ret ! = 0 & & errno = = ENOENT ) {
/*
* This can happen after a epoll_check_reopen
* within epoll_event_fd_destructor .
*/
tevent_debug ( epoll_ev - > ev , TEVENT_DEBUG_TRACE ,
" EPOLL_CTL_DEL ignoring ENOENT for fd[%d] \n " ,
fde - > fd ) ;
2013-02-20 20:38:10 +04:00
return ;
2013-02-20 16:33:19 +04:00
} else if ( ret ! = 0 & & errno = = EBADF ) {
tevent_debug ( epoll_ev - > ev , TEVENT_DEBUG_WARNING ,
" EPOLL_CTL_DEL EBADF for "
2013-02-15 22:16:44 +04:00
" fde[%p] mpx_fde[%p] fd[%d] - disabling \n " ,
fde , mpx_fde , fde - > fd ) ;
2013-02-20 16:33:19 +04:00
DLIST_REMOVE ( epoll_ev - > ev - > fd_events , fde ) ;
fde - > event_ctx = NULL ;
2013-02-15 22:16:44 +04:00
if ( mpx_fde ! = NULL ) {
DLIST_REMOVE ( epoll_ev - > ev - > fd_events , mpx_fde ) ;
mpx_fde - > event_ctx = NULL ;
}
2013-02-20 16:33:19 +04:00
return ;
2013-02-15 13:31:36 +04:00
} else if ( ret ! = 0 ) {
epoll_panic ( epoll_ev , " EPOLL_CTL_DEL failed " , false ) ;
return ;
2007-05-14 04:57:48 +04:00
}
2007-01-05 12:35:49 +03:00
}
/*
change the epoll event to the given fd_event
*/
2008-12-29 22:24:57 +03:00
static void epoll_mod_event ( struct epoll_event_context * epoll_ev , struct tevent_fd * fde )
2007-01-05 12:35:49 +03:00
{
2013-03-01 16:57:05 +04:00
struct tevent_fd * mpx_fde = NULL ;
2007-01-05 12:35:49 +03:00
struct epoll_event event ;
2013-02-20 16:12:27 +04:00
int ret ;
2007-01-05 12:35:49 +03:00
2013-02-20 20:38:10 +04:00
fde - > additional_flags & = ~ EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT ;
2007-01-05 12:35:49 +03:00
fde - > additional_flags & = ~ EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR ;
2013-03-01 16:57:05 +04:00
if ( fde - > additional_flags & EPOLL_ADDITIONAL_FD_FLAG_HAS_MPX ) {
/*
* This is a multiplexed fde , we need to include both
* flags in the modified event .
*/
mpx_fde = talloc_get_type_abort ( fde - > additional_data ,
struct tevent_fd ) ;
mpx_fde - > additional_flags & = ~ EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT ;
mpx_fde - > additional_flags & = ~ EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR ;
}
2007-01-05 12:35:49 +03:00
ZERO_STRUCT ( event ) ;
event . events = epoll_map_flags ( fde - > flags ) ;
2013-03-01 16:57:05 +04:00
if ( mpx_fde ! = NULL ) {
event . events | = epoll_map_flags ( mpx_fde - > flags ) ;
}
2007-01-05 12:35:49 +03:00
event . data . ptr = fde ;
2013-02-20 16:12:27 +04:00
ret = epoll_ctl ( epoll_ev - > epoll_fd , EPOLL_CTL_MOD , fde - > fd , & event ) ;
2013-02-20 16:33:19 +04:00
if ( ret ! = 0 & & errno = = EBADF ) {
tevent_debug ( epoll_ev - > ev , TEVENT_DEBUG_ERROR ,
" EPOLL_CTL_MOD EBADF for "
2013-03-01 16:57:05 +04:00
" fde[%p] mpx_fde[%p] fd[%d] - disabling \n " ,
fde , mpx_fde , fde - > fd ) ;
2013-02-20 16:33:19 +04:00
DLIST_REMOVE ( epoll_ev - > ev - > fd_events , fde ) ;
fde - > event_ctx = NULL ;
2013-03-01 16:57:05 +04:00
if ( mpx_fde ! = NULL ) {
DLIST_REMOVE ( epoll_ev - > ev - > fd_events , mpx_fde ) ;
mpx_fde - > event_ctx = NULL ;
}
2013-02-20 16:33:19 +04:00
return ;
} else if ( ret ! = 0 ) {
2013-02-14 13:58:55 +04:00
epoll_panic ( epoll_ev , " EPOLL_CTL_MOD failed " , false ) ;
2013-02-11 22:38:01 +04:00
return ;
2007-01-05 12:35:49 +03:00
}
2013-02-20 20:38:10 +04:00
fde - > additional_flags | = EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT ;
2007-01-05 12:35:49 +03:00
/* only if we want to read we want to tell the event handler about errors */
2009-01-02 18:25:29 +03:00
if ( fde - > flags & TEVENT_FD_READ ) {
2007-01-05 12:35:49 +03:00
fde - > additional_flags | = EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR ;
}
2013-03-01 16:57:05 +04:00
if ( mpx_fde = = NULL ) {
return ;
}
mpx_fde - > additional_flags | = EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT ;
/* only if we want to read we want to tell the event handler about errors */
if ( mpx_fde - > flags & TEVENT_FD_READ ) {
mpx_fde - > additional_flags | = EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR ;
}
2007-01-05 12:35:49 +03:00
}
2013-02-20 15:55:11 +04:00
static void epoll_update_event ( struct epoll_event_context * epoll_ev , struct tevent_fd * fde )
2007-01-05 12:35:49 +03:00
{
2007-05-02 01:29:42 +04:00
bool got_error = ( fde - > additional_flags & EPOLL_ADDITIONAL_FD_FLAG_GOT_ERROR ) ;
2009-01-02 18:25:29 +03:00
bool want_read = ( fde - > flags & TEVENT_FD_READ ) ;
bool want_write = ( fde - > flags & TEVENT_FD_WRITE ) ;
2013-02-20 20:17:31 +04:00
struct tevent_fd * mpx_fde = NULL ;
if ( fde - > additional_flags & EPOLL_ADDITIONAL_FD_FLAG_HAS_MPX ) {
/*
* work out what the multiplexed fde wants .
*/
mpx_fde = talloc_get_type_abort ( fde - > additional_data ,
struct tevent_fd ) ;
if ( mpx_fde - > flags & TEVENT_FD_READ ) {
want_read = true ;
}
if ( mpx_fde - > flags & TEVENT_FD_WRITE ) {
want_write = true ;
}
}
2007-01-05 12:35:49 +03:00
/* there's already an event */
if ( fde - > additional_flags & EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT ) {
if ( want_read | | ( want_write & & ! got_error ) ) {
epoll_mod_event ( epoll_ev , fde ) ;
return ;
}
/*
* if we want to match the select behavior , we need to remove the epoll_event
* when the caller isn ' t interested in events .
*
* this is because epoll reports EPOLLERR and EPOLLHUP , even without asking for them
*/
epoll_del_event ( epoll_ev , fde ) ;
return ;
}
/* there's no epoll_event attached to the fde */
if ( want_read | | ( want_write & & ! got_error ) ) {
epoll_add_event ( epoll_ev , fde ) ;
return ;
}
}
2013-02-15 03:53:38 +04:00
/*
Cope with epoll returning EPOLLHUP | EPOLLERR on an event .
Return true if there ' s nothing else to do , false if
this event needs further handling .
*/
static bool epoll_handle_hup_or_err ( struct epoll_event_context * epoll_ev ,
struct tevent_fd * fde )
{
if ( fde = = NULL ) {
/* Nothing to do if no event. */
return true ;
}
fde - > additional_flags | = EPOLL_ADDITIONAL_FD_FLAG_GOT_ERROR ;
/*
* if we only wait for TEVENT_FD_WRITE , we should not tell the
* event handler about it , and remove the epoll_event ,
* as we only report errors when waiting for read events ,
* to match the select ( ) behavior
*/
if ( ! ( fde - > additional_flags & EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR ) ) {
/*
* Do the same as the poll backend and
* remove the writeable flag .
*/
fde - > flags & = ~ TEVENT_FD_WRITE ;
return true ;
}
/* This has TEVENT_FD_READ set, we're not finished. */
return false ;
}
2007-01-05 12:35:49 +03:00
/*
event loop handling using epoll
*/
static int epoll_event_loop ( struct epoll_event_context * epoll_ev , struct timeval * tvalp )
{
int ret , i ;
2009-03-16 14:34:23 +03:00
# define MAXEVENTS 1
2007-01-05 12:35:49 +03:00
struct epoll_event events [ MAXEVENTS ] ;
int timeout = - 1 ;
2013-02-15 14:24:59 +04:00
int wait_errno ;
2007-01-05 12:35:49 +03:00
if ( tvalp ) {
2013-02-26 18:51:38 +04:00
/* it's better to trigger timed events a bit later than too early */
2007-01-05 12:35:49 +03:00
timeout = ( ( tvalp - > tv_usec + 999 ) / 1000 ) + ( tvalp - > tv_sec * 1000 ) ;
}
2009-01-05 18:55:00 +03:00
if ( epoll_ev - > ev - > signal_events & &
2009-01-02 15:26:32 +03:00
tevent_common_check_signal ( epoll_ev - > ev ) ) {
2007-01-21 11:23:14 +03:00
return 0 ;
}
2012-06-05 10:00:07 +04:00
tevent_trace_point_callback ( epoll_ev - > ev , TEVENT_TRACE_BEFORE_WAIT ) ;
2007-01-05 12:35:49 +03:00
ret = epoll_wait ( epoll_ev - > epoll_fd , events , MAXEVENTS , timeout ) ;
2013-02-15 14:24:59 +04:00
wait_errno = errno ;
2012-06-05 10:00:07 +04:00
tevent_trace_point_callback ( epoll_ev - > ev , TEVENT_TRACE_AFTER_WAIT ) ;
2007-01-05 12:35:49 +03:00
2013-02-15 14:24:59 +04:00
if ( ret = = - 1 & & wait_errno = = EINTR & & epoll_ev - > ev - > signal_events ) {
2009-01-02 15:26:32 +03:00
if ( tevent_common_check_signal ( epoll_ev - > ev ) ) {
2007-01-21 11:23:14 +03:00
return 0 ;
}
}
2013-02-15 14:24:59 +04:00
if ( ret = = - 1 & & wait_errno ! = EINTR ) {
2013-02-14 13:58:55 +04:00
epoll_panic ( epoll_ev , " epoll_wait() failed " , true ) ;
2007-01-05 12:35:49 +03:00
return - 1 ;
}
if ( ret = = 0 & & tvalp ) {
r22661: optimize the handling of directly triggered timed events:
- if someone adds a timed_event with a zero timeval
we now avoid serval gettimeofday() calls and the
event handler doesn't get the current time when it's
called, instead we also pass a zero timeval
- this also makes sure multiple timed events with a zero timeval
are processed in the order there're added.
the little benchmark shows that processing 2000000 directly timed events
is now much faster, while avoiding syscalls at all!
> time ./evtest (with the old code)
real 0m6.388s
user 0m1.740s
sys 0m4.632s
> time ./evtest (with the new code)
real 0m1.498s
user 0m1.496s
sys 0m0.004s
metze@SERNOX:~/devel/samba/4.0/samba4-ci/source> cat evtest.c
#include <stdio.h>
#include <stdint.h>
#include <sys/time.h>
#include <talloc.h>
#include <events.h>
static void dummy_fde_handler(struct event_context *ev_ctx, struct fd_event *fde,
uint16_t flags, void *private_data)
{
}
static void timeout_handler(struct event_context *ev, struct timed_event *te,
struct timeval tval, void *private_data)
{
uint32_t *countp = (uint32_t *)private_data;
(*countp)++;
if (*countp > 2000000) exit(0);
event_add_timed(ev, ev, tval, timeout_handler, countp);
}
int main(void)
{
struct event_context *ev;
struct timeval tval = { 0, 0 };
uint32_t count = 0;
ev = event_context_init(NULL);
event_add_fd(ev, ev, 0, 0, dummy_fde_handler, NULL);
event_add_timed(ev, ev, tval, timeout_handler, &count);
return event_loop_wait(ev);
}
(This used to be commit 4db64b4ce2320b88d648078cbf86385f6fb44f1f)
2007-05-04 13:22:52 +04:00
/* we don't care about a possible delay here */
2009-01-02 15:26:32 +03:00
tevent_common_loop_timer_delay ( epoll_ev - > ev ) ;
2007-01-05 12:35:49 +03:00
return 0 ;
}
for ( i = 0 ; i < ret ; i + + ) {
2015-11-17 20:13:41 +03:00
struct tevent_fd * fde = talloc_get_type ( events [ i ] . data . ptr ,
2008-12-29 22:24:57 +03:00
struct tevent_fd ) ;
2007-01-05 12:35:49 +03:00
uint16_t flags = 0 ;
2013-02-20 17:49:17 +04:00
struct tevent_fd * mpx_fde = NULL ;
2007-01-05 12:35:49 +03:00
if ( fde = = NULL ) {
2013-02-14 13:58:55 +04:00
epoll_panic ( epoll_ev , " epoll_wait() gave bad data " , true ) ;
2007-01-05 12:35:49 +03:00
return - 1 ;
}
2013-02-20 17:49:17 +04:00
if ( fde - > additional_flags & EPOLL_ADDITIONAL_FD_FLAG_HAS_MPX ) {
2007-01-05 12:35:49 +03:00
/*
2013-02-20 17:49:17 +04:00
* Save off the multiplexed event in case we need
* to use it to call the handler function .
2007-01-05 12:35:49 +03:00
*/
2013-02-20 17:49:17 +04:00
mpx_fde = talloc_get_type_abort ( fde - > additional_data ,
struct tevent_fd ) ;
}
if ( events [ i ] . events & ( EPOLLHUP | EPOLLERR ) ) {
bool handled_fde = epoll_handle_hup_or_err ( epoll_ev , fde ) ;
bool handled_mpx = epoll_handle_hup_or_err ( epoll_ev , mpx_fde ) ;
if ( handled_fde & & handled_mpx ) {
2013-02-20 14:26:00 +04:00
epoll_update_event ( epoll_ev , fde ) ;
2007-01-05 12:35:49 +03:00
continue ;
}
2013-02-20 17:49:17 +04:00
if ( ! handled_mpx ) {
/*
* If the mpx event was the one that needs
* further handling , it ' s the TEVENT_FD_READ
* event so switch over and call that handler .
*/
fde = mpx_fde ;
mpx_fde = NULL ;
}
2009-01-02 18:25:29 +03:00
flags | = TEVENT_FD_READ ;
2007-01-05 12:35:49 +03:00
}
2009-01-02 18:25:29 +03:00
if ( events [ i ] . events & EPOLLIN ) flags | = TEVENT_FD_READ ;
if ( events [ i ] . events & EPOLLOUT ) flags | = TEVENT_FD_WRITE ;
2013-02-20 17:49:17 +04:00
2013-02-20 14:50:56 +04:00
if ( flags & TEVENT_FD_WRITE ) {
if ( fde - > flags & TEVENT_FD_WRITE ) {
mpx_fde = NULL ;
}
if ( mpx_fde & & mpx_fde - > flags & TEVENT_FD_WRITE ) {
fde = mpx_fde ;
mpx_fde = NULL ;
}
}
2013-02-20 17:49:17 +04:00
if ( mpx_fde ) {
/* Ensure we got the right fde. */
if ( ( flags & fde - > flags ) = = 0 ) {
fde = mpx_fde ;
mpx_fde = NULL ;
}
}
/*
* make sure we only pass the flags
* the handler is expecting .
*/
flags & = fde - > flags ;
2007-01-05 12:35:49 +03:00
if ( flags ) {
fde - > handler ( epoll_ev - > ev , fde , flags , fde - > private_data ) ;
2009-03-16 14:34:23 +03:00
break ;
2007-01-05 12:35:49 +03:00
}
}
return 0 ;
}
/*
create a epoll_event_context structure .
*/
2008-12-29 22:24:57 +03:00
static int epoll_event_context_init ( struct tevent_context * ev )
2007-01-05 12:35:49 +03:00
{
2007-08-17 09:21:05 +04:00
int ret ;
2007-01-05 12:35:49 +03:00
struct epoll_event_context * epoll_ev ;
2013-02-11 22:53:15 +04:00
/*
* We might be called during tevent_re_initialise ( )
* which means we need to free our old additional_data .
*/
TALLOC_FREE ( ev - > additional_data ) ;
2007-01-05 12:35:49 +03:00
epoll_ev = talloc_zero ( ev , struct epoll_event_context ) ;
if ( ! epoll_ev ) return - 1 ;
epoll_ev - > ev = ev ;
epoll_ev - > epoll_fd = - 1 ;
2007-08-17 09:21:05 +04:00
ret = epoll_init_ctx ( epoll_ev ) ;
if ( ret ! = 0 ) {
talloc_free ( epoll_ev ) ;
return ret ;
}
2007-01-05 12:35:49 +03:00
ev - > additional_data = epoll_ev ;
return 0 ;
}
/*
destroy an fd_event
*/
2008-12-29 22:24:57 +03:00
static int epoll_event_fd_destructor ( struct tevent_fd * fde )
2007-01-05 12:35:49 +03:00
{
2008-12-29 22:24:57 +03:00
struct tevent_context * ev = fde - > event_ctx ;
2009-01-05 19:36:50 +03:00
struct epoll_event_context * epoll_ev = NULL ;
2013-02-15 19:33:56 +04:00
bool panic_triggered = false ;
2013-02-20 21:01:57 +04:00
struct tevent_fd * mpx_fde = NULL ;
2013-02-20 14:26:00 +04:00
int flags = fde - > flags ;
2007-01-05 12:35:49 +03:00
2013-02-15 19:33:56 +04:00
if ( ev = = NULL ) {
return tevent_common_fd_destructor ( fde ) ;
}
2007-06-02 04:32:49 +04:00
2013-02-15 19:33:56 +04:00
epoll_ev = talloc_get_type_abort ( ev - > additional_data ,
struct epoll_event_context ) ;
2007-01-05 12:35:49 +03:00
2013-02-15 19:33:56 +04:00
/*
* we must remove the event from the list
* otherwise a panic fallback handler may
* reuse invalid memory
*/
DLIST_REMOVE ( ev - > fd_events , fde ) ;
2013-02-20 21:01:57 +04:00
if ( fde - > additional_flags & EPOLL_ADDITIONAL_FD_FLAG_HAS_MPX ) {
mpx_fde = talloc_get_type_abort ( fde - > additional_data ,
struct tevent_fd ) ;
fde - > additional_flags & = ~ EPOLL_ADDITIONAL_FD_FLAG_HAS_MPX ;
mpx_fde - > additional_flags & = ~ EPOLL_ADDITIONAL_FD_FLAG_HAS_MPX ;
fde - > additional_data = NULL ;
mpx_fde - > additional_data = NULL ;
fde - > additional_flags & = ~ EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT ;
}
2013-02-15 19:33:56 +04:00
epoll_ev - > panic_state = & panic_triggered ;
epoll_check_reopen ( epoll_ev ) ;
if ( panic_triggered ) {
return tevent_common_fd_destructor ( fde ) ;
}
2013-02-20 21:01:57 +04:00
if ( mpx_fde ! = NULL ) {
epoll_update_event ( epoll_ev , mpx_fde ) ;
if ( panic_triggered ) {
return tevent_common_fd_destructor ( fde ) ;
}
}
2013-02-20 14:26:00 +04:00
fde - > flags = 0 ;
epoll_update_event ( epoll_ev , fde ) ;
fde - > flags = flags ;
2013-02-15 19:33:56 +04:00
if ( panic_triggered ) {
return tevent_common_fd_destructor ( fde ) ;
2007-05-14 04:57:48 +04:00
}
2013-02-15 19:33:56 +04:00
epoll_ev - > panic_state = NULL ;
2007-05-14 04:57:48 +04:00
2009-01-05 19:36:50 +03:00
return tevent_common_fd_destructor ( fde ) ;
2007-01-05 12:35:49 +03:00
}
/*
add a fd based event
return NULL on failure ( memory allocation error )
*/
2008-12-29 22:24:57 +03:00
static struct tevent_fd * epoll_event_add_fd ( struct tevent_context * ev , TALLOC_CTX * mem_ctx ,
2009-01-02 15:26:32 +03:00
int fd , uint16_t flags ,
tevent_fd_handler_t handler ,
void * private_data ,
const char * handler_name ,
const char * location )
2007-01-05 12:35:49 +03:00
{
2013-12-05 11:47:27 +04:00
struct epoll_event_context * epoll_ev =
talloc_get_type_abort ( ev - > additional_data ,
struct epoll_event_context ) ;
2008-12-29 22:24:57 +03:00
struct tevent_fd * fde ;
2013-02-15 19:33:56 +04:00
bool panic_triggered = false ;
2007-05-17 11:52:33 +04:00
2009-01-05 19:36:50 +03:00
fde = tevent_common_add_fd ( ev , mem_ctx , fd , flags ,
handler , private_data ,
handler_name , location ) ;
2007-01-05 12:35:49 +03:00
if ( ! fde ) return NULL ;
talloc_set_destructor ( fde , epoll_event_fd_destructor ) ;
2013-02-15 19:33:56 +04:00
epoll_ev - > panic_state = & panic_triggered ;
epoll_check_reopen ( epoll_ev ) ;
if ( panic_triggered ) {
return fde ;
}
epoll_ev - > panic_state = NULL ;
2013-02-20 14:26:00 +04:00
epoll_update_event ( epoll_ev , fde ) ;
2007-01-05 12:35:49 +03:00
return fde ;
}
/*
set the fd event flags
*/
2008-12-29 22:24:57 +03:00
static void epoll_event_set_fd_flags ( struct tevent_fd * fde , uint16_t flags )
2007-01-05 12:35:49 +03:00
{
2008-12-29 22:24:57 +03:00
struct tevent_context * ev ;
2007-01-05 12:35:49 +03:00
struct epoll_event_context * epoll_ev ;
2013-02-15 19:33:56 +04:00
bool panic_triggered = false ;
2007-01-05 12:35:49 +03:00
if ( fde - > flags = = flags ) return ;
ev = fde - > event_ctx ;
2013-12-05 11:47:27 +04:00
epoll_ev = talloc_get_type_abort ( ev - > additional_data ,
struct epoll_event_context ) ;
2007-01-05 12:35:49 +03:00
fde - > flags = flags ;
2013-02-15 19:33:56 +04:00
epoll_ev - > panic_state = & panic_triggered ;
2007-05-17 11:52:33 +04:00
epoll_check_reopen ( epoll_ev ) ;
2013-02-15 19:33:56 +04:00
if ( panic_triggered ) {
return ;
}
epoll_ev - > panic_state = NULL ;
2007-05-17 11:52:33 +04:00
2013-02-20 15:55:11 +04:00
epoll_update_event ( epoll_ev , fde ) ;
2007-01-05 12:35:49 +03:00
}
/*
2015-11-17 20:13:41 +03:00
do a single event loop using the events defined in ev
2007-01-05 12:35:49 +03:00
*/
2009-03-12 11:33:26 +03:00
static int epoll_event_loop_once ( struct tevent_context * ev , const char * location )
2007-01-05 12:35:49 +03:00
{
2013-12-05 11:47:27 +04:00
struct epoll_event_context * epoll_ev =
talloc_get_type_abort ( ev - > additional_data ,
struct epoll_event_context ) ;
2007-01-05 12:35:49 +03:00
struct timeval tval ;
2013-02-15 19:33:56 +04:00
bool panic_triggered = false ;
2007-01-05 12:35:49 +03:00
2009-03-13 17:47:33 +03:00
if ( ev - > signal_events & &
tevent_common_check_signal ( ev ) ) {
return 0 ;
}
2016-08-08 12:26:37 +03:00
if ( ev - > threaded_contexts ! = NULL ) {
tevent_common_threaded_activate_immediate ( ev ) ;
}
2009-03-13 17:47:33 +03:00
if ( ev - > immediate_events & &
tevent_common_loop_immediate ( ev ) ) {
2009-03-16 14:45:48 +03:00
return 0 ;
}
2009-01-02 15:26:32 +03:00
tval = tevent_common_loop_timer_delay ( ev ) ;
2009-02-17 01:34:15 +03:00
if ( tevent_timeval_is_zero ( & tval ) ) {
2007-01-05 12:35:49 +03:00
return 0 ;
}
2013-02-15 19:33:56 +04:00
epoll_ev - > panic_state = & panic_triggered ;
epoll_ev - > panic_force_replay = true ;
2007-05-17 11:52:33 +04:00
epoll_check_reopen ( epoll_ev ) ;
2013-02-15 19:33:56 +04:00
if ( panic_triggered ) {
errno = EINVAL ;
return - 1 ;
}
epoll_ev - > panic_force_replay = false ;
epoll_ev - > panic_state = NULL ;
2007-05-17 11:52:33 +04:00
2007-01-05 12:35:49 +03:00
return epoll_event_loop ( epoll_ev , & tval ) ;
}
2009-01-02 15:35:32 +03:00
static const struct tevent_ops epoll_event_ops = {
2009-03-13 17:47:33 +03:00
. context_init = epoll_event_context_init ,
. add_fd = epoll_event_add_fd ,
. set_fd_close_fn = tevent_common_fd_set_close_fn ,
. get_fd_flags = tevent_common_fd_get_flags ,
. set_fd_flags = epoll_event_set_fd_flags ,
2013-02-22 15:45:39 +04:00
. add_timer = tevent_common_add_timer_v2 ,
2009-03-13 17:47:33 +03:00
. schedule_immediate = tevent_common_schedule_immediate ,
. add_signal = tevent_common_add_signal ,
. loop_once = epoll_event_loop_once ,
. loop_wait = tevent_common_loop_wait ,
2007-01-05 12:35:49 +03:00
} ;
2010-04-18 06:47:00 +04:00
_PRIVATE_ bool tevent_epoll_init ( void )
2007-01-05 12:35:49 +03:00
{
2009-01-02 15:35:32 +03:00
return tevent_register_backend ( " epoll " , & epoll_event_ops ) ;
2007-01-05 12:35:49 +03:00
}