2007-01-05 09:35:49 +00:00
/*
   Unix SMB/CIFS implementation.
   main select loop and event handling - epoll implementation
   Copyright (C) Andrew Tridgell	2003-2005
2009-02-16 08:52:06 +01:00
   Copyright (C) Stefan Metzmacher	2005-2009
     ** NOTE! The following LGPL license applies to the tevent
     ** library. This does NOT imply that all of Samba is released
     ** under the LGPL
   This library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 3 of the License, or (at your option) any later version.
   This library is distributed in the hope that it will be useful,
2007-01-05 09:35:49 +00:00
   but WITHOUT ANY WARRANTY; without even the implied warranty of
2009-02-16 08:52:06 +01:00
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.
   You should have received a copy of the GNU Lesser General Public
   License along with this library; if not, see <http://www.gnu.org/licenses/>.
2007-01-05 09:35:49 +00:00
*/
2008-04-24 17:28:30 -04:00
# include "replace.h"
2007-01-05 09:35:49 +00:00
# include "system/filesys.h"
2009-01-02 12:53:05 +01:00
# include "system/select.h"
2008-12-16 19:57:09 +01:00
# include "tevent.h"
# include "tevent_internal.h"
# include "tevent_util.h"
2007-01-05 09:35:49 +00:00
/*
  private state of the epoll backend, stored in the
  additional_data member of the owning tevent_context
*/
struct epoll_event_context {
	/* back-pointer to the generic tevent context this state belongs to */
	struct tevent_context *ev;

	/* descriptor returned by epoll_create(), or -1 when none is open */
	int epoll_fd;

	/*
	 * pid recorded when epoll_fd was created; compared against
	 * getpid() to notice that we are running in a forked child
	 */
	pid_t pid;
};
/*
called when a epoll call fails , and we should fallback
to using select
*/
2008-04-24 17:28:30 -04:00
static void epoll_panic ( struct epoll_event_context * epoll_ev , const char * reason )
2007-01-05 09:35:49 +00:00
{
2009-01-02 13:39:56 +01:00
tevent_debug ( epoll_ev - > ev , TEVENT_DEBUG_FATAL ,
2008-06-14 11:23:31 -04:00
" %s (%s) - calling abort() \n " , reason , strerror ( errno ) ) ;
2007-08-17 05:21:05 +00:00
abort ( ) ;
2007-01-05 09:35:49 +00:00
}
/*
2009-01-02 16:25:29 +01:00
map from TEVENT_FD_ * to EPOLLIN / EPOLLOUT
2007-01-05 09:35:49 +00:00
*/
static uint32_t epoll_map_flags ( uint16_t flags )
{
uint32_t ret = 0 ;
2009-01-02 16:25:29 +01:00
if ( flags & TEVENT_FD_READ ) ret | = ( EPOLLIN | EPOLLERR | EPOLLHUP ) ;
if ( flags & TEVENT_FD_WRITE ) ret | = ( EPOLLOUT | EPOLLERR | EPOLLHUP ) ;
2007-01-05 09:35:49 +00:00
return ret ;
}
/*
free the epoll fd
*/
static int epoll_ctx_destructor ( struct epoll_event_context * epoll_ev )
{
close ( epoll_ev - > epoll_fd ) ;
epoll_ev - > epoll_fd = - 1 ;
return 0 ;
}
/*
init the epoll fd
*/
2007-08-17 05:21:05 +00:00
static int epoll_init_ctx ( struct epoll_event_context * epoll_ev )
2007-01-05 09:35:49 +00:00
{
epoll_ev - > epoll_fd = epoll_create ( 64 ) ;
2007-05-17 02:38:07 +00:00
epoll_ev - > pid = getpid ( ) ;
2007-01-05 09:35:49 +00:00
talloc_set_destructor ( epoll_ev , epoll_ctx_destructor ) ;
2007-08-17 05:21:05 +00:00
if ( epoll_ev - > epoll_fd = = - 1 ) {
return - 1 ;
}
return 0 ;
2007-01-05 09:35:49 +00:00
}
2008-12-29 20:24:57 +01:00
static void epoll_add_event ( struct epoll_event_context * epoll_ev , struct tevent_fd * fde ) ;
2007-05-17 02:38:07 +00:00
/*
reopen the epoll handle when our pid changes
see http : //junkcode.samba.org/ftp/unpacked/junkcode/epoll_fork.c for an
demonstration of why this is needed
*/
2007-05-17 07:52:33 +00:00
static void epoll_check_reopen ( struct epoll_event_context * epoll_ev )
2007-05-17 02:38:07 +00:00
{
2008-12-29 20:24:57 +01:00
struct tevent_fd * fde ;
2007-05-17 02:38:07 +00:00
2007-05-17 07:52:33 +00:00
if ( epoll_ev - > pid = = getpid ( ) ) {
return ;
}
2007-05-17 02:38:07 +00:00
close ( epoll_ev - > epoll_fd ) ;
epoll_ev - > epoll_fd = epoll_create ( 64 ) ;
if ( epoll_ev - > epoll_fd = = - 1 ) {
2009-01-02 13:39:56 +01:00
tevent_debug ( epoll_ev - > ev , TEVENT_DEBUG_FATAL ,
" Failed to recreate epoll handle after fork \n " ) ;
2007-05-17 02:38:07 +00:00
return ;
}
epoll_ev - > pid = getpid ( ) ;
2009-01-05 17:36:50 +01:00
for ( fde = epoll_ev - > ev - > fd_events ; fde ; fde = fde - > next ) {
2007-05-17 02:38:07 +00:00
epoll_add_event ( epoll_ev , fde ) ;
}
}
2007-01-05 09:35:49 +00:00
/*
 * backend private bits kept in tevent_fd->additional_flags
 *
 * HAS_EVENT:    the fd is currently registered with the epoll handle
 * REPORT_ERROR: EPOLLERR/EPOLLHUP should be passed on to the handler
 *               (set while the caller waits for TEVENT_FD_READ)
 * GOT_ERROR:    epoll already reported EPOLLERR/EPOLLHUP for this fd
 */
#define EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT	(1 << 0)
#define EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR	(1 << 1)
#define EPOLL_ADDITIONAL_FD_FLAG_GOT_ERROR	(1 << 2)
/*
add the epoll event to the given fd_event
*/
2008-12-29 20:24:57 +01:00
static void epoll_add_event ( struct epoll_event_context * epoll_ev , struct tevent_fd * fde )
2007-01-05 09:35:49 +00:00
{
struct epoll_event event ;
2007-05-17 02:38:07 +00:00
2007-01-05 09:35:49 +00:00
if ( epoll_ev - > epoll_fd = = - 1 ) return ;
fde - > additional_flags & = ~ EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR ;
/* if we don't want events yet, don't add an epoll_event */
if ( fde - > flags = = 0 ) return ;
ZERO_STRUCT ( event ) ;
event . events = epoll_map_flags ( fde - > flags ) ;
event . data . ptr = fde ;
if ( epoll_ctl ( epoll_ev - > epoll_fd , EPOLL_CTL_ADD , fde - > fd , & event ) ! = 0 ) {
2007-08-17 05:21:05 +00:00
epoll_panic ( epoll_ev , " EPOLL_CTL_ADD failed " ) ;
2007-01-05 09:35:49 +00:00
}
fde - > additional_flags | = EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT ;
/* only if we want to read we want to tell the event handler about errors */
2009-01-02 16:25:29 +01:00
if ( fde - > flags & TEVENT_FD_READ ) {
2007-01-05 09:35:49 +00:00
fde - > additional_flags | = EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR ;
}
}
/*
delete the epoll event for given fd_event
*/
2008-12-29 20:24:57 +01:00
static void epoll_del_event ( struct epoll_event_context * epoll_ev , struct tevent_fd * fde )
2007-01-05 09:35:49 +00:00
{
struct epoll_event event ;
2007-05-17 02:38:07 +00:00
2007-01-05 09:35:49 +00:00
if ( epoll_ev - > epoll_fd = = - 1 ) return ;
fde - > additional_flags & = ~ EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR ;
/* if there's no epoll_event, we don't need to delete it */
if ( ! ( fde - > additional_flags & EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT ) ) return ;
ZERO_STRUCT ( event ) ;
event . events = epoll_map_flags ( fde - > flags ) ;
event . data . ptr = fde ;
2007-05-14 00:57:48 +00:00
if ( epoll_ctl ( epoll_ev - > epoll_fd , EPOLL_CTL_DEL , fde - > fd , & event ) ! = 0 ) {
2009-01-02 13:39:56 +01:00
tevent_debug ( epoll_ev - > ev , TEVENT_DEBUG_FATAL ,
" epoll_del_event failed! probable early close bug (%s) \n " ,
strerror ( errno ) ) ;
2007-05-14 00:57:48 +00:00
}
2007-01-05 09:35:49 +00:00
fde - > additional_flags & = ~ EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT ;
}
/*
change the epoll event to the given fd_event
*/
2008-12-29 20:24:57 +01:00
static void epoll_mod_event ( struct epoll_event_context * epoll_ev , struct tevent_fd * fde )
2007-01-05 09:35:49 +00:00
{
struct epoll_event event ;
if ( epoll_ev - > epoll_fd = = - 1 ) return ;
fde - > additional_flags & = ~ EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR ;
ZERO_STRUCT ( event ) ;
event . events = epoll_map_flags ( fde - > flags ) ;
event . data . ptr = fde ;
if ( epoll_ctl ( epoll_ev - > epoll_fd , EPOLL_CTL_MOD , fde - > fd , & event ) ! = 0 ) {
2007-08-17 05:21:05 +00:00
epoll_panic ( epoll_ev , " EPOLL_CTL_MOD failed " ) ;
2007-01-05 09:35:49 +00:00
}
/* only if we want to read we want to tell the event handler about errors */
2009-01-02 16:25:29 +01:00
if ( fde - > flags & TEVENT_FD_READ ) {
2007-01-05 09:35:49 +00:00
fde - > additional_flags | = EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR ;
}
}
2008-12-29 20:24:57 +01:00
static void epoll_change_event ( struct epoll_event_context * epoll_ev , struct tevent_fd * fde )
2007-01-05 09:35:49 +00:00
{
2007-05-01 21:29:42 +00:00
bool got_error = ( fde - > additional_flags & EPOLL_ADDITIONAL_FD_FLAG_GOT_ERROR ) ;
2009-01-02 16:25:29 +01:00
bool want_read = ( fde - > flags & TEVENT_FD_READ ) ;
bool want_write = ( fde - > flags & TEVENT_FD_WRITE ) ;
2007-01-05 09:35:49 +00:00
if ( epoll_ev - > epoll_fd = = - 1 ) return ;
fde - > additional_flags & = ~ EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR ;
/* there's already an event */
if ( fde - > additional_flags & EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT ) {
if ( want_read | | ( want_write & & ! got_error ) ) {
epoll_mod_event ( epoll_ev , fde ) ;
return ;
}
/*
* if we want to match the select behavior , we need to remove the epoll_event
* when the caller isn ' t interested in events .
*
* this is because epoll reports EPOLLERR and EPOLLHUP , even without asking for them
*/
epoll_del_event ( epoll_ev , fde ) ;
return ;
}
/* there's no epoll_event attached to the fde */
if ( want_read | | ( want_write & & ! got_error ) ) {
epoll_add_event ( epoll_ev , fde ) ;
return ;
}
}
/*
event loop handling using epoll
*/
static int epoll_event_loop ( struct epoll_event_context * epoll_ev , struct timeval * tvalp )
{
int ret , i ;
2009-03-16 12:34:23 +01:00
# define MAXEVENTS 1
2007-01-05 09:35:49 +00:00
struct epoll_event events [ MAXEVENTS ] ;
int timeout = - 1 ;
if ( epoll_ev - > epoll_fd = = - 1 ) return - 1 ;
if ( tvalp ) {
/* it's better to trigger timed events a bit later than to early */
timeout = ( ( tvalp - > tv_usec + 999 ) / 1000 ) + ( tvalp - > tv_sec * 1000 ) ;
}
2009-01-05 16:55:00 +01:00
if ( epoll_ev - > ev - > signal_events & &
2009-01-02 13:26:32 +01:00
tevent_common_check_signal ( epoll_ev - > ev ) ) {
2007-01-21 08:23:14 +00:00
return 0 ;
}
2007-01-05 09:35:49 +00:00
ret = epoll_wait ( epoll_ev - > epoll_fd , events , MAXEVENTS , timeout ) ;
2009-01-05 16:55:00 +01:00
if ( ret = = - 1 & & errno = = EINTR & & epoll_ev - > ev - > signal_events ) {
2009-01-02 13:26:32 +01:00
if ( tevent_common_check_signal ( epoll_ev - > ev ) ) {
2007-01-21 08:23:14 +00:00
return 0 ;
}
}
2007-01-05 09:35:49 +00:00
if ( ret = = - 1 & & errno ! = EINTR ) {
2007-08-17 05:21:05 +00:00
epoll_panic ( epoll_ev , " epoll_wait() failed " ) ;
2007-01-05 09:35:49 +00:00
return - 1 ;
}
if ( ret = = 0 & & tvalp ) {
r22661: optimize the handling of directly triggered timed events:
- if someone adds a timed_event with a zero timeval
we now avoid serval gettimeofday() calls and the
event handler doesn't get the current time when it's
called, instead we also pass a zero timeval
- this also makes sure multiple timed events with a zero timeval
are processed in the order there're added.
the little benchmark shows that processing 2000000 directly timed events
is now much faster, while avoiding syscalls at all!
> time ./evtest (with the old code)
real 0m6.388s
user 0m1.740s
sys 0m4.632s
> time ./evtest (with the new code)
real 0m1.498s
user 0m1.496s
sys 0m0.004s
metze@SERNOX:~/devel/samba/4.0/samba4-ci/source> cat evtest.c
#include <stdio.h>
#include <stdint.h>
#include <sys/time.h>
#include <talloc.h>
#include <events.h>
static void dummy_fde_handler(struct event_context *ev_ctx, struct fd_event *fde,
uint16_t flags, void *private_data)
{
}
static void timeout_handler(struct event_context *ev, struct timed_event *te,
struct timeval tval, void *private_data)
{
uint32_t *countp = (uint32_t *)private_data;
(*countp)++;
if (*countp > 2000000) exit(0);
event_add_timed(ev, ev, tval, timeout_handler, countp);
}
int main(void)
{
struct event_context *ev;
struct timeval tval = { 0, 0 };
uint32_t count = 0;
ev = event_context_init(NULL);
event_add_fd(ev, ev, 0, 0, dummy_fde_handler, NULL);
event_add_timed(ev, ev, tval, timeout_handler, &count);
return event_loop_wait(ev);
}
(This used to be commit 4db64b4ce2320b88d648078cbf86385f6fb44f1f)
2007-05-04 09:22:52 +00:00
/* we don't care about a possible delay here */
2009-01-02 13:26:32 +01:00
tevent_common_loop_timer_delay ( epoll_ev - > ev ) ;
2007-01-05 09:35:49 +00:00
return 0 ;
}
for ( i = 0 ; i < ret ; i + + ) {
2008-12-29 20:24:57 +01:00
struct tevent_fd * fde = talloc_get_type ( events [ i ] . data . ptr ,
struct tevent_fd ) ;
2007-01-05 09:35:49 +00:00
uint16_t flags = 0 ;
if ( fde = = NULL ) {
2007-08-17 05:21:05 +00:00
epoll_panic ( epoll_ev , " epoll_wait() gave bad data " ) ;
2007-01-05 09:35:49 +00:00
return - 1 ;
}
if ( events [ i ] . events & ( EPOLLHUP | EPOLLERR ) ) {
fde - > additional_flags | = EPOLL_ADDITIONAL_FD_FLAG_GOT_ERROR ;
/*
2009-01-02 16:25:29 +01:00
* if we only wait for TEVENT_FD_WRITE , we should not tell the
2007-01-05 09:35:49 +00:00
* event handler about it , and remove the epoll_event ,
* as we only report errors when waiting for read events ,
* to match the select ( ) behavior
*/
if ( ! ( fde - > additional_flags & EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR ) ) {
epoll_del_event ( epoll_ev , fde ) ;
continue ;
}
2009-01-02 16:25:29 +01:00
flags | = TEVENT_FD_READ ;
2007-01-05 09:35:49 +00:00
}
2009-01-02 16:25:29 +01:00
if ( events [ i ] . events & EPOLLIN ) flags | = TEVENT_FD_READ ;
if ( events [ i ] . events & EPOLLOUT ) flags | = TEVENT_FD_WRITE ;
2007-01-05 09:35:49 +00:00
if ( flags ) {
fde - > handler ( epoll_ev - > ev , fde , flags , fde - > private_data ) ;
2009-03-16 12:34:23 +01:00
break ;
2007-01-05 09:35:49 +00:00
}
}
return 0 ;
}
/*
create a epoll_event_context structure .
*/
2008-12-29 20:24:57 +01:00
static int epoll_event_context_init ( struct tevent_context * ev )
2007-01-05 09:35:49 +00:00
{
2007-08-17 05:21:05 +00:00
int ret ;
2007-01-05 09:35:49 +00:00
struct epoll_event_context * epoll_ev ;
epoll_ev = talloc_zero ( ev , struct epoll_event_context ) ;
if ( ! epoll_ev ) return - 1 ;
epoll_ev - > ev = ev ;
epoll_ev - > epoll_fd = - 1 ;
2007-08-17 05:21:05 +00:00
ret = epoll_init_ctx ( epoll_ev ) ;
if ( ret ! = 0 ) {
talloc_free ( epoll_ev ) ;
return ret ;
}
2007-01-05 09:35:49 +00:00
ev - > additional_data = epoll_ev ;
return 0 ;
}
/*
destroy an fd_event
*/
2008-12-29 20:24:57 +01:00
static int epoll_event_fd_destructor ( struct tevent_fd * fde )
2007-01-05 09:35:49 +00:00
{
2008-12-29 20:24:57 +01:00
struct tevent_context * ev = fde - > event_ctx ;
2009-01-05 17:36:50 +01:00
struct epoll_event_context * epoll_ev = NULL ;
2007-01-05 09:35:49 +00:00
2009-01-05 17:36:50 +01:00
if ( ev ) {
epoll_ev = talloc_get_type ( ev - > additional_data ,
struct epoll_event_context ) ;
2007-06-02 00:32:49 +00:00
2009-01-05 17:36:50 +01:00
epoll_check_reopen ( epoll_ev ) ;
2007-01-05 09:35:49 +00:00
2009-01-05 17:36:50 +01:00
epoll_del_event ( epoll_ev , fde ) ;
2007-05-14 00:57:48 +00:00
}
2009-01-05 17:36:50 +01:00
return tevent_common_fd_destructor ( fde ) ;
2007-01-05 09:35:49 +00:00
}
/*
add a fd based event
return NULL on failure ( memory allocation error )
*/
2008-12-29 20:24:57 +01:00
static struct tevent_fd * epoll_event_add_fd ( struct tevent_context * ev , TALLOC_CTX * mem_ctx ,
2009-01-02 13:26:32 +01:00
int fd , uint16_t flags ,
tevent_fd_handler_t handler ,
void * private_data ,
const char * handler_name ,
const char * location )
2007-01-05 09:35:49 +00:00
{
struct epoll_event_context * epoll_ev = talloc_get_type ( ev - > additional_data ,
struct epoll_event_context ) ;
2008-12-29 20:24:57 +01:00
struct tevent_fd * fde ;
2007-01-05 09:35:49 +00:00
2007-05-17 07:52:33 +00:00
epoll_check_reopen ( epoll_ev ) ;
2009-01-05 17:36:50 +01:00
fde = tevent_common_add_fd ( ev , mem_ctx , fd , flags ,
handler , private_data ,
handler_name , location ) ;
2007-01-05 09:35:49 +00:00
if ( ! fde ) return NULL ;
talloc_set_destructor ( fde , epoll_event_fd_destructor ) ;
epoll_add_event ( epoll_ev , fde ) ;
return fde ;
}
/*
set the fd event flags
*/
2008-12-29 20:24:57 +01:00
static void epoll_event_set_fd_flags ( struct tevent_fd * fde , uint16_t flags )
2007-01-05 09:35:49 +00:00
{
2008-12-29 20:24:57 +01:00
struct tevent_context * ev ;
2007-01-05 09:35:49 +00:00
struct epoll_event_context * epoll_ev ;
if ( fde - > flags = = flags ) return ;
ev = fde - > event_ctx ;
epoll_ev = talloc_get_type ( ev - > additional_data , struct epoll_event_context ) ;
fde - > flags = flags ;
2007-05-17 07:52:33 +00:00
epoll_check_reopen ( epoll_ev ) ;
2007-01-05 09:35:49 +00:00
epoll_change_event ( epoll_ev , fde ) ;
}
/*
do a single event loop using the events defined in ev
*/
2009-03-12 09:33:26 +01:00
static int epoll_event_loop_once ( struct tevent_context * ev , const char * location )
2007-01-05 09:35:49 +00:00
{
struct epoll_event_context * epoll_ev = talloc_get_type ( ev - > additional_data ,
struct epoll_event_context ) ;
struct timeval tval ;
2009-03-13 15:47:33 +01:00
if ( ev - > signal_events & &
tevent_common_check_signal ( ev ) ) {
return 0 ;
}
if ( ev - > immediate_events & &
tevent_common_loop_immediate ( ev ) ) {
2009-03-16 12:45:48 +01:00
return 0 ;
}
2009-01-02 13:26:32 +01:00
tval = tevent_common_loop_timer_delay ( ev ) ;
2009-02-16 23:34:15 +01:00
if ( tevent_timeval_is_zero ( & tval ) ) {
2007-01-05 09:35:49 +00:00
return 0 ;
}
2007-05-17 07:52:33 +00:00
epoll_check_reopen ( epoll_ev ) ;
2007-01-05 09:35:49 +00:00
return epoll_event_loop ( epoll_ev , & tval ) ;
}
2009-01-02 13:35:32 +01:00
static const struct tevent_ops epoll_event_ops = {
2009-03-13 15:47:33 +01:00
. context_init = epoll_event_context_init ,
. add_fd = epoll_event_add_fd ,
. set_fd_close_fn = tevent_common_fd_set_close_fn ,
. get_fd_flags = tevent_common_fd_get_flags ,
. set_fd_flags = epoll_event_set_fd_flags ,
. add_timer = tevent_common_add_timer ,
. schedule_immediate = tevent_common_schedule_immediate ,
. add_signal = tevent_common_add_signal ,
. loop_once = epoll_event_loop_once ,
. loop_wait = tevent_common_loop_wait ,
2007-01-05 09:35:49 +00:00
} ;
2009-01-02 13:35:32 +01:00
bool tevent_epoll_init ( void )
2007-01-05 09:35:49 +00:00
{
2009-01-02 13:35:32 +01:00
return tevent_register_backend ( " epoll " , & epoll_event_ops ) ;
2007-01-05 09:35:49 +00:00
}