/*
   Unix SMB/CIFS implementation.

   main select loop and event handling - epoll implementation

   Copyright (C) Andrew Tridgell 2003-2005
   Copyright (C) Stefan Metzmacher 2005

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <http://www.gnu.org/licenses/>.
*/

#include "replace.h"
#include "system/filesys.h"
#include "system/network.h"
#include "system/select.h"
#include "tevent.h"
#include "tevent_internal.h"
#include "tevent_util.h"

struct epoll_event_context {
        /* a pointer back to the generic event_context */
        struct tevent_context *ev;

        /* list of filedescriptor events */
        struct tevent_fd *fd_events;

        /* number of registered fd event handlers */
        int num_fd_events;

        /* this is changed by the destructors for the fd event
           type. It is used to detect event destruction by event
           handlers, which means the code that is calling the event
           handler needs to assume that the linked list is no longer
           valid
        */
        uint32_t destruction_count;

        /* when using epoll this is the handle from epoll_create */
        int epoll_fd;

        pid_t pid;
};

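/*
  note on destruction_count: epoll_event_loop() takes a snapshot of this
  counter before calling any fd handlers and stops walking the returned
  epoll events if it has changed, because a handler may have freed one
  of the tevent_fd structures that the remaining entries still point at.
*/
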
/*
  called when an epoll call fails, and we should fall back
  to using select
*/
static void epoll_panic(struct epoll_event_context *epoll_ev, const char *reason)
{
        ev_debug(epoll_ev->ev, EV_DEBUG_FATAL,
                 "%s (%s) - calling abort()\n", reason, strerror(errno));
        abort();
}

/*
  map from EVENT_FD_* to EPOLLIN/EPOLLOUT
*/
static uint32_t epoll_map_flags(uint16_t flags)
{
        uint32_t ret = 0;
        if (flags & EVENT_FD_READ) ret |= (EPOLLIN | EPOLLERR | EPOLLHUP);
        if (flags & EVENT_FD_WRITE) ret |= (EPOLLOUT | EPOLLERR | EPOLLHUP);
        return ret;
}

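/*
  note: the kernel reports EPOLLERR and EPOLLHUP even when they are not
  requested (see epoll_ctl(2)), so including them in the mapping above
  is mainly for clarity.
*/
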
/*
  free the epoll fd
*/
static int epoll_ctx_destructor(struct epoll_event_context *epoll_ev)
{
        close(epoll_ev->epoll_fd);
        epoll_ev->epoll_fd = -1;
        return 0;
}

/*
  init the epoll fd
*/
static int epoll_init_ctx(struct epoll_event_context *epoll_ev)
{
        epoll_ev->epoll_fd = epoll_create(64);
        epoll_ev->pid = getpid();
        talloc_set_destructor(epoll_ev, epoll_ctx_destructor);
        if (epoll_ev->epoll_fd == -1) {
                return -1;
        }
        return 0;
}

static void epoll_add_event(struct epoll_event_context *epoll_ev, struct tevent_fd *fde);

/*
  reopen the epoll handle when our pid changes
  see http://junkcode.samba.org/ftp/unpacked/junkcode/epoll_fork.c for a
  demonstration of why this is needed
*/
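/*
  background (a brief summary; the link above carries the full
  demonstration): after fork() both processes share the same kernel
  epoll instance, so epoll_ctl() calls and pending events in one
  process can affect the other, and the data.ptr values still point at
  tevent_fd structures owned by the original process. Recreating the
  epoll fd in whichever process notices the pid change keeps the two
  event sets independent.
*/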
static void epoll_check_reopen(struct epoll_event_context *epoll_ev)
{
        struct tevent_fd *fde;

        if (epoll_ev->pid == getpid()) {
                return;
        }

        close(epoll_ev->epoll_fd);
        epoll_ev->epoll_fd = epoll_create(64);
        if (epoll_ev->epoll_fd == -1) {
                ev_debug(epoll_ev->ev, EV_DEBUG_FATAL,
                         "Failed to recreate epoll handle after fork\n");
                return;
        }
        epoll_ev->pid = getpid();
        for (fde = epoll_ev->fd_events; fde; fde = fde->next) {
                epoll_add_event(epoll_ev, fde);
        }
}

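/*
  bits used in fde->additional_flags by this backend:
  HAS_EVENT    - an epoll_event is currently registered for this fde
  REPORT_ERROR - EPOLLERR/EPOLLHUP should be reported to the handler
                 (only set when the caller asked for EVENT_FD_READ)
  GOT_ERROR    - an error/hangup condition has already been seen on this fd
*/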
#define EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT	(1<<0)
#define EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR	(1<<1)
#define EPOLL_ADDITIONAL_FD_FLAG_GOT_ERROR	(1<<2)

/*
  add the epoll event to the given fd_event
*/
static void epoll_add_event(struct epoll_event_context *epoll_ev, struct tevent_fd *fde)
{
        struct epoll_event event;

        if (epoll_ev->epoll_fd == -1) return;

        fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR;

        /* if we don't want events yet, don't add an epoll_event */
        if (fde->flags == 0) return;

        ZERO_STRUCT(event);
        event.events = epoll_map_flags(fde->flags);
        event.data.ptr = fde;
        if (epoll_ctl(epoll_ev->epoll_fd, EPOLL_CTL_ADD, fde->fd, &event) != 0) {
                epoll_panic(epoll_ev, "EPOLL_CTL_ADD failed");
        }
        fde->additional_flags |= EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT;

        /* only if we want to read do we tell the event handler about errors */
        if (fde->flags & EVENT_FD_READ) {
                fde->additional_flags |= EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR;
        }
}

/*
  delete the epoll event for given fd_event
*/
static void epoll_del_event(struct epoll_event_context *epoll_ev, struct tevent_fd *fde)
{
        struct epoll_event event;

        if (epoll_ev->epoll_fd == -1) return;

        fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR;

        /* if there's no epoll_event, we don't need to delete it */
        if (!(fde->additional_flags & EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT)) return;

        ZERO_STRUCT(event);
        event.events = epoll_map_flags(fde->flags);
        event.data.ptr = fde;
        if (epoll_ctl(epoll_ev->epoll_fd, EPOLL_CTL_DEL, fde->fd, &event) != 0) {
                ev_debug(epoll_ev->ev, EV_DEBUG_FATAL,
                         "epoll_del_event failed! probable early close bug (%s)\n",
                         strerror(errno));
        }
        fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT;
}

/*
  change the epoll event to the given fd_event
*/
static void epoll_mod_event(struct epoll_event_context *epoll_ev, struct tevent_fd *fde)
{
        struct epoll_event event;

        if (epoll_ev->epoll_fd == -1) return;

        fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR;

        ZERO_STRUCT(event);
        event.events = epoll_map_flags(fde->flags);
        event.data.ptr = fde;
        if (epoll_ctl(epoll_ev->epoll_fd, EPOLL_CTL_MOD, fde->fd, &event) != 0) {
                epoll_panic(epoll_ev, "EPOLL_CTL_MOD failed");
        }

        /* only if we want to read do we tell the event handler about errors */
        if (fde->flags & EVENT_FD_READ) {
                fde->additional_flags |= EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR;
        }
}

static void epoll_change_event(struct epoll_event_context *epoll_ev, struct tevent_fd *fde)
{
        bool got_error = (fde->additional_flags & EPOLL_ADDITIONAL_FD_FLAG_GOT_ERROR);
        bool want_read = (fde->flags & EVENT_FD_READ);
        bool want_write = (fde->flags & EVENT_FD_WRITE);

        if (epoll_ev->epoll_fd == -1) return;

        fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR;

        /* there's already an event */
        if (fde->additional_flags & EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT) {
                if (want_read || (want_write && !got_error)) {
                        epoll_mod_event(epoll_ev, fde);
                        return;
                }
                /*
                 * if we want to match the select behavior, we need to remove the epoll_event
                 * when the caller isn't interested in events.
                 *
                 * this is because epoll reports EPOLLERR and EPOLLHUP, even without asking for them
                 */
                epoll_del_event(epoll_ev, fde);
                return;
        }

        /* there's no epoll_event attached to the fde */
        if (want_read || (want_write && !got_error)) {
                epoll_add_event(epoll_ev, fde);
                return;
        }
}

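/*
  summary of the logic above: with an epoll_event already registered we
  either MOD it (the caller still wants reads, or wants writes and no
  error has been seen yet) or DEL it; with nothing registered we ADD
  under the same condition. This keeps the behaviour in line with
  select(), where a caller that asks for nothing is told nothing, even
  on error or hangup.
*/
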
/*
  event loop handling using epoll
*/
static int epoll_event_loop(struct epoll_event_context *epoll_ev, struct timeval *tvalp)
{
        int ret, i;
#define MAXEVENTS 32
        struct epoll_event events[MAXEVENTS];
        uint32_t destruction_count = ++epoll_ev->destruction_count;
        int timeout = -1;

        if (epoll_ev->epoll_fd == -1) return -1;

        if (tvalp) {
                /* it's better to trigger timed events a bit later than too early,
                   so round the timeout up to the next millisecond */
                timeout = ((tvalp->tv_usec+999)/1000) + (tvalp->tv_sec*1000);
        }

        if (epoll_ev->ev->num_signal_handlers &&
            tevent_common_check_signal(epoll_ev->ev)) {
                return 0;
        }

        ret = epoll_wait(epoll_ev->epoll_fd, events, MAXEVENTS, timeout);

        if (ret == -1 && errno == EINTR && epoll_ev->ev->num_signal_handlers) {
                if (tevent_common_check_signal(epoll_ev->ev)) {
                        return 0;
                }
        }

        if (ret == -1 && errno != EINTR) {
                epoll_panic(epoll_ev, "epoll_wait() failed");
                return -1;
        }

        if (ret == 0 && tvalp) {
                /* we don't care about a possible delay here */
                tevent_common_loop_timer_delay(epoll_ev->ev);
                return 0;
        }

        for (i = 0; i < ret; i++) {
                struct tevent_fd *fde = talloc_get_type(events[i].data.ptr,
                                                        struct tevent_fd);
                uint16_t flags = 0;

                if (fde == NULL) {
                        epoll_panic(epoll_ev, "epoll_wait() gave bad data");
                        return -1;
                }
                if (events[i].events & (EPOLLHUP|EPOLLERR)) {
                        fde->additional_flags |= EPOLL_ADDITIONAL_FD_FLAG_GOT_ERROR;
                        /*
                         * if we only wait for EVENT_FD_WRITE, we should not tell the
                         * event handler about it, and remove the epoll_event,
                         * as we only report errors when waiting for read events,
                         * to match the select() behavior
                         */
                        if (!(fde->additional_flags & EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR)) {
                                epoll_del_event(epoll_ev, fde);
                                continue;
                        }
                        flags |= EVENT_FD_READ;
                }
                if (events[i].events & EPOLLIN) flags |= EVENT_FD_READ;
                if (events[i].events & EPOLLOUT) flags |= EVENT_FD_WRITE;
                if (flags) {
                        fde->handler(epoll_ev->ev, fde, flags, fde->private_data);
                        if (destruction_count != epoll_ev->destruction_count) {
                                break;
                        }
                }
        }

        return 0;
}

/*
  create an epoll_event_context structure.
*/
static int epoll_event_context_init(struct tevent_context *ev)
{
        int ret;
        struct epoll_event_context *epoll_ev;

        epoll_ev = talloc_zero(ev, struct epoll_event_context);
        if (!epoll_ev) return -1;
        epoll_ev->ev = ev;
        epoll_ev->epoll_fd = -1;

        ret = epoll_init_ctx(epoll_ev);
        if (ret != 0) {
                talloc_free(epoll_ev);
                return ret;
        }

        ev->additional_data = epoll_ev;
        return 0;
}

/*
  destroy an fd_event
*/
static int epoll_event_fd_destructor(struct tevent_fd *fde)
{
        struct tevent_context *ev = fde->event_ctx;
        struct epoll_event_context *epoll_ev = talloc_get_type(ev->additional_data,
                                                               struct epoll_event_context);

        epoll_check_reopen(epoll_ev);

        epoll_ev->num_fd_events--;
        epoll_ev->destruction_count++;

        DLIST_REMOVE(epoll_ev->fd_events, fde);

        epoll_del_event(epoll_ev, fde);

        if (fde->flags & EVENT_FD_AUTOCLOSE) {
                close(fde->fd);
                fde->fd = -1;
        }

        return 0;
}

/*
  add an fd based event
  return NULL on failure (memory allocation error)
*/
static struct tevent_fd *epoll_event_add_fd(struct tevent_context *ev, TALLOC_CTX *mem_ctx,
                                            int fd, uint16_t flags,
                                            tevent_fd_handler_t handler,
                                            void *private_data,
                                            const char *handler_name,
                                            const char *location)
{
        struct epoll_event_context *epoll_ev = talloc_get_type(ev->additional_data,
                                                               struct epoll_event_context);
        struct tevent_fd *fde;

        epoll_check_reopen(epoll_ev);

        fde = talloc(mem_ctx ? mem_ctx : ev, struct tevent_fd);
        if (!fde) return NULL;

        fde->event_ctx = ev;
        fde->fd = fd;
        fde->flags = flags;
        fde->handler = handler;
        fde->private_data = private_data;
        fde->handler_name = handler_name;
        fde->location = location;
        fde->additional_flags = 0;
        fde->additional_data = NULL;

        epoll_ev->num_fd_events++;
        talloc_set_destructor(fde, epoll_event_fd_destructor);

        DLIST_ADD(epoll_ev->fd_events, fde);

        epoll_add_event(epoll_ev, fde);

        return fde;
}

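/*
  note (an assumption, not stated in this file): callers normally do not
  invoke this backend hook directly; they are expected to go through the
  generic event/tevent add_fd wrapper, which fills in handler_name and
  location automatically for debugging purposes.
*/
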
/*
  return the fd event flags
*/
static uint16_t epoll_event_get_fd_flags(struct tevent_fd *fde)
{
        return fde->flags;
}

/*
  set the fd event flags
*/
static void epoll_event_set_fd_flags(struct tevent_fd *fde, uint16_t flags)
{
        struct tevent_context *ev;
        struct epoll_event_context *epoll_ev;

        if (fde->flags == flags) return;

        ev = fde->event_ctx;
        epoll_ev = talloc_get_type(ev->additional_data, struct epoll_event_context);

        fde->flags = flags;

        epoll_check_reopen(epoll_ev);

        epoll_change_event(epoll_ev, fde);
}

/*
  do a single event loop using the events defined in ev
*/
static int epoll_event_loop_once(struct tevent_context *ev)
{
        struct epoll_event_context *epoll_ev = talloc_get_type(ev->additional_data,
                                                               struct epoll_event_context);
        struct timeval tval;

        tval = tevent_common_loop_timer_delay(ev);
        if (ev_timeval_is_zero(&tval)) {
                return 0;
        }

        epoll_check_reopen(epoll_ev);

        return epoll_event_loop(epoll_ev, &tval);
}

/*
  return on failure, or (with 0) once all fd events have been removed
*/
static int epoll_event_loop_wait(struct tevent_context *ev)
{
        struct epoll_event_context *epoll_ev = talloc_get_type(ev->additional_data,
                                                               struct epoll_event_context);
        while (epoll_ev->num_fd_events) {
                if (epoll_event_loop_once(ev) != 0) {
                        break;
                }
        }

        return 0;
}

static const struct tevent_ops epoll_event_ops = {
        .context_init   = epoll_event_context_init,
        .add_fd         = epoll_event_add_fd,
        .get_fd_flags   = epoll_event_get_fd_flags,
        .set_fd_flags   = epoll_event_set_fd_flags,
        .add_timer      = tevent_common_add_timer,
        .add_signal     = tevent_common_add_signal,
        .loop_once      = epoll_event_loop_once,
        .loop_wait      = epoll_event_loop_wait,
};

bool tevent_epoll_init(void)
{
        return tevent_register_backend("epoll", &epoll_event_ops);
}
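
/*
  usage sketch (an assumption, not part of this file): once registered,
  the backend can typically be selected by name, for example

    struct tevent_context *ev = tevent_context_init_byname(NULL, "epoll");

  assuming a tevent_context_init_byname() helper exists in this tree;
  otherwise the generic tevent_context_init() picks a default backend.
*/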