mirror of
https://github.com/samba-team/samba.git
synced 2025-01-08 21:18:16 +03:00
lib/tsocket: avoid endless cpu-spinning in tstream_bsd_fde_handler()
There were some reports that strace output an LDAP server socket is in CLOSE_WAIT state, returning EAGAIN for writev over and over (after a call to epoll() each time). In the tstream_bsd code the problem happens when we have a pending writev_send, while there's no readv_send pending. In that case we still ask for TEVENT_FD_READ in order to notice connection errors early, so we try to call writev even if the socket doesn't report TEVENT_FD_WRITE. And there are situations where we do that over and over again. It happens like this with a Linux kernel: tcp_fin() has this: struct tcp_sock *tp = tcp_sk(sk); inet_csk_schedule_ack(sk); sk->sk_shutdown |= RCV_SHUTDOWN; sock_set_flag(sk, SOCK_DONE); switch (sk->sk_state) { case TCP_SYN_RECV: case TCP_ESTABLISHED: /* Move to CLOSE_WAIT */ tcp_set_state(sk, TCP_CLOSE_WAIT); inet_csk_enter_pingpong_mode(sk); break; It means RCV_SHUTDOWN gets set as well as TCP_CLOSE_WAIT, but sk->sk_err is not changed to indicate an error. tcp_sendmsg_locked has this: ... err = -EPIPE; if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN)) goto do_error; while (msg_data_left(msg)) { int copy = 0; skb = tcp_write_queue_tail(sk); if (skb) copy = size_goal - skb->len; if (copy <= 0 || !tcp_skb_can_collapse_to(skb)) { bool first_skb; new_segment: if (!sk_stream_memory_free(sk)) goto wait_for_space; ... wait_for_space: set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); if (copied) tcp_push(sk, flags & ~MSG_MORE, mss_now, TCP_NAGLE_PUSH, size_goal); err = sk_stream_wait_memory(sk, &timeo); if (err != 0) goto do_error; It means if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN)) doesn't hit as we only have RCV_SHUTDOWN and sk_stream_wait_memory returns -EAGAIN. tcp_poll has this: if (sk->sk_shutdown & RCV_SHUTDOWN) mask |= EPOLLIN | EPOLLRDNORM | EPOLLRDHUP; So we'll get EPOLLIN | EPOLLRDNORM | EPOLLRDHUP triggering TEVENT_FD_READ and writev/sendmsg keeps getting EAGAIN. So we need to always clear TEVENT_FD_READ if we don't have readable handler in order to avoid burning cpu. But we turn it on again after a timeout of 1 second in order to monitor the error state of the connection. And now that our tsocket_bsd_error() helper checks for POLLRDHUP, we can check if the socket is in an error state before calling the writable handler when TEVENT_FD_READ was reported. Only on error we'll call the writable handler, which will pick the error without calling writev(). BUG: https://bugzilla.samba.org/show_bug.cgi?id=15202 Signed-off-by: Stefan Metzmacher <metze@samba.org> Reviewed-by: Ralph Boehme <slow@samba.org>
This commit is contained in:
parent
4c7e2b9b60
commit
e232ba946f
@ -1754,6 +1754,9 @@ struct tstream_bsd {
|
||||
void (*readable_handler)(void *private_data);
|
||||
void *writeable_private;
|
||||
void (*writeable_handler)(void *private_data);
|
||||
|
||||
struct tevent_context *error_ctx;
|
||||
struct tevent_timer *error_timer;
|
||||
};
|
||||
|
||||
bool tstream_bsd_optimize_readv(struct tstream_context *stream,
|
||||
@ -1775,6 +1778,28 @@ bool tstream_bsd_optimize_readv(struct tstream_context *stream,
|
||||
return old;
|
||||
}
|
||||
|
||||
static void tstream_bsd_error_timer(struct tevent_context *ev,
|
||||
struct tevent_timer *te,
|
||||
struct timeval current_time,
|
||||
void *private_data)
|
||||
{
|
||||
struct tstream_bsd *bsds =
|
||||
talloc_get_type(private_data,
|
||||
struct tstream_bsd);
|
||||
|
||||
TALLOC_FREE(bsds->error_timer);
|
||||
|
||||
/*
|
||||
* Turn on TEVENT_FD_READABLE() again
|
||||
* if we have a writeable_handler that
|
||||
* wants to monitor the connection
|
||||
* for errors.
|
||||
*/
|
||||
if (bsds->writeable_handler != NULL) {
|
||||
TEVENT_FD_READABLE(bsds->fde);
|
||||
}
|
||||
}
|
||||
|
||||
static void tstream_bsd_fde_handler(struct tevent_context *ev,
|
||||
struct tevent_fd *fde,
|
||||
uint16_t flags,
|
||||
@ -1789,11 +1814,74 @@ static void tstream_bsd_fde_handler(struct tevent_context *ev,
|
||||
}
|
||||
if (flags & TEVENT_FD_READ) {
|
||||
if (!bsds->readable_handler) {
|
||||
if (bsds->writeable_handler) {
|
||||
struct timeval recheck_time;
|
||||
|
||||
/*
|
||||
* In order to avoid cpu-spinning
|
||||
* we no longer want to get TEVENT_FD_READ
|
||||
*/
|
||||
TEVENT_FD_NOT_READABLE(bsds->fde);
|
||||
|
||||
if (!bsds->writeable_handler) {
|
||||
return;
|
||||
}
|
||||
|
||||
/*
|
||||
* If we have a writeable handler we
|
||||
* want that to report connection errors
|
||||
* early.
|
||||
*
|
||||
* So we check if the socket is in an
|
||||
* error state.
|
||||
*/
|
||||
if (bsds->error == 0) {
|
||||
int ret = tsocket_bsd_error(bsds->fd);
|
||||
|
||||
if (ret == -1) {
|
||||
bsds->error = errno;
|
||||
}
|
||||
}
|
||||
|
||||
if (bsds->error != 0) {
|
||||
/*
|
||||
* Let the writeable handler report the error
|
||||
*/
|
||||
bsds->writeable_handler(bsds->writeable_private);
|
||||
return;
|
||||
}
|
||||
|
||||
/*
|
||||
* Here we called TEVENT_FD_NOT_READABLE() without
|
||||
* calling into the writeable handler.
|
||||
*
|
||||
* So we may have to wait for the kernels tcp stack
|
||||
* to report TEVENT_FD_WRITE in order to let
|
||||
* make progress and turn on TEVENT_FD_READABLE()
|
||||
* again.
|
||||
*
|
||||
* As a fallback we use a timer that turns on
|
||||
* TEVENT_FD_READABLE() again after a timeout of
|
||||
* 1 second.
|
||||
*/
|
||||
|
||||
if (bsds->error_timer != NULL) {
|
||||
return;
|
||||
}
|
||||
|
||||
recheck_time = timeval_current_ofs(1, 0);
|
||||
bsds->error_timer = tevent_add_timer(bsds->error_ctx,
|
||||
bsds,
|
||||
recheck_time,
|
||||
tstream_bsd_error_timer,
|
||||
bsds);
|
||||
if (bsds->error_timer == NULL) {
|
||||
bsds->error = ENOMEM;
|
||||
/*
|
||||
* Let the writeable handler report the error
|
||||
*/
|
||||
bsds->writeable_handler(bsds->writeable_private);
|
||||
return;
|
||||
}
|
||||
TEVENT_FD_NOT_READABLE(bsds->fde);
|
||||
return;
|
||||
}
|
||||
bsds->readable_handler(bsds->readable_private);
|
||||
@ -1848,6 +1936,8 @@ static int tstream_bsd_set_readable_handler(struct tstream_bsd *bsds,
|
||||
TEVENT_FD_READABLE(bsds->fde);
|
||||
}
|
||||
|
||||
TALLOC_FREE(bsds->error_timer);
|
||||
|
||||
bsds->readable_handler = handler;
|
||||
bsds->readable_private = private_data;
|
||||
|
||||
@ -1870,7 +1960,8 @@ static int tstream_bsd_set_writeable_handler(struct tstream_bsd *bsds,
|
||||
bsds->writeable_handler = NULL;
|
||||
bsds->writeable_private = NULL;
|
||||
TEVENT_FD_NOT_WRITEABLE(bsds->fde);
|
||||
|
||||
TALLOC_FREE(bsds->error_timer);
|
||||
bsds->error_ctx = NULL;
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -1882,6 +1973,8 @@ static int tstream_bsd_set_writeable_handler(struct tstream_bsd *bsds,
|
||||
}
|
||||
bsds->event_ptr = NULL;
|
||||
TALLOC_FREE(bsds->fde);
|
||||
TALLOC_FREE(bsds->error_timer);
|
||||
bsds->error_ctx = NULL;
|
||||
}
|
||||
|
||||
if (tevent_fd_get_flags(bsds->fde) == 0) {
|
||||
@ -1907,6 +2000,7 @@ static int tstream_bsd_set_writeable_handler(struct tstream_bsd *bsds,
|
||||
|
||||
bsds->writeable_handler = handler;
|
||||
bsds->writeable_private = private_data;
|
||||
bsds->error_ctx = ev;
|
||||
|
||||
return 0;
|
||||
}
|
||||
@ -2212,7 +2306,14 @@ static void tstream_bsd_writev_handler(void *private_data)
|
||||
}
|
||||
err = tsocket_bsd_error_from_errno(ret, errno, &retry);
|
||||
if (retry) {
|
||||
/* retry later */
|
||||
/*
|
||||
* retry later...
|
||||
*
|
||||
* make sure we also wait readable again
|
||||
* in order to notice errors early
|
||||
*/
|
||||
TEVENT_FD_READABLE(bsds->fde);
|
||||
TALLOC_FREE(bsds->error_timer);
|
||||
return;
|
||||
}
|
||||
if (err != 0) {
|
||||
@ -2238,7 +2339,13 @@ static void tstream_bsd_writev_handler(void *private_data)
|
||||
}
|
||||
|
||||
if (state->count > 0) {
|
||||
/* we have more to read */
|
||||
/*
|
||||
* we have more to write
|
||||
*
|
||||
* make sure we also wait readable again
|
||||
* in order to notice errors early
|
||||
*/
|
||||
TEVENT_FD_READABLE(bsds->fde);
|
||||
return;
|
||||
}
|
||||
|
||||
@ -2286,6 +2393,8 @@ static struct tevent_req *tstream_bsd_disconnect_send(TALLOC_CTX *mem_ctx,
|
||||
goto post;
|
||||
}
|
||||
|
||||
TALLOC_FREE(bsds->error_timer);
|
||||
bsds->error_ctx = NULL;
|
||||
TALLOC_FREE(bsds->fde);
|
||||
ret = close(bsds->fd);
|
||||
bsds->fd = -1;
|
||||
@ -2328,6 +2437,8 @@ static const struct tstream_context_ops tstream_bsd_ops = {
|
||||
|
||||
static int tstream_bsd_destructor(struct tstream_bsd *bsds)
|
||||
{
|
||||
TALLOC_FREE(bsds->error_timer);
|
||||
bsds->error_ctx = NULL;
|
||||
TALLOC_FREE(bsds->fde);
|
||||
if (bsds->fd != -1) {
|
||||
close(bsds->fd);
|
||||
|
@ -1,6 +0,0 @@
|
||||
^samba.unittests.tsocket_tstream.test_tstream_disconnected_tcp_client_spin
|
||||
^samba.unittests.tsocket_tstream.test_tstream_disconnected_unix_client_spin
|
||||
^samba.unittests.tsocket_tstream.test_tstream_more_tcp_client_spin
|
||||
^samba.unittests.tsocket_tstream.test_tstream_more_unix_client_spin
|
||||
^samba.unittests.tsocket_tstream.test_tstream_more_disconnect_tcp_client_spin
|
||||
^samba.unittests.tsocket_tstream.test_tstream_more_disconnect_unix_client_spin
|
Loading…
Reference in New Issue
Block a user