1
0
mirror of https://github.com/samba-team/samba.git synced 2025-02-02 09:47:23 +03:00

better timeout handling for calls, controls and traverses

(This used to be ctdb commit 63346a6c59d4821b4c443939b5d88db8cd20f5fe)
This commit is contained in:
Andrew Tridgell 2007-05-10 14:06:48 +10:00
parent 31cd92dc7e
commit 15bc97cdaa
6 changed files with 84 additions and 28 deletions

View File

@ -613,17 +613,36 @@ static int ctdb_call_destructor(struct ctdb_call_state *state)
/*
called when a ctdb_call times out
*/
void ctdb_call_timeout(struct event_context *ev, struct timed_event *te,
struct timeval t, void *private_data)
static void ctdb_call_timeout(struct event_context *ev, struct timed_event *te,
struct timeval t, void *private_data)
{
struct ctdb_call_state *state = talloc_get_type(private_data, struct ctdb_call_state);
DEBUG(0,(__location__ " call timeout for reqid %d\n", state->c->hdr.reqid));
state->state = CTDB_CALL_ERROR;
ctdb_set_error(state->ctdb_db->ctdb, "ctdb_call %u timed out",
state->c->hdr.reqid);
if (state->async.fn) {
state->async.fn(state);
struct ctdb_context *ctdb = state->ctdb_db->ctdb;
ctdb->status.timeouts.call++;
event_add_timed(ctdb->ev, state, timeval_current_ofs(CTDB_CALL_TIMEOUT, 0),
ctdb_call_timeout, state);
if (ctdb->vnn_map->generation == state->generation ||
ctdb->recovery_mode != CTDB_RECOVERY_NORMAL) {
/* the call is just being slow, or we are currently
recovering, give it more time */
return;
}
/* the generation count changed - the call must be re-issued */
state->generation = ctdb->vnn_map->generation;
/* use a new reqid, in case the old reply does eventually come in */
ctdb_reqid_remove(ctdb, state->reqid);
state->reqid = ctdb_reqid_new(ctdb, state);
state->c->hdr.reqid = state->reqid;
/* send the packet to ourselves, it will be redirected appropriately */
state->c->hdr.destnode = ctdb->vnn;
ctdb_queue_packet(ctdb, &state->c->hdr);
}
/*
@ -697,15 +716,6 @@ struct ctdb_call_state *ctdb_daemon_call_send_remote(struct ctdb_db_context *ctd
CTDB_NO_MEMORY_NULL(ctdb, state->c);
state->c->hdr.destnode = header->dmaster;
#if 0
/*always sending the remote call straight to the lmaster
improved performance slightly in some tests.
worth investigating further in the future
*/
state->c->hdr.destnode = ctdb_lmaster(ctdb_db->ctdb, &(call->key));
#endif
/* this limits us to 16k outstanding messages - not unreasonable */
state->c->hdr.reqid = state->reqid;
state->c->flags = call->flags;
@ -723,13 +733,12 @@ struct ctdb_call_state *ctdb_daemon_call_send_remote(struct ctdb_db_context *ctd
state->state = CTDB_CALL_WAIT;
state->ctdb_db = ctdb_db;
state->generation = ctdb->vnn_map->generation;
ctdb_queue_packet(ctdb, &state->c->hdr);
#if CTDB_REQ_TIMEOUT
event_add_timed(ctdb->ev, state, timeval_current_ofs(CTDB_REQ_TIMEOUT, 0),
event_add_timed(ctdb->ev, state, timeval_current_ofs(CTDB_CALL_TIMEOUT, 0),
ctdb_call_timeout, state);
#endif
return state;
}

View File

@ -531,6 +531,21 @@ static int ctdb_control_destructor(struct ctdb_control_state *state)
return 0;
}
/*
  timed-event handler fired when a control has not completed in time.

  private_data is the struct ctdb_control_state for the pending
  control. The handler bumps the control timeout counter, reports
  failure to the control's callback (status -1 with tdb_null as the
  data argument) and then frees the control state.

  ev, te and t are not used by this handler.
*/
static void ctdb_control_timeout(struct event_context *ev, struct timed_event *te,
				 struct timeval t, void *private_data)
{
	struct ctdb_control_state *pending =
		talloc_get_type(private_data, struct ctdb_control_state);

	/* account for the timeout in the daemon status counters */
	pending->ctdb->status.timeouts.control += 1;

	/* report the failure to whoever issued the control */
	pending->callback(pending->ctdb, -1, tdb_null, pending->private_data);

	/* the control is finished with; release its state */
	talloc_free(pending);
}
/*
send a control message to a node
*/
@ -586,8 +601,8 @@ int ctdb_daemon_send_control(struct ctdb_context *ctdb, uint32_t destnode,
return 0;
}
#if CTDB_REQ_TIMEOUT
event_add_timed(ctdb->ev, state, timeval_current_ofs(CTDB_REQ_TIMEOUT, 0),
#if CTDB_CONTROL_TIMEOUT
event_add_timed(ctdb->ev, state, timeval_current_ofs(CTDB_CONTROL_TIMEOUT, 0),
ctdb_control_timeout, state);
#endif

View File

@ -147,8 +147,6 @@ struct ctdb_traverse_local_handle *ctdb_traverse_local(struct ctdb_db_context *c
struct ctdb_traverse_local_handle *h;
int ret;
ctdb_db->ctdb->status.traverse_calls++;
h = talloc_zero(ctdb_db, struct ctdb_traverse_local_handle);
if (h == NULL) {
return NULL;
@ -223,6 +221,18 @@ struct ctdb_traverse_all {
uint32_t vnn;
};
/*
  timed-event handler fired when a traverse times out.

  private_data is the struct ctdb_traverse_all_handle for the
  traverse. The handler bumps the traverse timeout counter, invokes
  the traverse callback with tdb_null for both of its trailing
  arguments, and then frees the handle.

  ev, te and t are not used by this handler.
*/
static void ctdb_traverse_all_timeout(struct event_context *ev, struct timed_event *te,
				      struct timeval t, void *private_data)
{
	struct ctdb_traverse_all_handle *handle =
		talloc_get_type(private_data, struct ctdb_traverse_all_handle);

	/* account for the timeout in the daemon status counters */
	handle->ctdb->status.timeouts.traverse += 1;

	/* tell the caller the traverse is over */
	handle->callback(handle->private_data, tdb_null, tdb_null);

	/* release the traverse handle */
	talloc_free(handle);
}
/*
setup a cluster-wide non-blocking traverse of a ctdb. The
callback function will be called on every record in the local
@ -269,6 +279,10 @@ struct ctdb_traverse_all_handle *ctdb_daemon_traverse_all(struct ctdb_db_context
return NULL;
}
/* timeout the traverse */
event_add_timed(ctdb->ev, state, timeval_current_ofs(CTDB_TRAVERSE_TIMEOUT, 0),
ctdb_traverse_all_timeout, state);
return state;
}

View File

@ -21,6 +21,7 @@
#include "includes.h"
#include "lib/events/events.h"
#include "system/filesys.h"
#include "system/time.h"
#include "popt.h"
#include "cmdline.h"
#include "../include/ctdb.h"

View File

@ -175,10 +175,14 @@ struct ctdb_status {
uint32_t register_srvid;
uint32_t deregister_srvid;
} controls;
struct {
uint32_t call;
uint32_t control;
uint32_t traverse;
} timeouts;
uint32_t total_calls;
uint32_t pending_calls;
uint32_t lockwait_calls;
uint32_t traverse_calls;
uint32_t pending_lockwait_calls;
uint32_t memory_used;
uint32_t __last_counter; /* hack for control_status_all */
@ -279,8 +283,18 @@ struct ctdb_db_context {
ctdb_fatal(ctdb, "Out of memory in " __location__ ); \
}} while (0)
/* arbitrary maximum timeout for ctdb operations */
#define CTDB_REQ_TIMEOUT 0
/* timeout for ctdb call operations. When this timeout expires we
check if the generation count has changed, and if it has then
re-issue the call */
#define CTDB_CALL_TIMEOUT 2
/* timeout for ctdb control calls */
#define CTDB_CONTROL_TIMEOUT 10
/* timeout for ctdb traverse calls. When this is reached we cut short
the traverse */
#define CTDB_TRAVERSE_TIMEOUT 20
/* number of consecutive calls from the same node before we give them
the record */
@ -356,6 +370,7 @@ struct ctdb_call_state {
struct ctdb_db_context *ctdb_db;
const char *errmsg;
struct ctdb_call call;
uint32_t generation;
struct {
void (*fn)(struct ctdb_call_state *);
void *private_data;

View File

@ -135,10 +135,12 @@ static void show_status(struct ctdb_status *s)
STATUS_FIELD(controls.set_seqnum_frequency),
STATUS_FIELD(controls.register_srvid),
STATUS_FIELD(controls.deregister_srvid),
STATUS_FIELD(timeouts.call),
STATUS_FIELD(timeouts.control),
STATUS_FIELD(timeouts.traverse),
STATUS_FIELD(total_calls),
STATUS_FIELD(pending_calls),
STATUS_FIELD(lockwait_calls),
STATUS_FIELD(traverse_calls),
STATUS_FIELD(pending_lockwait_calls),
STATUS_FIELD(memory_used),
STATUS_FIELD(max_hop_count),