1
0
mirror of https://github.com/samba-team/samba.git synced 2024-12-23 17:34:34 +03:00

s3: Add parameter "ctdb timeout"

When something in the cluster blocks, it can happen that we wait indefinitely
long for ctdb, just adding to the blocking condition. In theory, nothing should
block, but as someone said "In practice the difference between theory and
practice is larger than in theory". This adds a timeout parameter in seconds,
after which we stop waiting for ctdb and panic.
This commit is contained in:
Volker Lendecke 2009-11-03 05:41:02 +01:00
parent 49397a8b3e
commit d415d4d32f
6 changed files with 73 additions and 5 deletions

View File

@ -0,0 +1,37 @@
<samba:parameter name="ctdb timeout"
context="G"
type="integer"
advanced="1"
xmlns:samba="http://www.samba.org/samba/DTD/samba-doc">
<description>
<para>This parameter specifies a timeout in seconds for the
connection between Samba and ctdb. It is only valid if you
have compiled Samba with clustering and if you have
set <parameter>clustering=yes</parameter>.
</para>
<para>When something in the cluster blocks, it can happen that
we wait indefinitely long for ctdb, just adding to the
blocking condition. In a well-running cluster this should
never happen, but there are too many components in a cluster
that might have hiccups. Choosing the right balance for this
value is very tricky, because on a busy cluster long service
times to transfer something across the cluster might be
valid. Setting it too short will degrade the service your
cluster presents, setting it too long might make the cluster
itself not recover from something severely broken for too
long.
</para>
<para>
Be aware that if you set this parameter, it needs to be in
the file smb.conf. It is not really helpful to put it into
a registry configuration (typical on a cluster), because
accessing the registry requires contact with ctdb.
</para>
<para>Setting <parameter>ctdb timeout</parameter> to n makes
any process waiting longer than n seconds for a reply by the
cluster panic. Setting it to 0 (the default) makes Samba
block forever, which is the highly recommended default.
</para>
</description>
<value type="default">0</value>
</samba:parameter>

View File

@ -38,7 +38,8 @@ NTSTATUS packet_fd_read(struct packet_context *ctx);
/*
* Sync read, wait for the next chunk
*/
NTSTATUS packet_fd_read_sync(struct packet_context *ctx);
NTSTATUS packet_fd_read_sync(struct packet_context *ctx,
struct timeval *timeout);
/*
* Handle an incoming packet:

View File

@ -4130,6 +4130,7 @@ int lp_cups_connection_timeout(void);
const char *lp_ctdbd_socket(void);
const char **lp_cluster_addresses(void);
bool lp_clustering(void);
int lp_ctdb_timeout(void);
char *lp_printcommand(int );
char *lp_lpqcommand(int );
char *lp_lprmcommand(int );

View File

@ -275,6 +275,17 @@ static struct messaging_rec *ctdb_pull_messaging_rec(TALLOC_CTX *mem_ctx,
return result;
}
/*
 * Sync read of the next chunk from ctdbd, bounded by the "ctdb timeout"
 * smb.conf parameter (in seconds).
 *
 * A configured value of 0 (the default) means "no timeout" and is passed
 * down to packet_fd_read_sync() as a NULL timeout pointer. Negative values
 * are handled the same way: a timeval with a negative tv_sec is rejected
 * by select() with EINVAL, which would turn a misconfiguration into a
 * failure of every single read from ctdbd.
 *
 * Returns whatever packet_fd_read_sync() returns for the given timeout.
 */
static NTSTATUS ctdb_packet_fd_read_sync(struct packet_context *ctx)
{
	int secs = lp_ctdb_timeout();
	struct timeval timeout;

	if (secs <= 0) {
		/* No (or nonsensical) timeout configured: wait without limit. */
		return packet_fd_read_sync(ctx, NULL);
	}

	timeout = timeval_set(secs, 0);
	return packet_fd_read_sync(ctx, &timeout);
}
/*
* Read a full ctdbd request. If we have a messaging context, defer incoming
* messages that might come in between.
@ -289,7 +300,7 @@ static NTSTATUS ctdb_read_req(struct ctdbd_connection *conn, uint32 reqid,
again:
status = packet_fd_read_sync(conn->pkt);
status = ctdb_packet_fd_read_sync(conn->pkt);
if (NT_STATUS_EQUAL(status, NT_STATUS_NETWORK_BUSY)) {
/* EAGAIN */
@ -1156,7 +1167,7 @@ NTSTATUS ctdbd_traverse(uint32 db_id,
break;
}
status = packet_fd_read_sync(conn->pkt);
status = ctdb_packet_fd_read_sync(conn->pkt);
if (NT_STATUS_EQUAL(status, NT_STATUS_RETRY)) {
/*

View File

@ -101,7 +101,8 @@ NTSTATUS packet_fd_read(struct packet_context *ctx)
return NT_STATUS_OK;
}
NTSTATUS packet_fd_read_sync(struct packet_context *ctx)
NTSTATUS packet_fd_read_sync(struct packet_context *ctx,
struct timeval *timeout)
{
int res;
fd_set r_fds;
@ -109,7 +110,12 @@ NTSTATUS packet_fd_read_sync(struct packet_context *ctx)
FD_ZERO(&r_fds);
FD_SET(ctx->fd, &r_fds);
res = sys_select(ctx->fd+1, &r_fds, NULL, NULL, NULL);
res = sys_select(ctx->fd+1, &r_fds, NULL, NULL, timeout);
if (res == 0) {
DEBUG(10, ("select timed out\n"));
return NT_STATUS_IO_TIMEOUT;
}
if (res == -1) {
DEBUG(10, ("select returned %s\n", strerror(errno)));

View File

@ -271,6 +271,7 @@ struct global {
char *ctdbdSocket;
char **szClusterAddresses;
bool clustering;
int ctdb_timeout;
int ldap_passwd_sync;
int ldap_replication_sleep;
int ldap_timeout; /* This is initialised in init_globals */
@ -2541,6 +2542,15 @@ static struct parm_struct parm_table[] = {
.enum_list = NULL,
.flags = FLAG_ADVANCED | FLAG_GLOBAL,
},
{
.label = "ctdb timeout",
.type = P_INTEGER,
.p_class = P_GLOBAL,
.ptr = &Globals.ctdb_timeout,
.special = NULL,
.enum_list = NULL,
.flags = FLAG_ADVANCED | FLAG_GLOBAL,
},
{N_("Printing Options"), P_SEP, P_SEPARATOR},
@ -5107,6 +5117,7 @@ static void init_globals(bool first_time_only)
string_set(&Globals.ctdbdSocket, "");
Globals.szClusterAddresses = NULL;
Globals.clustering = False;
Globals.ctdb_timeout = 0;
Globals.winbind_cache_time = 300; /* 5 minutes */
Globals.winbind_reconnect_delay = 30; /* 30 seconds */
@ -5557,6 +5568,7 @@ FN_GLOBAL_INTEGER(lp_cups_connection_timeout, &Globals.cups_connection_timeout)
FN_GLOBAL_CONST_STRING(lp_ctdbd_socket, &Globals.ctdbdSocket)
FN_GLOBAL_LIST(lp_cluster_addresses, &Globals.szClusterAddresses)
FN_GLOBAL_BOOL(lp_clustering, &Globals.clustering)
FN_GLOBAL_INTEGER(lp_ctdb_timeout, &Globals.ctdb_timeout)
FN_LOCAL_STRING(lp_printcommand, szPrintcommand)
FN_LOCAL_STRING(lp_lpqcommand, szLpqcommand)
FN_LOCAL_STRING(lp_lprmcommand, szLprmcommand)