1
0
mirror of https://github.com/samba-team/samba.git synced 2025-01-25 06:04:04 +03:00

make the running of the takeover and release event scripts async, to prevent outages due to slow scripts

(This used to be ctdb commit 4189be97eee7ab2a50335c860f2fcd9566667d01)
This commit is contained in:
Andrew Tridgell 2007-06-01 19:05:41 +10:00
parent 869d70d9c9
commit 7db1d04d5c
6 changed files with 165 additions and 55 deletions

View File

@ -41,7 +41,7 @@ int ctdb_set_transport(struct ctdb_context *ctdb, const char *transport)
int ctdb_set_logfile(struct ctdb_context *ctdb, const char *logfile)
{
ctdb->logfile = talloc_strdup(ctdb, logfile);
if (ctdb->logfile != NULL) {
if (ctdb->logfile != NULL && strcmp(logfile, "-") != 0) {
int fd;
close(1);
close(2);

View File

@ -265,11 +265,11 @@ static int32_t ctdb_control_dispatch(struct ctdb_context *ctdb,
case CTDB_CONTROL_TAKEOVER_IP:
CHECK_CONTROL_DATA_SIZE(sizeof(struct sockaddr));
return ctdb_control_takeover_ip(ctdb, indata);
return ctdb_control_takeover_ip(ctdb, c, indata, async_reply);
case CTDB_CONTROL_RELEASE_IP:
CHECK_CONTROL_DATA_SIZE(sizeof(struct sockaddr));
return ctdb_control_release_ip(ctdb, indata);
return ctdb_control_release_ip(ctdb, c, indata, async_reply);
case CTDB_CONTROL_DELETE_LOW_RSN:
CHECK_CONTROL_DATA_SIZE(sizeof(struct ctdb_control_delete_low_rsn));

View File

@ -32,7 +32,7 @@
static void daemon_incoming_packet(void *, struct ctdb_req_header *);
/* called when the "startup" event script has finished */
static void ctdb_start_transport(struct ctdb_context *ctdb, int status)
static void ctdb_start_transport(struct ctdb_context *ctdb, int status, void *p)
{
if (status != 0) {
DEBUG(0,("startup event failed!\n"));
@ -87,7 +87,8 @@ static void ctdb_main_loop(struct ctdb_context *ctdb)
CTDB_CTRL_FLAG_NOREPLY,
tdb_null, NULL, NULL);
ret = ctdb_event_script_callback(ctdb, ctdb_start_transport, "startup");
ret = ctdb_event_script_callback(ctdb, ctdb,
ctdb_start_transport, NULL, "startup");
if (ret != 0) {
DEBUG(0,("Failed startup event script\n"));
return;

View File

@ -925,10 +925,16 @@ int ctdb_ctrl_set_rsn_nonempty(struct ctdb_context *ctdb, struct timeval timeout
int ctdb_ctrl_delete_low_rsn(struct ctdb_context *ctdb, struct timeval timeout,
uint32_t destnode, uint32_t db_id, uint64_t rsn);
void ctdb_set_realtime(void);
int32_t ctdb_control_takeover_ip(struct ctdb_context *ctdb, TDB_DATA indata);
int32_t ctdb_control_takeover_ip(struct ctdb_context *ctdb,
struct ctdb_req_control *c,
TDB_DATA indata,
bool *async_reply);
int ctdb_ctrl_takeover_ip(struct ctdb_context *ctdb, struct timeval timeout,
uint32_t destnode, const char *ip);
int32_t ctdb_control_release_ip(struct ctdb_context *ctdb, TDB_DATA indata);
int32_t ctdb_control_release_ip(struct ctdb_context *ctdb,
struct ctdb_req_control *c,
TDB_DATA indata,
bool *async_reply);
int ctdb_ctrl_release_ip(struct ctdb_context *ctdb, struct timeval timeout,
uint32_t destnode, const char *ip);
@ -951,8 +957,10 @@ int32_t ctdb_control_startup(struct ctdb_context *ctdb, uint32_t vnn);
void ctdb_takeover_client_destructor_hook(struct ctdb_client *client);
int ctdb_event_script(struct ctdb_context *ctdb, const char *fmt, ...) PRINTF_ATTRIBUTE(2,3);
int ctdb_event_script_callback(struct ctdb_context *ctdb,
void (*callback)(struct ctdb_context *, int),
const char *fmt, ...) PRINTF_ATTRIBUTE(3,4);
TALLOC_CTX *mem_ctx,
void (*callback)(struct ctdb_context *, int, void *),
void *private_data,
const char *fmt, ...) PRINTF_ATTRIBUTE(5,6);
void ctdb_release_all_ips(struct ctdb_context *ctdb);
void set_nonblocking(int fd);

View File

@ -91,52 +91,48 @@ static void ctdb_control_send_arp(struct event_context *ev, struct timed_event *
ctdb_control_send_arp, arp);
}
/*
  state handed from a takeover/release control to the callback that runs
  once the corresponding event script has finished
*/
struct takeover_callback_state {
/* the control request that is answered with ctdb_request_control_reply() */
struct ctdb_req_control *c;
/* private copy of the address taken from the control's indata */
struct sockaddr_in *sin;
};
/*
take over an ip address
called when takeip event finishes
*/
int32_t ctdb_control_takeover_ip(struct ctdb_context *ctdb, TDB_DATA indata)
static void takeover_ip_callback(struct ctdb_context *ctdb, int status,
void *private_data)
{
int ret;
struct sockaddr_in *sin = (struct sockaddr_in *)indata.dptr;
struct takeover_callback_state *state =
talloc_get_type(private_data, struct takeover_callback_state);
struct ctdb_takeover_arp *arp;
char *ip = inet_ntoa(sin->sin_addr);
char *ip = inet_ntoa(state->sin->sin_addr);
struct ctdb_tcp_list *tcp;
if (ctdb_sys_have_ip(ip)) {
return 0;
}
DEBUG(0,("Takover of IP %s/%u on interface %s\n",
ip, ctdb->nodes[ctdb->vnn]->public_netmask_bits,
ctdb->takeover.interface));
ret = ctdb_event_script(ctdb, "takeip %s %s %u",
ctdb->takeover.interface,
ip,
ctdb->nodes[ctdb->vnn]->public_netmask_bits);
if (ret != 0) {
if (status != 0) {
DEBUG(0,(__location__ " Failed to takeover IP %s on interface %s\n",
ip, ctdb->takeover.interface));
return -1;
ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
talloc_free(state);
return;
}
if (!ctdb->takeover.last_ctx) {
ctdb->takeover.last_ctx = talloc_new(ctdb);
CTDB_NO_MEMORY(ctdb, ctdb->takeover.last_ctx);
if (!ctdb->takeover.last_ctx) goto failed;
}
arp = talloc_zero(ctdb->takeover.last_ctx, struct ctdb_takeover_arp);
CTDB_NO_MEMORY(ctdb, arp);
if (!arp) goto failed;
arp->ctdb = ctdb;
arp->sin = *sin;
arp->sin = *state->sin;
/* add all of the known tcp connections for this IP to the
list of tcp connections to send tickle acks for */
for (tcp=ctdb->tcp_list;tcp;tcp=tcp->next) {
if (sin->sin_addr.s_addr == tcp->daddr.sin_addr.s_addr) {
if (state->sin->sin_addr.s_addr == tcp->daddr.sin_addr.s_addr) {
struct ctdb_tcp_list *t2 = talloc(arp, struct ctdb_tcp_list);
CTDB_NO_MEMORY(ctdb, t2);
if (t2 == NULL) goto failed;
*t2 = *tcp;
DLIST_ADD(arp->tcp_list, t2);
}
@ -145,42 +141,78 @@ int32_t ctdb_control_takeover_ip(struct ctdb_context *ctdb, TDB_DATA indata)
event_add_timed(arp->ctdb->ev, arp->ctdb->takeover.last_ctx,
timeval_zero(), ctdb_control_send_arp, arp);
return ret;
/* the control succeeded */
ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
talloc_free(state);
return;
failed:
ctdb_request_control_reply(ctdb, state->c, NULL, -1, NULL);
talloc_free(state);
return;
}
/*
release an ip address
take over an ip address
*/
int32_t ctdb_control_release_ip(struct ctdb_context *ctdb, TDB_DATA indata)
int32_t ctdb_control_takeover_ip(struct ctdb_context *ctdb,
struct ctdb_req_control *c,
TDB_DATA indata,
bool *async_reply)
{
struct sockaddr_in *sin = (struct sockaddr_in *)indata.dptr;
TDB_DATA data;
char *ip = inet_ntoa(sin->sin_addr);
int ret;
struct ctdb_tcp_list *tcp;
struct takeover_callback_state *state;
struct sockaddr_in *sin = (struct sockaddr_in *)indata.dptr;
char *ip = inet_ntoa(sin->sin_addr);
if (!ctdb_sys_have_ip(ip)) {
/* if our kernel already has this IP, do nothing */
if (ctdb_sys_have_ip(ip)) {
return 0;
}
DEBUG(0,("Release of IP %s/%u on interface %s\n",
state = talloc(ctdb, struct takeover_callback_state);
CTDB_NO_MEMORY(ctdb, state);
state->c = talloc_steal(ctdb, c);
state->sin = talloc(ctdb, struct sockaddr_in);
CTDB_NO_MEMORY(ctdb, state->sin);
*state->sin = *(struct sockaddr_in *)indata.dptr;
DEBUG(0,("Takover of IP %s/%u on interface %s\n",
ip, ctdb->nodes[ctdb->vnn]->public_netmask_bits,
ctdb->takeover.interface));
/* stop any previous arps */
talloc_free(ctdb->takeover.last_ctx);
ctdb->takeover.last_ctx = NULL;
ret = ctdb_event_script(ctdb, "releaseip %s %s %u",
ret = ctdb_event_script_callback(ctdb, state, takeover_ip_callback, state,
"takeip %s %s %u",
ctdb->takeover.interface,
ip,
ctdb->nodes[ctdb->vnn]->public_netmask_bits);
if (ret != 0) {
DEBUG(0,(__location__ " Failed to release IP %s on interface %s\n",
DEBUG(0,(__location__ " Failed to takeover IP %s on interface %s\n",
ip, ctdb->takeover.interface));
talloc_free(state);
return -1;
}
/* tell ctdb_control.c that we will be replying asynchronously */
*async_reply = true;
return 0;
}
/*
called when releaseip event finishes
*/
static void release_ip_callback(struct ctdb_context *ctdb, int status,
void *private_data)
{
struct takeover_callback_state *state =
talloc_get_type(private_data, struct takeover_callback_state);
char *ip = inet_ntoa(state->sin->sin_addr);
TDB_DATA data;
struct ctdb_tcp_list *tcp;
/* send a message to all clients of this node telling them
that the cluster has been reconfigured and they should
release any sockets on this IP */
@ -192,7 +224,7 @@ int32_t ctdb_control_release_ip(struct ctdb_context *ctdb, TDB_DATA indata)
/* tell other nodes about any tcp connections we were holding with this IP */
for (tcp=ctdb->tcp_list;tcp;tcp=tcp->next) {
if (tcp->vnn == ctdb->vnn &&
sin->sin_addr.s_addr == tcp->daddr.sin_addr.s_addr) {
state->sin->sin_addr.s_addr == tcp->daddr.sin_addr.s_addr) {
struct ctdb_control_tcp_vnn t;
t.vnn = ctdb->vnn;
@ -208,6 +240,59 @@ int32_t ctdb_control_release_ip(struct ctdb_context *ctdb, TDB_DATA indata)
}
}
/* the control succeeded */
ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
talloc_free(state);
}
/*
  release an ip address

  Kicks off the "releaseip" event script in the background instead of
  blocking on it.

  ctdb        - daemon context
  c           - the control request being serviced
  indata      - holds the struct sockaddr_in of the address to release
  async_reply - set to true when the reply will be sent later by
                release_ip_callback(); left untouched otherwise

  Returns 0 when there is nothing to do (the kernel does not hold the
  address) or when the script was started, and -1 when the script could
  not be started.
  NOTE(review): the out-of-memory paths rely on CTDB_NO_MEMORY returning
  from this function with an error - confirm against the macro definition.
*/
int32_t ctdb_control_release_ip(struct ctdb_context *ctdb,
				struct ctdb_req_control *c,
				TDB_DATA indata,
				bool *async_reply)
{
	int ret;
	struct takeover_callback_state *state;
	struct sockaddr_in *sin = (struct sockaddr_in *)indata.dptr;
	struct sockaddr_in *sin_copy;
	char *ip = inet_ntoa(sin->sin_addr);

	/* if our kernel does not hold this IP there is nothing to release */
	if (!ctdb_sys_have_ip(ip)) {
		return 0;
	}

	DEBUG(0,("Release of IP %s/%u on interface %s\n",
		 ip, ctdb->nodes[ctdb->vnn]->public_netmask_bits,
		 ctdb->takeover.interface));

	/* stop any previous arps */
	talloc_free(ctdb->takeover.last_ctx);
	ctdb->takeover.last_ctx = NULL;

	state = talloc(ctdb, struct takeover_callback_state);
	CTDB_NO_MEMORY(ctdb, state);

	/* only remember the request for now: as long as we may still
	   return without going async, the caller has to be able to reply
	   through c, so it must not become a child of state yet */
	state->c = c;

	/* indata is not ours to keep, so take a private copy of the address */
	sin_copy = talloc(state, struct sockaddr_in);
	if (sin_copy == NULL) {
		/* don't leave the half-built state hanging off ctdb */
		talloc_free(state);
	}
	CTDB_NO_MEMORY(ctdb, sin_copy);
	*sin_copy = *sin;
	state->sin = sin_copy;

	ret = ctdb_event_script_callback(ctdb, state, release_ip_callback, state,
					 "releaseip %s %s %u",
					 ctdb->takeover.interface,
					 ip,
					 ctdb->nodes[ctdb->vnn]->public_netmask_bits);
	if (ret != 0) {
		DEBUG(0,(__location__ " Failed to release IP %s on interface %s\n",
			 ip, ctdb->takeover.interface));
		/* c was never stolen, so it survives this free */
		talloc_free(state);
		return -1;
	}

	/* the script is running: from here on the request lives and dies
	   with state, which release_ip_callback() frees after replying */
	state->c = talloc_steal(state, c);

	/* tell the control that we will be replying asynchronously */
	*async_reply = true;
	return 0;
}

View File

@ -312,8 +312,9 @@ int ctdb_event_script(struct ctdb_context *ctdb, const char *fmt, ...)
struct ctdb_event_script_state {
struct ctdb_context *ctdb;
pid_t child;
void (*callback)(struct ctdb_context *, int);
void (*callback)(struct ctdb_context *, int, void *);
int fd[2];
void *private_data;
};
/* called when child is finished */
@ -327,28 +328,41 @@ static void ctdb_event_script_handler(struct event_context *ev, struct fd_event
if (status != -1) {
status = WEXITSTATUS(status);
}
state->callback(state->ctdb, status);
state->callback(state->ctdb, status, state->private_data);
talloc_set_destructor(state, NULL);
talloc_free(state);
}
/*
  talloc destructor for an event script that is still running: the child
  is sent SIGKILL and then reaped, so freeing the state never leaves a
  stray script process behind. Always returns 0.
*/
static int event_script_destructor(struct ctdb_event_script_state *state)
{
	const pid_t script_pid = state->child;

	kill(script_pid, SIGKILL);
	/* blocks until the killed child has been collected */
	waitpid(script_pid, NULL, 0);

	return 0;
}
/*
run the event script in the background, calling the callback when
finished
*/
int ctdb_event_script_callback(struct ctdb_context *ctdb,
void (*callback)(struct ctdb_context *, int),
TALLOC_CTX *mem_ctx,
void (*callback)(struct ctdb_context *, int, void *),
void *private_data,
const char *fmt, ...)
{
struct ctdb_event_script_state *state;
va_list ap;
int ret;
state = talloc(ctdb, struct ctdb_event_script_state);
state = talloc(mem_ctx, struct ctdb_event_script_state);
CTDB_NO_MEMORY(ctdb, state);
state->ctdb = ctdb;
state->callback = callback;
state->private_data = private_data;
ret = pipe(state->fd);
if (ret != 0) {
@ -373,6 +387,8 @@ int ctdb_event_script_callback(struct ctdb_context *ctdb,
_exit(ret);
}
talloc_set_destructor(state, event_script_destructor);
close(state->fd[1]);
event_add_fd(ctdb->ev, state, state->fd[0], EVENT_FD_READ|EVENT_FD_AUTOCLOSE,