mirror of
https://github.com/samba-team/samba.git
synced 2025-01-25 06:04:04 +03:00
make the running of the takeover and release event scripts async, to prevent outages due to slow scripts
(This used to be ctdb commit 4189be97eee7ab2a50335c860f2fcd9566667d01)
This commit is contained in:
parent
869d70d9c9
commit
7db1d04d5c
@ -41,7 +41,7 @@ int ctdb_set_transport(struct ctdb_context *ctdb, const char *transport)
|
||||
int ctdb_set_logfile(struct ctdb_context *ctdb, const char *logfile)
|
||||
{
|
||||
ctdb->logfile = talloc_strdup(ctdb, logfile);
|
||||
if (ctdb->logfile != NULL) {
|
||||
if (ctdb->logfile != NULL && strcmp(logfile, "-") != 0) {
|
||||
int fd;
|
||||
close(1);
|
||||
close(2);
|
||||
|
@ -265,11 +265,11 @@ static int32_t ctdb_control_dispatch(struct ctdb_context *ctdb,
|
||||
|
||||
case CTDB_CONTROL_TAKEOVER_IP:
|
||||
CHECK_CONTROL_DATA_SIZE(sizeof(struct sockaddr));
|
||||
return ctdb_control_takeover_ip(ctdb, indata);
|
||||
return ctdb_control_takeover_ip(ctdb, c, indata, async_reply);
|
||||
|
||||
case CTDB_CONTROL_RELEASE_IP:
|
||||
CHECK_CONTROL_DATA_SIZE(sizeof(struct sockaddr));
|
||||
return ctdb_control_release_ip(ctdb, indata);
|
||||
return ctdb_control_release_ip(ctdb, c, indata, async_reply);
|
||||
|
||||
case CTDB_CONTROL_DELETE_LOW_RSN:
|
||||
CHECK_CONTROL_DATA_SIZE(sizeof(struct ctdb_control_delete_low_rsn));
|
||||
|
@ -32,7 +32,7 @@
|
||||
static void daemon_incoming_packet(void *, struct ctdb_req_header *);
|
||||
|
||||
/* called when the "startup" event script has finished */
|
||||
static void ctdb_start_transport(struct ctdb_context *ctdb, int status)
|
||||
static void ctdb_start_transport(struct ctdb_context *ctdb, int status, void *p)
|
||||
{
|
||||
if (status != 0) {
|
||||
DEBUG(0,("startup event failed!\n"));
|
||||
@ -87,7 +87,8 @@ static void ctdb_main_loop(struct ctdb_context *ctdb)
|
||||
CTDB_CTRL_FLAG_NOREPLY,
|
||||
tdb_null, NULL, NULL);
|
||||
|
||||
ret = ctdb_event_script_callback(ctdb, ctdb_start_transport, "startup");
|
||||
ret = ctdb_event_script_callback(ctdb, ctdb,
|
||||
ctdb_start_transport, NULL, "startup");
|
||||
if (ret != 0) {
|
||||
DEBUG(0,("Failed startup event script\n"));
|
||||
return;
|
||||
|
@ -925,10 +925,16 @@ int ctdb_ctrl_set_rsn_nonempty(struct ctdb_context *ctdb, struct timeval timeout
|
||||
int ctdb_ctrl_delete_low_rsn(struct ctdb_context *ctdb, struct timeval timeout,
|
||||
uint32_t destnode, uint32_t db_id, uint64_t rsn);
|
||||
void ctdb_set_realtime(void);
|
||||
int32_t ctdb_control_takeover_ip(struct ctdb_context *ctdb, TDB_DATA indata);
|
||||
int32_t ctdb_control_takeover_ip(struct ctdb_context *ctdb,
|
||||
struct ctdb_req_control *c,
|
||||
TDB_DATA indata,
|
||||
bool *async_reply);
|
||||
int ctdb_ctrl_takeover_ip(struct ctdb_context *ctdb, struct timeval timeout,
|
||||
uint32_t destnode, const char *ip);
|
||||
int32_t ctdb_control_release_ip(struct ctdb_context *ctdb, TDB_DATA indata);
|
||||
int32_t ctdb_control_release_ip(struct ctdb_context *ctdb,
|
||||
struct ctdb_req_control *c,
|
||||
TDB_DATA indata,
|
||||
bool *async_reply);
|
||||
int ctdb_ctrl_release_ip(struct ctdb_context *ctdb, struct timeval timeout,
|
||||
uint32_t destnode, const char *ip);
|
||||
|
||||
@ -951,8 +957,10 @@ int32_t ctdb_control_startup(struct ctdb_context *ctdb, uint32_t vnn);
|
||||
void ctdb_takeover_client_destructor_hook(struct ctdb_client *client);
|
||||
int ctdb_event_script(struct ctdb_context *ctdb, const char *fmt, ...) PRINTF_ATTRIBUTE(2,3);
|
||||
int ctdb_event_script_callback(struct ctdb_context *ctdb,
|
||||
void (*callback)(struct ctdb_context *, int),
|
||||
const char *fmt, ...) PRINTF_ATTRIBUTE(3,4);
|
||||
TALLOC_CTX *mem_ctx,
|
||||
void (*callback)(struct ctdb_context *, int, void *),
|
||||
void *private_data,
|
||||
const char *fmt, ...) PRINTF_ATTRIBUTE(5,6);
|
||||
void ctdb_release_all_ips(struct ctdb_context *ctdb);
|
||||
|
||||
void set_nonblocking(int fd);
|
||||
|
@ -91,52 +91,48 @@ static void ctdb_control_send_arp(struct event_context *ev, struct timed_event *
|
||||
ctdb_control_send_arp, arp);
|
||||
}
|
||||
|
||||
/*
  state handed from a takeover/release ip control to the callback that
  runs once the corresponding event script has finished
 */
struct takeover_callback_state {
	/* the control request that is answered via
	   ctdb_request_control_reply() from the callback */
	struct ctdb_req_control *c;

	/* private copy of the address carried in the control's indata */
	struct sockaddr_in *sin;
};
|
||||
|
||||
/*
|
||||
take over an ip address
|
||||
called when takeip event finishes
|
||||
*/
|
||||
int32_t ctdb_control_takeover_ip(struct ctdb_context *ctdb, TDB_DATA indata)
|
||||
static void takeover_ip_callback(struct ctdb_context *ctdb, int status,
|
||||
void *private_data)
|
||||
{
|
||||
int ret;
|
||||
struct sockaddr_in *sin = (struct sockaddr_in *)indata.dptr;
|
||||
struct takeover_callback_state *state =
|
||||
talloc_get_type(private_data, struct takeover_callback_state);
|
||||
struct ctdb_takeover_arp *arp;
|
||||
char *ip = inet_ntoa(sin->sin_addr);
|
||||
char *ip = inet_ntoa(state->sin->sin_addr);
|
||||
struct ctdb_tcp_list *tcp;
|
||||
|
||||
if (ctdb_sys_have_ip(ip)) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
DEBUG(0,("Takover of IP %s/%u on interface %s\n",
|
||||
ip, ctdb->nodes[ctdb->vnn]->public_netmask_bits,
|
||||
ctdb->takeover.interface));
|
||||
ret = ctdb_event_script(ctdb, "takeip %s %s %u",
|
||||
ctdb->takeover.interface,
|
||||
ip,
|
||||
ctdb->nodes[ctdb->vnn]->public_netmask_bits);
|
||||
if (ret != 0) {
|
||||
if (status != 0) {
|
||||
DEBUG(0,(__location__ " Failed to takeover IP %s on interface %s\n",
|
||||
ip, ctdb->takeover.interface));
|
||||
return -1;
|
||||
ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
|
||||
talloc_free(state);
|
||||
return;
|
||||
}
|
||||
|
||||
if (!ctdb->takeover.last_ctx) {
|
||||
ctdb->takeover.last_ctx = talloc_new(ctdb);
|
||||
CTDB_NO_MEMORY(ctdb, ctdb->takeover.last_ctx);
|
||||
if (!ctdb->takeover.last_ctx) goto failed;
|
||||
}
|
||||
|
||||
arp = talloc_zero(ctdb->takeover.last_ctx, struct ctdb_takeover_arp);
|
||||
CTDB_NO_MEMORY(ctdb, arp);
|
||||
if (!arp) goto failed;
|
||||
|
||||
arp->ctdb = ctdb;
|
||||
arp->sin = *sin;
|
||||
arp->sin = *state->sin;
|
||||
|
||||
/* add all of the known tcp connections for this IP to the
|
||||
list of tcp connections to send tickle acks for */
|
||||
for (tcp=ctdb->tcp_list;tcp;tcp=tcp->next) {
|
||||
if (sin->sin_addr.s_addr == tcp->daddr.sin_addr.s_addr) {
|
||||
if (state->sin->sin_addr.s_addr == tcp->daddr.sin_addr.s_addr) {
|
||||
struct ctdb_tcp_list *t2 = talloc(arp, struct ctdb_tcp_list);
|
||||
CTDB_NO_MEMORY(ctdb, t2);
|
||||
if (t2 == NULL) goto failed;
|
||||
*t2 = *tcp;
|
||||
DLIST_ADD(arp->tcp_list, t2);
|
||||
}
|
||||
@ -145,42 +141,78 @@ int32_t ctdb_control_takeover_ip(struct ctdb_context *ctdb, TDB_DATA indata)
|
||||
event_add_timed(arp->ctdb->ev, arp->ctdb->takeover.last_ctx,
|
||||
timeval_zero(), ctdb_control_send_arp, arp);
|
||||
|
||||
return ret;
|
||||
/* the control succeeded */
|
||||
ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
|
||||
talloc_free(state);
|
||||
return;
|
||||
|
||||
failed:
|
||||
ctdb_request_control_reply(ctdb, state->c, NULL, -1, NULL);
|
||||
talloc_free(state);
|
||||
return;
|
||||
}
|
||||
|
||||
/*
|
||||
release an ip address
|
||||
take over an ip address
|
||||
*/
|
||||
int32_t ctdb_control_release_ip(struct ctdb_context *ctdb, TDB_DATA indata)
|
||||
int32_t ctdb_control_takeover_ip(struct ctdb_context *ctdb,
|
||||
struct ctdb_req_control *c,
|
||||
TDB_DATA indata,
|
||||
bool *async_reply)
|
||||
{
|
||||
struct sockaddr_in *sin = (struct sockaddr_in *)indata.dptr;
|
||||
TDB_DATA data;
|
||||
char *ip = inet_ntoa(sin->sin_addr);
|
||||
int ret;
|
||||
struct ctdb_tcp_list *tcp;
|
||||
struct takeover_callback_state *state;
|
||||
struct sockaddr_in *sin = (struct sockaddr_in *)indata.dptr;
|
||||
char *ip = inet_ntoa(sin->sin_addr);
|
||||
|
||||
if (!ctdb_sys_have_ip(ip)) {
|
||||
/* if our kernel already has this IP, do nothing */
|
||||
if (ctdb_sys_have_ip(ip)) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
DEBUG(0,("Release of IP %s/%u on interface %s\n",
|
||||
state = talloc(ctdb, struct takeover_callback_state);
|
||||
CTDB_NO_MEMORY(ctdb, state);
|
||||
|
||||
state->c = talloc_steal(ctdb, c);
|
||||
state->sin = talloc(ctdb, struct sockaddr_in);
|
||||
CTDB_NO_MEMORY(ctdb, state->sin);
|
||||
*state->sin = *(struct sockaddr_in *)indata.dptr;
|
||||
|
||||
DEBUG(0,("Takover of IP %s/%u on interface %s\n",
|
||||
ip, ctdb->nodes[ctdb->vnn]->public_netmask_bits,
|
||||
ctdb->takeover.interface));
|
||||
|
||||
/* stop any previous arps */
|
||||
talloc_free(ctdb->takeover.last_ctx);
|
||||
ctdb->takeover.last_ctx = NULL;
|
||||
|
||||
ret = ctdb_event_script(ctdb, "releaseip %s %s %u",
|
||||
ret = ctdb_event_script_callback(ctdb, state, takeover_ip_callback, state,
|
||||
"takeip %s %s %u",
|
||||
ctdb->takeover.interface,
|
||||
ip,
|
||||
ctdb->nodes[ctdb->vnn]->public_netmask_bits);
|
||||
if (ret != 0) {
|
||||
DEBUG(0,(__location__ " Failed to release IP %s on interface %s\n",
|
||||
DEBUG(0,(__location__ " Failed to takeover IP %s on interface %s\n",
|
||||
ip, ctdb->takeover.interface));
|
||||
talloc_free(state);
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* tell ctdb_control.c that we will be replying asynchronously */
|
||||
*async_reply = true;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
called when releaseip event finishes
|
||||
*/
|
||||
static void release_ip_callback(struct ctdb_context *ctdb, int status,
|
||||
void *private_data)
|
||||
{
|
||||
struct takeover_callback_state *state =
|
||||
talloc_get_type(private_data, struct takeover_callback_state);
|
||||
char *ip = inet_ntoa(state->sin->sin_addr);
|
||||
TDB_DATA data;
|
||||
struct ctdb_tcp_list *tcp;
|
||||
|
||||
/* send a message to all clients of this node telling them
|
||||
that the cluster has been reconfigured and they should
|
||||
release any sockets on this IP */
|
||||
@ -192,7 +224,7 @@ int32_t ctdb_control_release_ip(struct ctdb_context *ctdb, TDB_DATA indata)
|
||||
/* tell other nodes about any tcp connections we were holding with this IP */
|
||||
for (tcp=ctdb->tcp_list;tcp;tcp=tcp->next) {
|
||||
if (tcp->vnn == ctdb->vnn &&
|
||||
sin->sin_addr.s_addr == tcp->daddr.sin_addr.s_addr) {
|
||||
state->sin->sin_addr.s_addr == tcp->daddr.sin_addr.s_addr) {
|
||||
struct ctdb_control_tcp_vnn t;
|
||||
|
||||
t.vnn = ctdb->vnn;
|
||||
@ -208,6 +240,59 @@ int32_t ctdb_control_release_ip(struct ctdb_context *ctdb, TDB_DATA indata)
|
||||
}
|
||||
}
|
||||
|
||||
/* the control succeeded */
|
||||
ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
|
||||
talloc_free(state);
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
release an ip address
|
||||
*/
|
||||
int32_t ctdb_control_release_ip(struct ctdb_context *ctdb,
|
||||
struct ctdb_req_control *c,
|
||||
TDB_DATA indata,
|
||||
bool *async_reply)
|
||||
{
|
||||
int ret;
|
||||
struct takeover_callback_state *state;
|
||||
struct sockaddr_in *sin = (struct sockaddr_in *)indata.dptr;
|
||||
char *ip = inet_ntoa(sin->sin_addr);
|
||||
|
||||
if (!ctdb_sys_have_ip(ip)) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
DEBUG(0,("Release of IP %s/%u on interface %s\n",
|
||||
ip, ctdb->nodes[ctdb->vnn]->public_netmask_bits,
|
||||
ctdb->takeover.interface));
|
||||
|
||||
/* stop any previous arps */
|
||||
talloc_free(ctdb->takeover.last_ctx);
|
||||
ctdb->takeover.last_ctx = NULL;
|
||||
|
||||
state = talloc(ctdb, struct takeover_callback_state);
|
||||
CTDB_NO_MEMORY(ctdb, state);
|
||||
|
||||
state->c = talloc_steal(state, c);
|
||||
state->sin = talloc(state, struct sockaddr_in);
|
||||
CTDB_NO_MEMORY(ctdb, state->sin);
|
||||
*state->sin = *(struct sockaddr_in *)indata.dptr;
|
||||
|
||||
ret = ctdb_event_script_callback(ctdb, state, release_ip_callback, state,
|
||||
"releaseip %s %s %u",
|
||||
ctdb->takeover.interface,
|
||||
ip,
|
||||
ctdb->nodes[ctdb->vnn]->public_netmask_bits);
|
||||
if (ret != 0) {
|
||||
DEBUG(0,(__location__ " Failed to release IP %s on interface %s\n",
|
||||
ip, ctdb->takeover.interface));
|
||||
talloc_free(state);
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* tell the control that we will be reply asynchronously */
|
||||
*async_reply = true;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
@ -312,8 +312,9 @@ int ctdb_event_script(struct ctdb_context *ctdb, const char *fmt, ...)
|
||||
struct ctdb_event_script_state {
|
||||
struct ctdb_context *ctdb;
|
||||
pid_t child;
|
||||
void (*callback)(struct ctdb_context *, int);
|
||||
void (*callback)(struct ctdb_context *, int, void *);
|
||||
int fd[2];
|
||||
void *private_data;
|
||||
};
|
||||
|
||||
/* called when child is finished */
|
||||
@ -327,28 +328,41 @@ static void ctdb_event_script_handler(struct event_context *ev, struct fd_event
|
||||
if (status != -1) {
|
||||
status = WEXITSTATUS(status);
|
||||
}
|
||||
state->callback(state->ctdb, status);
|
||||
state->callback(state->ctdb, status, state->private_data);
|
||||
talloc_set_destructor(state, NULL);
|
||||
talloc_free(state);
|
||||
}
|
||||
|
||||
/*
|
||||
destroy a running event script
|
||||
*/
|
||||
static int event_script_destructor(struct ctdb_event_script_state *state)
|
||||
{
|
||||
kill(state->child, SIGKILL);
|
||||
waitpid(state->child, NULL, 0);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
run the event script in the background, calling the callback when
|
||||
finished
|
||||
*/
|
||||
int ctdb_event_script_callback(struct ctdb_context *ctdb,
|
||||
void (*callback)(struct ctdb_context *, int),
|
||||
TALLOC_CTX *mem_ctx,
|
||||
void (*callback)(struct ctdb_context *, int, void *),
|
||||
void *private_data,
|
||||
const char *fmt, ...)
|
||||
{
|
||||
struct ctdb_event_script_state *state;
|
||||
va_list ap;
|
||||
int ret;
|
||||
|
||||
state = talloc(ctdb, struct ctdb_event_script_state);
|
||||
state = talloc(mem_ctx, struct ctdb_event_script_state);
|
||||
CTDB_NO_MEMORY(ctdb, state);
|
||||
|
||||
state->ctdb = ctdb;
|
||||
state->callback = callback;
|
||||
state->private_data = private_data;
|
||||
|
||||
ret = pipe(state->fd);
|
||||
if (ret != 0) {
|
||||
@ -373,6 +387,8 @@ int ctdb_event_script_callback(struct ctdb_context *ctdb,
|
||||
_exit(ret);
|
||||
}
|
||||
|
||||
talloc_set_destructor(state, event_script_destructor);
|
||||
|
||||
close(state->fd[1]);
|
||||
|
||||
event_add_fd(ctdb->ev, state, state->fd[0], EVENT_FD_READ|EVENT_FD_AUTOCLOSE,
|
||||
|
Loading…
x
Reference in New Issue
Block a user