mirror of
https://github.com/samba-team/samba.git
synced 2024-12-22 13:34:15 +03:00
redesign how reloadnodes is implemented.
Modify the transport methods to allow restarting individual connections and to set up destructors properly. Only tear down/set up TCP connections to nodes that were removed from or added to the cluster; leave TCP connections to unchanged nodes connected. Make "ctdb reloadnodes" explicitly cause a recovery of the cluster once the files have been reloaded. (This used to be ctdb commit d1057ed6de7de9f2a64d8fa012c52647e89b515b)
This commit is contained in:
parent
7592a97d16
commit
edb7241c05
@ -236,8 +236,9 @@ struct ctdb_node {
|
||||
*/
|
||||
struct ctdb_methods {
|
||||
int (*initialise)(struct ctdb_context *); /* initialise transport structures */
|
||||
int (*start)(struct ctdb_context *); /* start protocol processing */
|
||||
int (*start)(struct ctdb_context *); /* start the transport */
|
||||
int (*add_node)(struct ctdb_node *); /* setup a new node */
|
||||
int (*connect_node)(struct ctdb_node *); /* connect to node */
|
||||
int (*queue_pkt)(struct ctdb_node *, uint8_t *data, uint32_t length);
|
||||
void *(*allocate_pkt)(TALLOC_CTX *mem_ctx, size_t );
|
||||
void (*shutdown)(struct ctdb_context *); /* shutdown transport */
|
||||
|
@ -213,20 +213,43 @@ static void
|
||||
ctdb_reload_nodes_event(struct event_context *ev, struct timed_event *te,
|
||||
struct timeval t, void *private_data)
|
||||
{
|
||||
int i;
|
||||
|
||||
int i, num_nodes;
|
||||
struct ctdb_context *ctdb = talloc_get_type(private_data, struct ctdb_context);
|
||||
TALLOC_CTX *tmp_ctx;
|
||||
struct ctdb_node **nodes;
|
||||
|
||||
tmp_ctx = talloc_new(ctdb);
|
||||
|
||||
/* steal the old nodes file for a while */
|
||||
talloc_steal(tmp_ctx, ctdb->nodes);
|
||||
nodes = ctdb->nodes;
|
||||
ctdb->nodes = NULL;
|
||||
num_nodes = ctdb->num_nodes;
|
||||
ctdb->num_nodes = 0;
|
||||
|
||||
/* load the new nodes file */
|
||||
ctdb_load_nodes_file(ctdb);
|
||||
|
||||
for (i=0; i<ctdb->num_nodes; i++) {
|
||||
/* keep any identical pre-existing nodes and connections */
|
||||
if ((i < num_nodes) && ctdb_same_address(&ctdb->nodes[i]->address, &nodes[i]->address)) {
|
||||
talloc_free(ctdb->nodes[i]);
|
||||
ctdb->nodes[i] = talloc_steal(ctdb->nodes, nodes[i]);
|
||||
continue;
|
||||
}
|
||||
|
||||
/* any new or different nodes must be added */
|
||||
if (ctdb->methods->add_node(ctdb->nodes[i]) != 0) {
|
||||
DEBUG(DEBUG_CRIT, (__location__ " methods->add_node failed at %d\n", i));
|
||||
ctdb_fatal(ctdb, "failed to add node. shutting down\n");
|
||||
}
|
||||
if (ctdb->methods->connect_node(ctdb->nodes[i]) != 0) {
|
||||
DEBUG(DEBUG_CRIT, (__location__ " methods->add_connect failed at %d\n", i));
|
||||
ctdb_fatal(ctdb, "failed to connect to node. shutting down\n");
|
||||
}
|
||||
}
|
||||
ctdb->methods->start(ctdb);
|
||||
|
||||
talloc_free(tmp_ctx);
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -47,6 +47,7 @@ void ctdb_tcp_stop_connection(struct ctdb_node *node)
|
||||
|
||||
/*
|
||||
called when a complete packet has come in - should not happen on this socket
|
||||
unless the other side closes the connection with RST or FIN
|
||||
*/
|
||||
void ctdb_tcp_tnode_cb(uint8_t *data, size_t cnt, void *private_data)
|
||||
{
|
||||
@ -59,7 +60,8 @@ void ctdb_tcp_tnode_cb(uint8_t *data, size_t cnt, void *private_data)
|
||||
}
|
||||
|
||||
ctdb_tcp_stop_connection(node);
|
||||
tnode->connect_te = event_add_timed(node->ctdb->ev, tnode, timeval_zero(),
|
||||
tnode->connect_te = event_add_timed(node->ctdb->ev, tnode,
|
||||
timeval_current_ofs(3, 0),
|
||||
ctdb_tcp_node_connect, node);
|
||||
}
|
||||
|
||||
@ -149,6 +151,7 @@ void ctdb_tcp_node_connect(struct event_context *ev, struct timed_event *te,
|
||||
return;
|
||||
}
|
||||
|
||||
DEBUG(DEBUG_ERR,("create socket...\n"));
|
||||
tnode->fd = socket(sock_out.sa.sa_family, SOCK_STREAM, IPPROTO_TCP);
|
||||
set_nonblocking(tnode->fd);
|
||||
set_close_on_exec(tnode->fd);
|
||||
|
@ -25,6 +25,17 @@
|
||||
#include "../include/ctdb_private.h"
|
||||
#include "ctdb_tcp.h"
|
||||
|
||||
static int tnode_destructor(struct ctdb_tcp_node *tnode)
|
||||
{
|
||||
struct ctdb_node *node = talloc_find_parent_bytype(tnode, struct ctdb_node);
|
||||
|
||||
if (tnode->fd != -1) {
|
||||
close(tnode->fd);
|
||||
tnode->fd = -1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
initialise tcp portion of a ctdb node
|
||||
@ -37,6 +48,7 @@ static int ctdb_tcp_add_node(struct ctdb_node *node)
|
||||
|
||||
tnode->fd = -1;
|
||||
node->private_data = tnode;
|
||||
talloc_set_destructor(tnode, tnode_destructor);
|
||||
|
||||
tnode->out_queue = ctdb_queue_setup(node->ctdb, node, tnode->fd, CTDB_TCP_ALIGNMENT,
|
||||
ctdb_tcp_tnode_cb, node);
|
||||
@ -70,21 +82,18 @@ static int ctdb_tcp_initialise(struct ctdb_context *ctdb)
|
||||
/*
|
||||
start the protocol going
|
||||
*/
|
||||
static int ctdb_tcp_start(struct ctdb_context *ctdb)
|
||||
static int ctdb_tcp_connect_node(struct ctdb_node *node)
|
||||
{
|
||||
int i;
|
||||
struct ctdb_context *ctdb = node->ctdb;
|
||||
struct ctdb_tcp_node *tnode = talloc_get_type(
|
||||
node->private_data, struct ctdb_tcp_node);
|
||||
|
||||
/* startup connections to the other servers - will happen on
|
||||
/* startup connection to the other server - will happen on
|
||||
next event loop */
|
||||
for (i=0;i<ctdb->num_nodes;i++) {
|
||||
struct ctdb_node *node = *(ctdb->nodes + i);
|
||||
struct ctdb_tcp_node *tnode = talloc_get_type(
|
||||
node->private_data, struct ctdb_tcp_node);
|
||||
if (!ctdb_same_address(&ctdb->address, &node->address)) {
|
||||
tnode->connect_te = event_add_timed(ctdb->ev, tnode,
|
||||
timeval_zero(),
|
||||
ctdb_tcp_node_connect, node);
|
||||
}
|
||||
if (!ctdb_same_address(&ctdb->address, &node->address)) {
|
||||
tnode->connect_te = event_add_timed(ctdb->ev, tnode,
|
||||
timeval_zero(),
|
||||
ctdb_tcp_node_connect, node);
|
||||
}
|
||||
|
||||
return 0;
|
||||
@ -119,6 +128,20 @@ static void ctdb_tcp_shutdown(struct ctdb_context *ctdb)
|
||||
ctdb->private_data = NULL;
|
||||
}
|
||||
|
||||
/*
|
||||
start the transport
|
||||
*/
|
||||
static int ctdb_tcp_start(struct ctdb_context *ctdb)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i=0; i<ctdb->num_nodes; i++) {
|
||||
ctdb_tcp_connect_node(ctdb->nodes[i]);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
transport packet allocator - allows transport to control memory for packets
|
||||
@ -138,6 +161,7 @@ static const struct ctdb_methods ctdb_tcp_methods = {
|
||||
.start = ctdb_tcp_start,
|
||||
.queue_pkt = ctdb_tcp_queue_pkt,
|
||||
.add_node = ctdb_tcp_add_node,
|
||||
.connect_node = ctdb_tcp_connect_node,
|
||||
.allocate_pkt = ctdb_tcp_allocate_pkt,
|
||||
.shutdown = ctdb_tcp_shutdown,
|
||||
.restart = ctdb_tcp_restart,
|
||||
|
@ -2406,6 +2406,9 @@ static int control_reload_nodes_file(struct ctdb_context *ctdb, int argc, const
|
||||
DEBUG(DEBUG_ERR, ("ERROR: Failed to reload nodes file on node %u. You MUST fix that node manually!\n", mypnn));
|
||||
}
|
||||
|
||||
/* initiate a recovery */
|
||||
control_recover(ctdb, argc, argv);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user