1
0
mirror of https://github.com/samba-team/samba.git synced 2024-12-22 13:34:15 +03:00

ctdb-recoverd: Call an election when the recovery lock is lost

The lock may have been lost due to a failure in the underlying locking
mechanism.  This could be due to quorum loss or similar.  It is best
to call an election to confirm that this node should still be master.
At worst, the node will reelect itself, fail to take the lock and then
ban itself.  This is a suitable outcome for a node that has been
partitioned from others in the cluster.

Signed-off-by: Martin Schwenke <martin@meltin.net>
Reviewed-by: Amitay Isaacs <amitay@gmail.com>
This commit is contained in:
Martin Schwenke 2018-11-08 15:49:30 +11:00 committed by Amitay Isaacs
parent 9d1d5fa4ac
commit da8aaf2aee

View File

@ -915,20 +915,19 @@ static void take_reclock_handler(char status,
s->locked = (status == '0') ;
}
/*
 * Forward declarations for functions that lost_reclock_handler() calls
 * before their definitions appear in this file.
 *
 * ctdb_recovery_lock() reports via its bool result whether the recovery
 * lock was taken (the caller logs "Failed to take recovery lock" on
 * false).  force_election() is called with the recovery daemon state, a
 * node PNN and a node map.
 */
static bool ctdb_recovery_lock(struct ctdb_recoverd *rec);
static void force_election(struct ctdb_recoverd *rec,
uint32_t pnn,
struct ctdb_node_map_old *nodemap);
/*
 * Handler invoked when the recovery lock is lost; per the log messages
 * below, this corresponds to the recovery lock helper terminating.
 *
 * private_data is expected to be the talloc'd struct ctdb_recoverd for
 * this daemon; talloc_get_type_abort() is used so that a pointer of any
 * other type is treated as a fatal error rather than returned as NULL.
 *
 * NOTE(review): this body contains two different log messages for the
 * same event and both retakes the lock and forces an election.  That
 * looks like the removed and added lines of a diff shown without +/-
 * markers.  Confirm against the actual commit which statements are
 * current before relying on this sequence.
 */
static void lost_reclock_handler(void *private_data)
{
struct ctdb_recoverd *rec = talloc_get_type_abort(
private_data, struct ctdb_recoverd);
DEBUG(DEBUG_ERR,
("Recovery lock helper terminated unexpectedly - "
"trying to retake recovery lock\n"));
D_ERR("Recovery lock helper terminated, triggering an election\n");
/* Drop the handle for the lock that has just been lost. */
TALLOC_FREE(rec->recovery_lock_handle);
/* Try to take the lock again; a failure here is only logged. */
if (! ctdb_recovery_lock(rec)) {
DEBUG(DEBUG_ERR, ("Failed to take recovery lock\n"));
}
/* Call an election, passing this node's PNN and the current node map. */
force_election(rec, ctdb_get_pnn(rec->ctdb), rec->nodemap);
}
static bool ctdb_recovery_lock(struct ctdb_recoverd *rec)