1
0
mirror of https://github.com/samba-team/samba.git synced 2025-02-02 09:47:23 +03:00

Add a new file <reclock>.pnn in which each recovery daemon locks the single byte at offset == pnn. This offers an alternative way to detect which nodes are active, instead of relying on CONNECTED being accurate.

(This used to be ctdb commit 21d3319eaf463e2a00637d440ee2d4d15f53bf09)
This commit is contained in:
Ronnie Sahlberg 2008-02-29 12:37:42 +11:00
parent 4adeafef11
commit e0036942bc
4 changed files with 145 additions and 2 deletions

View File

@ -2720,3 +2720,38 @@ uint32_t *list_of_active_nodes(struct ctdb_context *ctdb,
return nodes;
}
/*
  test whether the byte-range lock for a pnn exists in the reclock pnn file
  and, if it does, read the connection count that node has reported.

  fd  : open descriptor for the <reclock>.pnn file
  pnn : node number; also used as the byte offset of that node's slot

  returns the count stored in the slot (as an unsigned byte value) if the
  slot is locked, or -1 if the slot is not locked (that recovery daemon is
  not running) or if the lock state or the count could not be read.

  note: F_GETLK only reports locks that would block the caller, so a lock
  held by the calling process itself is reported as F_UNLCK.
*/
int
ctdb_read_pnn_lock(int fd, int32_t pnn)
{
	struct flock lock;
	/* unsigned so that counts above 127 are not returned as negative
	   values and a stored 0xff cannot be mistaken for the -1 sentinel */
	unsigned char count = 0;
	ssize_t nread;

	/* describe a write lock on the slot: any lock already held on that
	   byte conflicts with it and is reported back by F_GETLK */
	lock.l_type = F_WRLCK;
	lock.l_whence = SEEK_SET;
	lock.l_start = pnn;
	lock.l_len = 1;
	lock.l_pid = 0;

	if (fcntl(fd, F_GETLK, &lock) != 0) {
		DEBUG(DEBUG_ERR, (__location__ " F_GETLK failed with %s\n", strerror(errno)));
		return -1;
	}

	/* nobody holds a lock on this slot */
	if (lock.l_type == F_UNLCK) {
		return -1;
	}

	nread = pread(fd, &count, 1, pnn);
	if (nread == -1) {
		DEBUG(DEBUG_CRIT,(__location__ " failed read pnn count - %s\n", strerror(errno)));
		return -1;
	}

	if (nread == 0) {
		/* the slot is locked but no byte has been written at this
		   offset yet (the lock owner writes its initial count of 0
		   only after taking the lock), so report 0 instead of
		   returning an uninitialised value */
		return 0;
	}

	return count;
}

View File

@ -517,4 +517,6 @@ uint32_t *list_of_vnnmap_nodes(struct ctdb_context *ctdb,
TALLOC_CTX *mem_ctx,
bool include_self);
/*
  check the byte-range lock on slot 'pnn' of the reclock pnn file open on 'fd'.
  returns the connection count stored in that slot, or -1 if the slot is not
  locked or the lock state/count could not be read.
*/
int ctdb_read_pnn_lock(int fd, int32_t pnn);
#endif

View File

@ -41,6 +41,7 @@ struct ban_state {
*/
struct ctdb_recoverd {
struct ctdb_context *ctdb;
int rec_file_fd;
uint32_t last_culprit;
uint32_t culprit_counter;
struct timeval first_recover_time;
@ -1957,6 +1958,72 @@ static enum monitor_result verify_recmaster(struct ctdb_context *ctdb, struct ct
return status;
}
/*
  write the number of connected nodes we have for this pnn to the pnn slot
  (the single byte at offset ctdb->pnn) in the reclock pnn file.

  rec   : recovery daemon state; rec->rec_file_fd is the descriptor written to
  count : value to store in the slot

  a failed or short write is logged but not reported to the caller.
*/
static void
ctdb_recoverd_write_pnn_connect_count(struct ctdb_recoverd *rec, const char count)
{
	struct ctdb_context *ctdb = talloc_get_type(rec->ctdb, struct ctdb_context);
	ssize_t written;

	written = pwrite(rec->rec_file_fd, &count, 1, ctdb->pnn);
	if (written == -1) {
		/* include errno so the cause of the failure is visible in the log */
		DEBUG(DEBUG_CRIT, (__location__ " Failed to write pnn count - %s\n", strerror(errno)));
	} else if (written != 1) {
		/* nothing was written; errno is not meaningful here */
		DEBUG(DEBUG_CRIT, (__location__ " Short write of pnn count\n"));
	}
}
/*
  this function opens the reclock pnn file (<reclock>.pnn) and sets a
  byte-range write lock on the single byte at offset pnn.
  the existence/non-existence of such a lock provides an alternative mechanism
  to know whether a remote node (recovery daemon) is running or not.

  rec : recovery daemon state; rec->rec_file_fd must be -1 on entry and holds
        the open, locked descriptor on return.

  any failure (descriptor already open, open() failing, lock not obtained)
  is logged and terminates the process with exit(10).
*/
static void
ctdb_recoverd_get_pnn_lock(struct ctdb_recoverd *rec)
{
	struct ctdb_context *ctdb = talloc_get_type(rec->ctdb, struct ctdb_context);
	struct flock lock;
	char *pnnfile = NULL;

	DEBUG(DEBUG_INFO, ("Setting PNN lock for pnn:%d\n", ctdb->pnn));

	if (rec->rec_file_fd != -1) {
		DEBUG(DEBUG_CRIT, (__location__ " rec_file_fd is already open. Aborting\n"));
		exit(10);
	}

	pnnfile = talloc_asprintf(rec, "%s.pnn", ctdb->recovery_lock_file);
	CTDB_NO_MEMORY_FATAL(ctdb, pnnfile);

	rec->rec_file_fd = open(pnnfile, O_RDWR|O_CREAT, 0600);
	if (rec->rec_file_fd == -1) {
		DEBUG(DEBUG_CRIT,(__location__ " Unable to open %s - (%s)\n",
			pnnfile, strerror(errno)));
		exit(10);
	}

	set_close_on_exec(rec->rec_file_fd);

	/* write-lock only our own slot: one byte at offset pnn */
	lock.l_type = F_WRLCK;
	lock.l_whence = SEEK_SET;
	lock.l_start = ctdb->pnn;
	lock.l_len = 1;
	lock.l_pid = 0;

	if (fcntl(rec->rec_file_fd, F_SETLK, &lock) != 0) {
		/* remember why the lock failed; close() may overwrite errno */
		int saved_errno = errno;

		close(rec->rec_file_fd);
		rec->rec_file_fd = -1;
		DEBUG(DEBUG_CRIT,(__location__ " Failed to get pnn lock on '%s' - (%s)\n",
			pnnfile, strerror(saved_errno)));
		exit(10);
	}

	DEBUG(DEBUG_NOTICE,(__location__ " Got pnn lock on '%s'\n", pnnfile));
	talloc_free(pnnfile);

	/* we start out with 0 connected nodes */
	ctdb_recoverd_write_pnn_connect_count(rec, 0);
}
/*
the main monitoring loop
@ -1986,6 +2053,10 @@ static void monitor_cluster(struct ctdb_context *ctdb)
rec->priority_time = timeval_current();
/* open the rec file fd and lock our slot */
rec->rec_file_fd = -1;
ctdb_recoverd_get_pnn_lock(rec);
/* register a message port for recovery elections */
ctdb_set_message_handler(ctdb, CTDB_SRVID_RECOVERY, election_handler, rec);

View File

@ -858,8 +858,10 @@ static int control_getdbmap(struct ctdb_context *ctdb, int argc, const char **ar
*/
static int control_getreclock(struct ctdb_context *ctdb, int argc, const char **argv)
{
int ret;
int i, ret, fd;
const char *reclock;
struct ctdb_node_map *nodemap=NULL;
char *pnnfile;
ret = ctdb_ctrl_getreclock(ctdb, TIMELIMIT(), options.pnn, ctdb, &reclock);
if (ret != 0) {
@ -867,7 +869,40 @@ static int control_getreclock(struct ctdb_context *ctdb, int argc, const char **
return ret;
}
DEBUG(DEBUG_ERR, ("Reclock file : %s\n", reclock));
ret = ctdb_ctrl_getnodemap(ctdb, TIMELIMIT(), options.pnn, ctdb, &nodemap);
if (ret != 0) {
DEBUG(DEBUG_ERR, ("Unable to get nodemap from node %u\n", options.pnn));
return ret;
}
pnnfile = talloc_asprintf(ctdb, "%s.pnn", reclock);
CTDB_NO_MEMORY(ctdb, pnnfile);
fd = open(pnnfile, O_RDONLY);
if (fd == -1) {
DEBUG(DEBUG_CRIT,(__location__ " Failed to open reclock pnn file %s - (%s)\n",
pnnfile, strerror(errno)));
exit(10);
}
printf("Reclock file : %s\n", reclock);
for (i=0; i<nodemap->num; i++) {
int count;
count = ctdb_read_pnn_lock(fd, nodemap->nodes[i].pnn);
printf("pnn:%d %-16s", nodemap->nodes[i].pnn,
inet_ntoa(nodemap->nodes[i].sin.sin_addr));
if (count == -1) {
printf(" NOT ACTIVE\n");
} else {
printf(" ACTIVE with %d connections\n", count);
}
}
close(fd);
return 0;
}