mirror of https://github.com/samba-team/samba.git
Merge commit 'ronnie/1.0.53'
(This used to be ctdb commit 58e6dc722ad1e2415b71baf1d471885169dde14d)
commit 66d154ef5f

@@ -1231,29 +1231,6 @@ int ctdb_ctrl_getdbmap(struct ctdb_context *ctdb, struct timeval timeout, uint32
 	return 0;
 }
 
-/*
-  get the reclock filename
- */
-int ctdb_ctrl_getreclock(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode,
-			 TALLOC_CTX *mem_ctx, const char **reclock)
-{
-	int ret;
-	TDB_DATA outdata;
-	int32_t res;
-
-	ret = ctdb_control(ctdb, destnode, 0,
-			   CTDB_CONTROL_GET_RECLOCK_FILE, 0, tdb_null,
-			   mem_ctx, &outdata, &res, &timeout, NULL);
-	if (ret != 0 || res != 0) {
-		DEBUG(DEBUG_ERR,(__location__ " ctdb_control for getreclock failed\n"));
-		return -1;
-	}
-
-	*reclock = (const char *)talloc_steal(mem_ctx, outdata.dptr);
-
-	return 0;
-}
-
 /*
   get a list of nodes (vnn and flags ) from a remote node
  */
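The removed client wrapper relied on talloc ownership transfer: the reply
buffer filled in by ctdb_control() is reparented with talloc_steal() onto
the caller's context so it outlives any temporary allocations. A minimal
standalone sketch of that pattern (illustrative only, not part of this
commit; link with -ltalloc):

#include <stdio.h>
#include <talloc.h>

int main(void)
{
	TALLOC_CTX *tmp_ctx = talloc_new(NULL);
	TALLOC_CTX *mem_ctx = talloc_new(NULL);

	/* stands in for the outdata.dptr buffer filled by ctdb_control() */
	char *reply = talloc_strdup(tmp_ctx, "/shared/reclock");

	/* reparent: the buffer is now owned by mem_ctx, not tmp_ctx */
	const char *reclock = talloc_steal(mem_ctx, reply);

	talloc_free(tmp_ctx);              /* the string survives this free */
	printf("reclock: %s\n", reclock);
	talloc_free(mem_ctx);              /* now the string is released */
	return 0;
}
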
@@ -536,7 +536,7 @@ enum ctdb_controls {CTDB_CONTROL_PROCESS_EXISTS = 0,
 		    CTDB_CONTROL_START_RECOVERY = 70,
 		    CTDB_CONTROL_END_RECOVERY = 71,
 		    CTDB_CONTROL_RELOAD_NODES_FILE = 72,
-		    CTDB_CONTROL_GET_RECLOCK_FILE = 73,
+		    /* #73 removed */
 		    CTDB_CONTROL_TRY_DELETE_RECORDS = 74,
 		    CTDB_CONTROL_ENABLE_MONITOR = 75,
 		    CTDB_CONTROL_DISABLE_MONITOR = 76,
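Note that the control is retired by replacing its enum entry with a
placeholder comment rather than deleting the line: control numbers are the
wire protocol between nodes, so opcode 73 must stay reserved and the later
opcodes must keep their values. A contrived illustration (not ctdb code) of
why the hole is kept:

/* Values are pinned explicitly; #73 stays reserved so that a node
 * running the old version and a node running the new version still
 * agree on what opcode 74 means. */
enum example_controls {
	EXAMPLE_CONTROL_RELOAD_NODES_FILE = 72,
	/* #73 removed - never reuse it for a new control */
	EXAMPLE_CONTROL_TRY_DELETE_RECORDS = 74,
};
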
@@ -1240,7 +1240,6 @@ int32_t ctdb_control_get_tunable(struct ctdb_context *ctdb, TDB_DATA indata,
 				 TDB_DATA *outdata);
 int32_t ctdb_control_set_tunable(struct ctdb_context *ctdb, TDB_DATA indata);
 int32_t ctdb_control_list_tunables(struct ctdb_context *ctdb, TDB_DATA *outdata);
-int32_t ctdb_control_get_reclock_file(struct ctdb_context *ctdb, TDB_DATA *outdata);
 int32_t ctdb_control_try_delete_records(struct ctdb_context *ctdb, TDB_DATA indata, TDB_DATA *outdata);
 int32_t ctdb_control_add_public_address(struct ctdb_context *ctdb, TDB_DATA indata);
 int32_t ctdb_control_del_public_address(struct ctdb_context *ctdb, TDB_DATA indata);
@@ -5,7 +5,7 @@ Vendor: Samba Team
 Packager: Samba Team <samba@samba.org>
 Name: ctdb
 Version: 1.0
-Release: 51
+Release: 53
 Epoch: 0
 License: GNU GPL version 3
 Group: System Environment/Daemons
@@ -118,6 +118,15 @@ fi
 %{_includedir}/ctdb_private.h
 
 %changelog
+* Thu Aug 7 2008 : Version 1.0.53
+ - Remove the reclock.pnn file it can cause gpfs to fail to umount
+ - New transaction code
+* Mon Aug 4 2008 : Version 1.0.52
+ - Send an explicit gratious arp when starting sending the tcp tickles.
+ - When doing failover, issue a killtcp to non-NFS/non-CIFS clients
+   so that they fail quickly. NFS and CIFS already fail and recover
+   quickly.
+ - Update the test scripts to handle CTRL-C to kill off the test.
 * Mon Jul 28 2008 : Version 1.0.51
  - Strip off the vlan tag from bond devices before we check in /proc
    if the interface is up or not.
@@ -378,10 +378,6 @@ static int32_t ctdb_control_dispatch(struct ctdb_context *ctdb,
 	case CTDB_CONTROL_END_RECOVERY:
 		return ctdb_control_end_recovery(ctdb, c, async_reply);
 
-	case CTDB_CONTROL_GET_RECLOCK_FILE:
-		CHECK_CONTROL_DATA_SIZE(0);
-		return ctdb_control_get_reclock_file(ctdb, outdata);
-
 	case CTDB_CONTROL_TRY_DELETE_RECORDS:
 		return ctdb_control_try_delete_records(ctdb, indata, outdata);
 
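In the dispatch switch, CHECK_CONTROL_DATA_SIZE(0) validated that this
control carries no input payload before invoking the handler. A
self-contained sketch of that kind of guard (hypothetical names and macro
body, not the actual ctdb definition):

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

struct blob {
	uint8_t *dptr;
	size_t   dsize;
};

/* reject a control whose payload is not exactly the expected size */
#define CHECK_DATA_SIZE(indata, want) do { \
	if ((indata).dsize != (size_t)(want)) { \
		fprintf(stderr, "bad data size %zu, expected %zu\n", \
			(indata).dsize, (size_t)(want)); \
		return -1; \
	} \
} while (0)

static int handle_no_payload_control(struct blob indata)
{
	CHECK_DATA_SIZE(indata, 0);   /* this opcode takes no input data */
	/* ... perform the control ... */
	return 0;
}

int main(void)
{
	struct blob empty = { NULL, 0 };
	return handle_no_payload_control(empty) == 0 ? EXIT_SUCCESS : EXIT_FAILURE;
}
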
@@ -865,22 +865,6 @@ int32_t ctdb_control_start_recovery(struct ctdb_context *ctdb,
 	return 0;
 }
 
-/*
-  report the location for the reclock file
- */
-int32_t ctdb_control_get_reclock_file(struct ctdb_context *ctdb, TDB_DATA *outdata)
-{
-	char *reclock = NULL;
-
-	reclock = talloc_strdup(outdata, ctdb->recovery_lock_file);
-	CTDB_NO_MEMORY(ctdb, reclock);
-
-	outdata->dsize = strlen(reclock)+1;
-	outdata->dptr = (uint8_t *)reclock;
-
-	return 0;
-}
-
 /*
   try to delete all these records as part of the vacuuming process
   and return the records we failed to delete
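The removed handler returned the reclock path by packing a NUL-terminated
string into the pointer+length pair of a TDB_DATA, with dsize counting the
trailing NUL so the receiver can use dptr directly as a C string. A
standalone sketch of the same packing (plain malloc instead of talloc,
illustrative only):

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct blob {
	uint8_t *dptr;
	size_t   dsize;
};

static int pack_string(struct blob *out, const char *s)
{
	char *copy = strdup(s);          /* stand-in for talloc_strdup() */
	if (copy == NULL) {
		return -1;
	}
	out->dsize = strlen(copy) + 1;   /* +1 keeps the terminating NUL */
	out->dptr  = (uint8_t *)copy;
	return 0;
}

int main(void)
{
	struct blob out;

	if (pack_string(&out, "/shared/reclock") != 0) {
		return 1;
	}
	printf("%s (%zu bytes)\n", (const char *)out.dptr, out.dsize);
	free(out.dptr);
	return 0;
}
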
@@ -41,7 +41,6 @@ struct ban_state {
  */
 struct ctdb_recoverd {
 	struct ctdb_context *ctdb;
-	int rec_file_fd;
 	uint32_t recmaster;
 	uint32_t num_active;
 	uint32_t num_connected;
@@ -2143,148 +2142,6 @@ static enum monitor_result verify_recmaster(struct ctdb_recoverd *rec, struct ct
 	return status;
 }
 
-/*
-  this function writes the number of connected nodes we have for this pnn
-  to the pnn slot in the reclock file
- */
-static void
-ctdb_recoverd_write_pnn_connect_count(struct ctdb_recoverd *rec)
-{
-	const char count = rec->num_connected;
-	struct ctdb_context *ctdb = talloc_get_type(rec->ctdb, struct ctdb_context);
-
-	if (rec->rec_file_fd == -1) {
-		DEBUG(DEBUG_CRIT,(__location__ " Unable to write pnn count. pnnfile is not open.\n"));
-		return;
-	}
-
-	if (pwrite(rec->rec_file_fd, &count, 1, ctdb->pnn) == -1) {
-		DEBUG(DEBUG_CRIT, (__location__ " Failed to write pnn count\n"));
-		close(rec->rec_file_fd);
-		rec->rec_file_fd = -1;
-	}
-}
-
-/*
-  this function opens the reclock file and sets a byterage lock for the single
-  byte at position pnn+1.
-  the existence/non-existence of such a lock provides an alternative mechanism
-  to know whether a remote node(recovery daemon) is running or not.
- */
-static void
-ctdb_recoverd_get_pnn_lock(struct ctdb_recoverd *rec)
-{
-	struct ctdb_context *ctdb = talloc_get_type(rec->ctdb, struct ctdb_context);
-	struct flock lock;
-	char *pnnfile = NULL;
-
-	DEBUG(DEBUG_INFO, ("Setting PNN lock for pnn:%d\n", ctdb->pnn));
-
-	if (rec->rec_file_fd != -1) {
-		close(rec->rec_file_fd);
-		rec->rec_file_fd = -1;
-	}
-
-	pnnfile = talloc_asprintf(rec, "%s.pnn", ctdb->recovery_lock_file);
-	CTDB_NO_MEMORY_FATAL(ctdb, pnnfile);
-
-	rec->rec_file_fd = open(pnnfile, O_RDWR|O_CREAT, 0600);
-	if (rec->rec_file_fd == -1) {
-		DEBUG(DEBUG_CRIT,(__location__ " Unable to open %s - (%s)\n",
-				  pnnfile, strerror(errno)));
-		talloc_free(pnnfile);
-		return;
-	}
-
-	set_close_on_exec(rec->rec_file_fd);
-	lock.l_type = F_WRLCK;
-	lock.l_whence = SEEK_SET;
-	lock.l_start = ctdb->pnn;
-	lock.l_len = 1;
-	lock.l_pid = 0;
-
-	if (fcntl(rec->rec_file_fd, F_SETLK, &lock) != 0) {
-		close(rec->rec_file_fd);
-		rec->rec_file_fd = -1;
-		DEBUG(DEBUG_CRIT,(__location__ " Failed to get pnn lock on '%s'\n", pnnfile));
-		talloc_free(pnnfile);
-		return;
-	}
-
-
-	DEBUG(DEBUG_NOTICE,(__location__ " Got pnn lock on '%s'\n", pnnfile));
-	talloc_free(pnnfile);
-
-	/* we start out with 0 connected nodes */
-	ctdb_recoverd_write_pnn_connect_count(rec);
-}
-
-/*
-  called when we need to do the periodical reclock pnn count update
- */
-static void ctdb_update_pnn_count(struct event_context *ev, struct timed_event *te,
-				  struct timeval t, void *p)
-{
-	int i, count;
-	struct ctdb_recoverd *rec = talloc_get_type(p, struct ctdb_recoverd);
-	struct ctdb_context *ctdb = rec->ctdb;
-	struct ctdb_node_map *nodemap = rec->nodemap;
-
-	/* close and reopen the pnn lock file */
-	ctdb_recoverd_get_pnn_lock(rec);
-
-	ctdb_recoverd_write_pnn_connect_count(rec);
-
-	event_add_timed(rec->ctdb->ev, rec->ctdb,
-			timeval_current_ofs(ctdb->tunable.reclock_ping_period, 0),
-			ctdb_update_pnn_count, rec);
-
-	/* check if there is a split cluster and yeld the recmaster role
-	   it the other half of the cluster is larger
-	*/
-	DEBUG(DEBUG_DEBUG, ("CHECK FOR SPLIT CLUSTER\n"));
-	if (rec->nodemap == NULL) {
-		return;
-	}
-	if (rec->rec_file_fd == -1) {
-		return;
-	}
-	/* only test this if we think we are the recmaster */
-	if (ctdb->pnn != rec->recmaster) {
-		DEBUG(DEBUG_DEBUG, ("We are not recmaster, skip test\n"));
-		return;
-	}
-	if (ctdb->recovery_lock_fd == -1) {
-		DEBUG(DEBUG_ERR, (__location__ " Lost reclock pnn file. Yielding recmaster role\n"));
-		close(ctdb->recovery_lock_fd);
-		ctdb->recovery_lock_fd = -1;
-		force_election(rec, ctdb->pnn, rec->nodemap);
-		return;
-	}
-	for (i=0; i<nodemap->num; i++) {
-		/* we dont need to check ourself */
-		if (nodemap->nodes[i].pnn == ctdb->pnn) {
-			continue;
-		}
-		/* dont check nodes that are connected to us */
-		if (!(nodemap->nodes[i].flags & NODE_FLAGS_DISCONNECTED)) {
-			continue;
-		}
-		/* check if the node is "connected" and how connected it it */
-		count = ctdb_read_pnn_lock(rec->rec_file_fd, nodemap->nodes[i].pnn);
-		if (count < 0) {
-			continue;
-		}
-		/* check if that node is more connected that us */
-		if (count > rec->num_connected) {
-			DEBUG(DEBUG_ERR, ("DISCONNECTED Node %u is more connected than we are, yielding recmaster role\n", nodemap->nodes[i].pnn));
-			close(ctdb->recovery_lock_fd);
-			ctdb->recovery_lock_fd = -1;
-			force_election(rec, ctdb->pnn, rec->nodemap);
-			return;
-		}
-	}
-}
-
 /* called to check that the allocation of public ip addresses is ok.
 */
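The heart of the removed mechanism is POSIX byte-range locking: each
recovery daemon held an F_WRLCK on the one-byte slot at offset pnn of the
reclock.pnn file, and the lock vanishing meant that node's daemon had died.
(Per this commit's changelog, keeping that extra file on the cluster
filesystem could prevent gpfs from unmounting, hence the removal.) A
self-contained sketch of the slot-locking technique (file name and CLI
handling are illustrative, not ctdb code):

#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

int main(int argc, char **argv)
{
	int pnn = (argc > 1) ? atoi(argv[1]) : 0;
	struct flock lock;
	int fd;

	fd = open("example.pnn", O_RDWR|O_CREAT, 0600);
	if (fd == -1) {
		fprintf(stderr, "open: %s\n", strerror(errno));
		return 1;
	}

	memset(&lock, 0, sizeof(lock));
	lock.l_type   = F_WRLCK;     /* exclusive lock ...           */
	lock.l_whence = SEEK_SET;
	lock.l_start  = pnn;         /* ... on our one-byte pnn slot */
	lock.l_len    = 1;

	if (fcntl(fd, F_SETLK, &lock) != 0) {
		/* another process already owns this slot */
		fprintf(stderr, "pnn %d is taken: %s\n", pnn, strerror(errno));
		return 1;
	}

	printf("holding slot %d; the kernel drops the lock when we exit\n", pnn);
	pause();
	return 0;
}
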
@@ -2419,10 +2276,6 @@ static void monitor_cluster(struct ctdb_context *ctdb)
 
 	rec->priority_time = timeval_current();
 
-	/* open the rec file fd and lock our slot */
-	rec->rec_file_fd = -1;
-	ctdb_recoverd_get_pnn_lock(rec);
-
 	/* register a message port for sending memory dumps */
 	ctdb_set_message_handler(ctdb, CTDB_SRVID_MEM_DUMP, mem_dump_handler, rec);
 
@@ -2441,11 +2294,6 @@ static void monitor_cluster(struct ctdb_context *ctdb)
 	/* register a message port for vacuum fetch */
 	ctdb_set_message_handler(ctdb, CTDB_SRVID_VACUUM_FETCH, vacuum_fetch_handler, rec);
 
-	/* update the reclock pnn file connected count on a regular basis */
-	event_add_timed(ctdb->ev, ctdb,
-			timeval_current_ofs(ctdb->tunable.reclock_ping_period, 0),
-			ctdb_update_pnn_count, rec);
-
 again:
 	if (mem_ctx) {
 		talloc_free(mem_ctx);
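The removed ctdb_update_pnn_count also illustrates the usual self-rearming
timer idiom: the handler does its periodic work and then calls
event_add_timed() on itself again. A runnable sketch of the same idiom
using modern tevent (the successor of the event_context API seen here; the
three-tick demo is illustrative, not ctdb code; link with -ltevent -ltalloc):

#include <stdio.h>
#include <tevent.h>

static void periodic_tick(struct tevent_context *ev, struct tevent_timer *te,
			  struct timeval now, void *private_data)
{
	int *remaining = private_data;

	printf("tick, %d runs left\n", *remaining);
	if (--(*remaining) > 0) {
		/* re-arm: schedule this handler again, one second out */
		tevent_add_timer(ev, ev, tevent_timeval_current_ofs(1, 0),
				 periodic_tick, remaining);
	}
}

int main(void)
{
	struct tevent_context *ev = tevent_context_init(NULL);
	int remaining = 3;

	tevent_add_timer(ev, ev, tevent_timeval_current_ofs(1, 0),
			 periodic_tick, &remaining);
	while (remaining > 0) {
		tevent_loop_once(ev);
	}
	talloc_free(ev);
	return 0;
}
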
@@ -1490,60 +1490,6 @@ static int control_getdbmap(struct ctdb_context *ctdb, int argc, const char **ar
 	return 0;
 }
 
-/*
-  get the filename of the reclock file
- */
-static int control_getreclock(struct ctdb_context *ctdb, int argc, const char **argv)
-{
-	int i, ret, fd;
-	const char *reclock;
-	struct ctdb_node_map *nodemap=NULL;
-	char *pnnfile;
-
-	ret = ctdb_ctrl_getreclock(ctdb, TIMELIMIT(), options.pnn, ctdb, &reclock);
-	if (ret != 0) {
-		DEBUG(DEBUG_ERR, ("Unable to get reclock file from node %u\n", options.pnn));
-		return ret;
-	}
-
-	ret = ctdb_ctrl_getnodemap(ctdb, TIMELIMIT(), options.pnn, ctdb, &nodemap);
-	if (ret != 0) {
-		DEBUG(DEBUG_ERR, ("Unable to get nodemap from node %u\n", options.pnn));
-		return ret;
-	}
-
-
-	pnnfile = talloc_asprintf(ctdb, "%s.pnn", reclock);
-	CTDB_NO_MEMORY(ctdb, pnnfile);
-
-	fd = open(pnnfile, O_RDONLY);
-	if (fd == -1) {
-		DEBUG(DEBUG_CRIT,(__location__ " Failed to open reclock pnn file %s - (%s)\n",
-				  pnnfile, strerror(errno)));
-		exit(10);
-	}
-
-
-	printf("Reclock file : %s\n", reclock);
-	for (i=0; i<nodemap->num; i++) {
-		int count;
-
-		count = ctdb_read_pnn_lock(fd, nodemap->nodes[i].pnn);
-
-		printf("pnn:%d %-16s", nodemap->nodes[i].pnn,
-		       inet_ntoa(nodemap->nodes[i].sin.sin_addr));
-		if (count == -1) {
-			printf(" NOT ACTIVE\n");
-		} else {
-			printf(" ACTIVE with %d connections\n", count);
-		}
-	}
-
-	close(fd);
-	return 0;
-}
-
-
 /*
   check if the local node is recmaster or not
   it will return 1 if this node is the recmaster and 0 if it is not
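The removed tool command read each node's slot with ctdb_read_pnn_lock(),
whose source is not part of this diff. Presumably it probes the byte with
F_GETLK and then reads back the stored connection count; a hypothetical
reader-side sketch under that assumption (names and file are illustrative):

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

/* returns -1 when nothing holds the slot (daemon not running),
   otherwise the count byte stored at offset pnn */
static int read_pnn_slot(int fd, int pnn)
{
	struct flock lock;
	char count;

	memset(&lock, 0, sizeof(lock));
	lock.l_type   = F_WRLCK;     /* ask: could we place a write lock? */
	lock.l_whence = SEEK_SET;
	lock.l_start  = pnn;
	lock.l_len    = 1;

	if (fcntl(fd, F_GETLK, &lock) != 0) {
		return -1;
	}
	if (lock.l_type == F_UNLCK) {
		return -1;   /* no conflicting lock: that daemon is gone */
	}
	if (pread(fd, &count, 1, pnn) != 1) {
		return -1;
	}
	return count;
}

int main(void)
{
	int fd = open("example.pnn", O_RDONLY);

	if (fd == -1) {
		return 1;
	}
	printf("slot 0: %d\n", read_pnn_slot(fd, 0));
	close(fd);
	return 0;
}
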
@@ -2030,7 +1976,6 @@ static const struct {
 	{ "repack", ctdb_repack, false, "repack all databases", "[max_freelist]"},
 	{ "listnodes", control_listnodes, false, "list all nodes in the cluster"},
 	{ "reloadnodes", control_reload_nodes_file, false, "reload the nodes file and restart the transport on all nodes"},
-	{ "getreclock", control_getreclock, false, "get the path to the reclock file" },
 	{ "moveip", control_moveip, false, "move/failover an ip address to another node", "<ip> <node>"},
 	{ "addip", control_addip, true, "add a ip address to a node", "<ip/mask> <iface>"},
 	{ "delip", control_delip, false, "delete an ip address from a node", "<ip>"},