1
0
mirror of https://github.com/samba-team/samba.git synced 2025-08-24 21:49:29 +03:00

merge from tridge

(This used to be ctdb commit 586347cfcb8dd72ab6cef4e5d2fceb584f55cb3a)
This commit is contained in:
Ronnie Sahlberg
2008-01-07 16:16:43 +11:00
10 changed files with 212 additions and 111 deletions

View File

@ -21,6 +21,15 @@
#
# CTDB_PUBLIC_ADDRESSES=/etc/ctdb/public_addresses
# Should CTDB present the cluster using a single public ip address to clients
# and multiplex clients across all CONNECTED nodes ?
# This is based on LVS
# When this is enabled, the entire cluster will present one single ip address
# which clients will connect to.
# CTDB_LVS_PUBLIC_IP=10.1.1.1
# IPMUX : OBSOLETE use LVS instead
# Should ctdb implement a single public ip address across the entire cluster
# and multiplex incoming connections across the connected nodes
# When using a single public ip you must also specify the public interface!

View File

@ -46,14 +46,19 @@ case $cmd in
# make sure samba is not already started
service smb stop > /dev/null 2>&1
killall -0 -q smbd && {
sleep 1
# make absolutely sure samba is dead
killall -q -9 smbd
}
# restart the winbind service
[ "$CTDB_MANAGES_WINBIND" = "yes" ] && {
service winbind stop > /dev/null 2>&1
killall -0 -q smbd winbindd && {
killall -0 -q winbindd && {
sleep 1
# make absolutely sure samba is dead
killall -q -9 smbd winbindd
# make absolutely sure winbindd is dead
killall -q -9 winbindd
}
service winbind start
}

View File

@ -55,6 +55,9 @@ case $cmd in
;;
recovered)
# if no IPs have changed then don't need to restart statd
[ -f $CTDB_BASE/state/statd/restart ] || exit 0;
# always restart the lockmanager so that we start with a clusterwide
# grace period when ip addresses have changed
[ -x $CTDB_BASE/statd-callout ] && {

View File

@ -20,20 +20,25 @@
<cmdsynopsis>
<command>ctdbd</command>
<arg choice="req">--reclock=&lt;filename&gt;</arg>
<arg choice="req">--nlist=&lt;filename&gt;</arg>
<arg choice="req">--dbdir=&lt;directory&gt;</arg>
<arg choice="opt">-? --help</arg>
<arg choice="opt">--usage</arg>
<arg choice="opt">-i --interactive</arg>
<arg choice="opt">--public-addresses=&lt;filename&gt;</arg>
<arg choice="opt">--event-script-dir=&lt;directory&gt;</arg>
<arg choice="opt">--logfile=&lt;filename&gt;</arg>
<arg choice="opt">--listen=&lt;address&gt;</arg>
<arg choice="opt">--transport=&lt;STRING&gt;</arg>
<arg choice="opt">--socket=&lt;filename&gt;</arg>
<arg choice="opt">-d --debug=&lt;INTEGER&gt;</arg>
<arg choice="req">--dbdir=&lt;directory&gt;</arg>
<arg choice="req">--dbdir-persistent=&lt;directory&gt;</arg>
<arg choice="opt">--event-script-dir=&lt;directory&gt;</arg>
<arg choice="opt">-i --interactive</arg>
<arg choice="opt">--listen=&lt;address&gt;</arg>
<arg choice="opt">--logfile=&lt;filename&gt;</arg>
<arg choice="req">--nlist=&lt;filename&gt;</arg>
<arg choice="opt">--nosetsched</arg>
<arg choice="opt">--public-addresses=&lt;filename&gt;</arg>
<arg choice="opt">--public-interface=&lt;interface&gt;</arg>
<arg choice="req">--reclock=&lt;filename&gt;</arg>
<arg choice="opt">--single-public-ip=&lt;address&gt;</arg>
<arg choice="opt">--socket=&lt;filename&gt;</arg>
<arg choice="opt">--syslog</arg>
<arg choice="opt">--torture</arg>
<arg choice="opt">--transport=&lt;STRING&gt;</arg>
<arg choice="opt">--usage</arg>
</cmdsynopsis>
</refsynopsisdiv>
@ -69,30 +74,10 @@
</listitem>
</varlistentry>
<varlistentry><term>--usage</term>
<varlistentry><term>-d --debug=&lt;DEBUGLEVEL&gt;</term>
<listitem>
<para>
Print usage information to the screen.
</para>
</listitem>
</varlistentry>
<varlistentry><term>--reclock=&lt;filename&gt;</term>
<listitem>
<para>
This is the name of the lock file stored on the shared cluster filesystem that ctdbd uses to arbitrate which node has the role of recovery-master.
This file must be stored on shared storage.
</para>
</listitem>
</varlistentry>
<varlistentry><term>--nlist=&lt;filename&gt;</term>
<listitem>
<para>
This file contains a list of the private ip addresses of every node in the cluster. There is one line/ip address for each node. This file must be the same for all nodes in the cluster.
</para>
<para>
This file is usually /etc/ctdb/nodes .
This option sets the debuglevel on the ctdbd daemon which controls what will be written to the logfile. The default is 0 which will only log important events and errors. A larger number will provide additional logging.
</para>
</listitem>
</varlistentry>
@ -109,22 +94,14 @@
</listitem>
</varlistentry>
<varlistentry><term>-i --interactive</term>
<varlistentry><term>--dbdir-persistent=&lt;directory&gt;</term>
<listitem>
<para>
By default ctdbd will detach itself from the shell and run in
the background as a daemon. This option makes ctdbd start in interactive mode.
</para>
</listitem>
</varlistentry>
<varlistentry><term>--public-addresses=&lt;filename&gt;</term>
<listitem>
<para>
When used with IP takeover this specifies a file containing the public ip addresses to use on the cluster. This file contains a list of ip addresses, netmasks and interfaces. When ctdb is operational it will distribute these public ip addresses evenly across the available nodes.
This is the directory on local storage where ctdbd keeps the local
copy of the persistent TDB databases. This directory is local for each node and should not be stored on the shared cluster filesystem.
</para>
<para>
This is usually the file /etc/ctdb/public_addresses
This directory would usually be /etc/ctdb/persistent .
</para>
</listitem>
</varlistentry>
@ -141,10 +118,11 @@
</listitem>
</varlistentry>
<varlistentry><term>--logfile=&lt;filename&gt;</term>
<varlistentry><term>-i --interactive</term>
<listitem>
<para>
This is the file where ctdbd will write its log. This is usually /var/log/log.ctdb .
By default ctdbd will detach itself from the shell and run in
the background as a daemon. This option makes ctdbd start in interactive mode.
</para>
</listitem>
</varlistentry>
@ -160,13 +138,92 @@
</listitem>
</varlistentry>
<varlistentry><term>--transport=&lt;STRING&gt;</term>
<varlistentry><term>--logfile=&lt;filename&gt;</term>
<listitem>
<para>
This option specifies which transport to use for ctdbd internode communications. The default is "tcp".
This is the file where ctdbd will write its log. This is usually /var/log/log.ctdb .
</para>
</listitem>
</varlistentry>
<varlistentry><term>--nlist=&lt;filename&gt;</term>
<listitem>
<para>
This file contains a list of the private ip addresses of every node in the cluster. There is one line/ip address for each node. This file must be the same for all nodes in the cluster.
</para>
<para>
Supported transports are "tcp" and "infiniband".
This file is usually /etc/ctdb/nodes .
</para>
</listitem>
</varlistentry>
<varlistentry><term>--nosetsched</term>
<listitem>
<para>
Normally ctdb will change its scheduler to run as a real-time
process. This option is used to change this behaviour and have
ctdb run as a normal process.
</para>
</listitem>
</varlistentry>
<varlistentry><term>--public-addresses=&lt;filename&gt;</term>
<listitem>
<para>
When used with IP takeover this specifies a file containing the public ip addresses to use on the cluster. This file contains a list of ip addresses, netmasks and interfaces. When ctdb is operational it will distribute these public ip addresses evenly across the available nodes.
</para>
<para>
This is usually the file /etc/ctdb/public_addresses
</para>
</listitem>
</varlistentry>
<varlistentry><term>--public-interface=&lt;interface&gt;</term>
<listitem>
<para>
This option tells ctdb which interface to attach public-addresses
to and also where to attach the single-public-ip when used.
</para>
</listitem>
</varlistentry>
<varlistentry><term>--reclock=&lt;filename&gt;</term>
<listitem>
<para>
This is the name of the lock file stored on the shared cluster filesystem that ctdbd uses to arbitrate which node has the role of recovery-master.
This file must be stored on shared storage.
</para>
</listitem>
</varlistentry>
<varlistentry><term>--single-public-ip=&lt;address&gt;</term>
<listitem>
<para>
This option is used to activate the "ipmux" functionality of ctdb.
In this mode, all nodes of the cluster will expose a single
ip address from all nodes with all incoming traffic to the cluster
being passed through the current recmaster. This functionality
is similar to using a load-balancing switch.
</para>
<para>
All incoming packets are sent to the recmaster which will multiplex
the clients across all available nodes and pass the packets on to
a different node in the cluster to manage the connection based
on the client's ip address. Outgoing packets however are sent
directly from the node that was chosen back to the client.
Since all incoming packets are sent through the recmaster this will
have a throughput and performance impact when used. This impact
in performance primarily affects write-performance while
read-performance should be mainly unaffected.
Only use this feature if your environment is mostly-read
(i.e. most traffic is from the nodes back to the clients) or
if it is not important to get maximum write-performance to the
cluster.
</para>
<para>
When using a single public ip, you must also specify the
public-interface so that ctdb knows which interface to attach the
single public ip to.
</para>
</listitem>
</varlistentry>
@ -182,10 +239,10 @@
</listitem>
</varlistentry>
<varlistentry><term>-d --debug=&lt;DEBUGLEVEL&gt;</term>
<varlistentry><term>--syslog</term>
<listitem>
<para>
This option sets the debuglevel on the ctdbd daemon which controls what will be written to the logfile. The default is 0 which will only log important events and errors. A larger number will provide additional logging.
Send all log messages to syslog instead of to the ctdb logfile.
</para>
</listitem>
</varlistentry>
@ -200,6 +257,26 @@
</para>
</listitem>
</varlistentry>
<varlistentry><term>--transport=&lt;STRING&gt;</term>
<listitem>
<para>
This option specifies which transport to use for ctdbd internode communications. The default is "tcp".
</para>
<para>
Supported transports are "tcp" and "infiniband".
</para>
</listitem>
</varlistentry>
<varlistentry><term>--usage</term>
<listitem>
<para>
Print usage information to the screen.
</para>
</listitem>
</varlistentry>
</variablelist>
</refsect1>

View File

@ -101,6 +101,7 @@ fi
%{_sysconfdir}/ctdb/events.d/60.nfs
%{_sysconfdir}/ctdb/events.d/61.nfstickle
%{_sysconfdir}/ctdb/events.d/90.ipmux
%{_sysconfdir}/ctdb/events.d/91.lvs
%{_sysconfdir}/ctdb/statd-callout
%{_sbindir}/ctdbd
%{_bindir}/ctdb

View File

@ -170,36 +170,38 @@ ctdb_control_getnodemap(struct ctdb_context *ctdb, uint32_t opcode, TDB_DATA ind
return 0;
}
struct getkeys_params {
/*
a traverse function for pulling all relevant records from pulldb
*/
struct pulldb_data {
struct ctdb_context *ctdb;
uint32_t lmaster;
uint32_t rec_count;
struct getkeys_rec {
TDB_DATA key;
TDB_DATA data;
} *recs;
struct ctdb_control_pulldb_reply *pulldata;
uint32_t len;
bool failed;
};
static int traverse_getkeys(struct tdb_context *tdb, TDB_DATA key, TDB_DATA data, void *p)
static int traverse_pulldb(struct tdb_context *tdb, TDB_DATA key, TDB_DATA data, void *p)
{
struct getkeys_params *params = (struct getkeys_params *)p;
uint32_t lmaster;
struct pulldb_data *params = (struct pulldb_data *)p;
struct ctdb_rec_data *rec;
lmaster = ctdb_lmaster(params->ctdb, &key);
/* only include this record if the lmaster matches or if
the wildcard lmaster (-1) was specified.
*/
if ((params->lmaster != CTDB_LMASTER_ANY) && (params->lmaster != lmaster)) {
return 0;
/* add the record to the blob */
rec = ctdb_marshall_record(params->pulldata, 0, key, NULL, data);
if (rec == NULL) {
params->failed = true;
return -1;
}
params->recs = talloc_realloc(NULL, params->recs, struct getkeys_rec, params->rec_count+1);
key.dptr = talloc_memdup(params->recs, key.dptr, key.dsize);
data.dptr = talloc_memdup(params->recs, data.dptr, data.dsize);
params->recs[params->rec_count].key = key;
params->recs[params->rec_count].data = data;
params->rec_count++;
params->pulldata = talloc_realloc_size(NULL, params->pulldata, rec->length + params->len);
if (params->pulldata == NULL) {
DEBUG(0,(__location__ " Failed to expand pulldb_data to %u (%u records)\n",
rec->length + params->len, params->pulldata->count));
params->failed = true;
return -1;
}
params->pulldata->count++;
memcpy(params->len+(uint8_t *)params->pulldata, rec, rec->length);
params->len += rec->length;
talloc_free(rec);
return 0;
}
@ -211,10 +213,8 @@ int32_t ctdb_control_pull_db(struct ctdb_context *ctdb, TDB_DATA indata, TDB_DAT
{
struct ctdb_control_pulldb *pull;
struct ctdb_db_context *ctdb_db;
struct getkeys_params params;
struct pulldb_data params;
struct ctdb_control_pulldb_reply *reply;
int i;
size_t len = 0;
if (ctdb->freeze_mode != CTDB_FREEZE_FROZEN) {
DEBUG(0,("rejecting ctdb_control_pull_db when not frozen\n"));
@ -225,47 +225,36 @@ int32_t ctdb_control_pull_db(struct ctdb_context *ctdb, TDB_DATA indata, TDB_DAT
ctdb_db = find_ctdb_db(ctdb, pull->db_id);
if (!ctdb_db) {
DEBUG(0,(__location__ " Unknown db\n"));
DEBUG(0,(__location__ " Unknown db 0x%08x\n", pull->db_id));
return -1;
}
params.ctdb = ctdb;
params.lmaster = pull->lmaster;
reply = talloc_zero(outdata, struct ctdb_control_pulldb_reply);
CTDB_NO_MEMORY(ctdb, reply);
params.rec_count = 0;
params.recs = talloc_array(outdata, struct getkeys_rec, 0);
CTDB_NO_MEMORY(ctdb, params.recs);
reply->db_id = pull->db_id;
params.ctdb = ctdb;
params.pulldata = reply;
params.len = offsetof(struct ctdb_control_pulldb_reply, data);
params.failed = false;
if (ctdb_lock_all_databases_mark(ctdb) != 0) {
DEBUG(0,(__location__ " Failed to get lock on entired db - failing\n"));
return -1;
}
tdb_traverse_read(ctdb_db->ltdb->tdb, traverse_getkeys, &params);
if (tdb_traverse_read(ctdb_db->ltdb->tdb, traverse_pulldb, &params) == -1) {
DEBUG(0,(__location__ " Failed to get traverse db '%s'\n", ctdb_db->db_name));
ctdb_lock_all_databases_unmark(ctdb);
talloc_free(params.pulldata);
return -1;
}
ctdb_lock_all_databases_unmark(ctdb);
reply = talloc(outdata, struct ctdb_control_pulldb_reply);
CTDB_NO_MEMORY(ctdb, reply);
reply->db_id = pull->db_id;
reply->count = params.rec_count;
len = offsetof(struct ctdb_control_pulldb_reply, data);
for (i=0;i<reply->count;i++) {
struct ctdb_rec_data *rec;
rec = ctdb_marshall_record(outdata, 0, params.recs[i].key, NULL, params.recs[i].data);
reply = talloc_realloc_size(outdata, reply, rec->length + len);
memcpy(len+(uint8_t *)reply, rec, rec->length);
len += rec->length;
talloc_free(rec);
}
talloc_free(params.recs);
outdata->dptr = (uint8_t *)reply;
outdata->dsize = len;
outdata->dptr = (uint8_t *)params.pulldata;
outdata->dsize = params.len;
return 0;
}

View File

@ -542,7 +542,7 @@ static int pull_one_remote_database(struct ctdb_context *ctdb, uint32_t srcnode,
struct ctdb_ltdb_header header;
if (existing.dsize < sizeof(struct ctdb_ltdb_header)) {
DEBUG(0,(__location__ " Bad record size %u from node %u\n",
existing.dsize, srcnode));
(unsigned)existing.dsize, srcnode));
free(existing.dptr);
talloc_free(tmp_ctx);
return -1;
@ -898,6 +898,7 @@ struct recdb_data {
struct ctdb_context *ctdb;
struct ctdb_control_pulldb_reply *recdata;
uint32_t len;
bool failed;
};
static int traverse_recdb(struct tdb_context *tdb, TDB_DATA key, TDB_DATA data, void *p)
@ -917,10 +918,15 @@ static int traverse_recdb(struct tdb_context *tdb, TDB_DATA key, TDB_DATA data,
/* add the record to the blob ready to send to the nodes */
rec = ctdb_marshall_record(params->recdata, 0, key, NULL, data);
if (rec == NULL) {
params->failed = true;
return -1;
}
params->recdata = talloc_realloc_size(NULL, params->recdata, rec->length + params->len);
if (params->recdata == NULL) {
DEBUG(0,(__location__ " Failed to expand recdata to %u (%u records)\n",
rec->length + params->len, params->recdata->count));
params->failed = true;
return -1;
}
params->recdata->count++;
@ -949,12 +955,20 @@ static int push_recdb_database(struct ctdb_context *ctdb, uint32_t dbid,
params.ctdb = ctdb;
params.recdata = recdata;
params.len = offsetof(struct ctdb_control_pulldb_reply, data);
params.failed = false;
if (tdb_traverse_read(recdb->tdb, traverse_recdb, &params) == -1) {
DEBUG(0,(__location__ " Failed to traverse recdb database\n"));
talloc_free(params.recdata);
return -1;
}
if (params.failed) {
DEBUG(0,(__location__ " Failed to traverse recdb database\n"));
talloc_free(params.recdata);
return -1;
}
recdata = params.recdata;
outdata.dptr = (void *)recdata;

View File

@ -160,6 +160,7 @@ EOF
show_all "/usr/lpp/mmfs/bin/mmlsconfig"
show_all "/usr/lpp/mmfs/bin/mmlsfs all"
show_all "/usr/lpp/mmfs/bin/mmlsnsd"
show_all "/usr/lpp/mmfs/bin/mmlsnsd -X"
show_all "/usr/lpp/mmfs/bin/mmfsadm dump version"
show_all "/usr/lpp/mmfs/bin/mmfsadm dump waiters"
show_all "/usr/lpp/mmfs/bin/mmlsmount all"

View File

@ -19,6 +19,7 @@ MAXNODE=`expr $NUMNODES - 1`
if [ $NODE = "all" ]; then
for a in `egrep '^[[:alnum:]]' $NODES`; do
echo; echo ">> NODE: $a <<"
if [ -f "$SCRIPT" ]; then
rsh $a at -f $SCRIPT now
else

View File

@ -19,6 +19,7 @@ MAXNODE=`expr $NUMNODES - 1`
if [ $NODE = "all" ]; then
for a in `egrep '^[[:alnum:]]' $NODES`; do
echo; echo ">> NODE: $a <<"
if [ -f "$SCRIPT" ]; then
ssh -n $a at -f $SCRIPT now
else