From 4bf6cab4a1e1c11f79fde40fc8b9ae29b0f00e35 Mon Sep 17 00:00:00 2001
From: Amitay Isaacs
Date: Mon, 7 Mar 2016 17:05:11 +1100
Subject: [PATCH] ctdb-doc: Sort the tunable variables in alphabetical order

Signed-off-by: Amitay Isaacs
Reviewed-by: Martin Schwenke
---
 ctdb/doc/ctdb-tunables.7.xml | 880 ++++++++++++++++++-----------------
 1 file changed, 443 insertions(+), 437 deletions(-)

diff --git a/ctdb/doc/ctdb-tunables.7.xml b/ctdb/doc/ctdb-tunables.7.xml
index 6c164f3161f..a6ef4353545 100644
--- a/ctdb/doc/ctdb-tunables.7.xml
+++ b/ctdb/doc/ctdb-tunables.7.xml
@@ -29,39 +29,17 @@
   getvar commands for more details.

-  MaxRedirectCount
-  Default: 3
-    If we are not the DMASTER and need to fetch a record across the network
-    we first send the request to the LMASTER after which the record
-    is passed onto the current DMASTER. If the DMASTER changes before
-    the request has reached that node, the request will be passed onto the
-    "next" DMASTER. For very hot records that migrate rapidly across the
-    cluster this can cause a request to "chase" the record for many hops
-    before it catches up with the record.
-
-    this is how many hops we allow trying to chase the DMASTER before we
-    switch back to the LMASTER again to ask for new directions.
-
-    When chasing a record, this is how many hops we will chase the record
-    for before going back to the LMASTER to ask for new guidance.
-
-  SeqnumInterval
-  Default: 1000
-    Some databases have seqnum tracking enabled, so that samba will be able
-    to detect asynchronously when there has been updates to the database.
-    Everytime a database is updated its sequence number is increased.
-
-    This tunable is used to specify in 'ms' how frequently ctdb will
-    send out updates to remote nodes to inform them that the sequence
-    number is increased.
+
+  The tunable variables are listed alphabetically.
+
+  AllowClientDBAttach
+  Default: 1
+    When set to 0, clients are not allowed to attach to any databases.
+    This can be used to temporarily block any new processes from
+    attaching to and accessing the databases.

@@ -69,63 +47,109 @@
   ControlTimeout
   Default: 60
-    This is the default
-    setting for timeout for when sending a control message to either the
-    local or a remote ctdb daemon.
+    This is the default setting for timeout for when sending a
+    control message to either the local or a remote ctdb daemon.

-  TraverseTimeout
-  Default: 20
-    This setting controls how long we allow a traverse process to run.
-    After this timeout triggers, the main ctdb daemon will abort the
-    traverse if it has not yet finished.
-
-  KeepaliveInterval
-  Default: 5
-    How often in seconds should the nodes send keepalives to eachother.
-
-  KeepaliveLimit
-  Default: 5
-    After how many keepalive intervals without any traffic should a node
-    wait until marking the peer as DISCONNECTED.
-
-    If a node has hung, it can thus take KeepaliveInterval*(KeepaliveLimit+1)
-    seconds before we determine that the node is DISCONNECTED and that we
-    require a recovery. This limitshould not be set too high since we want
-    a hung node to be detectec, and expunged from the cluster well before
-    common CIFS timeouts (45-90 seconds) kick in.
+  DatabaseHashSize
+  Default: 100001
+    Number of the hash chains for the local store of the tdbs that
+    ctdb manages.
+
+  DatabaseMaxDead
+  Default: 5
+    How many dead records per hashchain in the TDB database do we
+    allow before the freelist needs to be processed.
+
+  DBRecordCountWarn
+  Default: 100000
+    When set to non-zero, ctdb will log a warning when we try
+    to recover a database with more than this many records. This
+    will produce a warning if a database grows uncontrollably with
+    orphaned records.
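For illustration, runtime tunables like the one above are read and
written with the ctdb tool's getvar and setvar commands, which the
introduction above references (a minimal sketch; the values shown are
arbitrary):

    # Inspect the current warning threshold on the local node
    ctdb getvar DBRecordCountWarn

    # Raise it, e.g. for a deliberately large database
    ctdb setvar DBRecordCountWarn 200000

    # List every tunable with its current value
    ctdb listvars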
+
+  DBRecordSizeWarn
+  Default: 10000000
+    When set to non-zero, ctdb will log a warning when we try to
+    recover a database where a single record is bigger than this. This
+    will produce a warning if a database record grows uncontrollably
+    with orphaned sub-records.
+
+  DBSizeWarn
+  Default: 1000000000
+    When set to non-zero, ctdb will log a warning when we try to
+    recover a database bigger than this. This will produce a warning
+    if a database grows uncontrollably.
+
+  DeferredAttachTO
+  Default: 120
+    When databases are frozen we do not allow clients to attach to
+    the databases. Instead of returning an error immediately to the
+    application the attach request from the client is deferred until
+    the database becomes available again at which stage we respond
+    to the client.
+
+    This timeout controls how long we will defer the request from the
+    client before timing it out and returning an error to the client.
+
+  DeterministicIPs
+  Default: 0
+    When enabled, this tunable makes ctdb try to keep public IP
+    addresses locked to specific nodes as far as possible. This makes
+    it easier for debugging since you can know that as long as all
+    nodes are healthy public IP X will always be hosted by node Y.
+
+    The cost of using deterministic IP address assignment is that it
+    disables part of the logic where ctdb tries to reduce the number
+    of public IP assignment changes in the cluster. This tunable may
+    increase the number of IP failover/failbacks that are performed
+    on the cluster by a small margin.
+
+  DisableIPFailover
+  Default: 0
+    When enabled, ctdb will not perform failover or failback. Even
+    if a node fails while holding public IPs, ctdb will not recover
+    the IPs or assign them to another node.
+
+    When you enable this tunable, CTDB will no longer attempt
+    to recover the cluster by failing IP addresses over to other
+    nodes. This leads to a service outage until the administrator
+    has manually performed failover to replacement nodes using the
+    'ctdb moveip' command.

@@ -133,34 +157,20 @@
   ElectionTimeout
   Default: 3
-    When electing a new recovery master, this is how many seconds we allow
-    the election to take before we either deem the election finished
-    or we fail the election and start a new one.
+    When electing a new recovery master, this is how many seconds
+    we allow the election to take before we either deem the election
+    finished or we fail the election and start a new one.

-  TakeoverTimeout
-  Default: 9
-    This is how many seconds we allow controls to take for IP failover events.
-
-  MonitorInterval
-  Default: 15
-    How often should ctdb run the event scripts to check for a nodes health.
-
-  TickleUpdateInterval
-  Default: 20
-    How often will ctdb record and store the "tickle" information used to
-    kickstart stalled tcp connections after a recovery.
+  EnableBans
+  Default: 1
+    When set to 0, this disables BANNING completely in the cluster
+    and thus nodes can not get banned, even if they break. Don't
+    set this to 0 unless you know what you are doing. You should set
+    this to the same value on all nodes to avoid unexpected behaviour.
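Since the EnableBans entry above asks for the same value on every
node, here is a quick sketch of applying and verifying a setting
cluster-wide with the onnode helper that ships with ctdb:

    # Set the value on all nodes, then confirm it took effect everywhere
    onnode all ctdb setvar EnableBans 1
    onnode all ctdb getvar EnableBans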
@@ -172,7 +182,6 @@
   EventScriptTimeout
   Default: 30
     out. This is the total time for all enabled scripts that are run
     for an event, not just a single event script.
-
     Note that timeouts are ignored for some events ("takeip",
     "releaseip", "startrecovery", "recovered") and converted to

@@ -182,95 +191,75 @@
-  MonitorTimeoutCount
-  Default: 20
-    How many monitor events in a row need to timeout before a node
-    is flagged as UNHEALTHY. This setting is useful if scripts
-    can not be written so that they do not hang for benign
-    reasons.
-
-  RecoveryGracePeriod
-  Default: 120
-    During recoveries, if a node has not caused recovery failures during the
-    last grace period, any records of transgressions that the node has caused
-    recovery failures will be forgiven. This resets the ban-counter back to
-    zero for that node.
-
-  RecoveryBanPeriod
-  Default: 300
-    If a node becomes banned causing repetitive recovery failures. The node will
-    eventually become banned from the cluster.
-    This controls how long the culprit node will be banned from the cluster
-    before it is allowed to try to join the cluster again.
-    Don't set to small. A node gets banned for a reason and it is usually due
-    to real problems with the node.
-
-  DatabaseHashSize
-  Default: 100001
-    Size of the hash chains for the local store of the tdbs that ctdb manages.
-
-  DatabaseMaxDead
-  Default: 5
-    How many dead records per hashchain in the TDB database do we allow before
-    the freelist needs to be processed.
-
-  RerecoveryTimeout
-  Default: 10
-    Once a recovery has completed, no additional recoveries are permitted
-    until this timeout has expired.
-
-  EnableBans
-  Default: 1
-    When set to 0, this disables BANNING completely in the cluster and thus
-    nodes can not get banned, even it they break. Don't set to 0 unless you
-    know what you are doing. You should set this to the same value on
-    all nodes to avoid unexpected behaviour.
-
-  DeterministicIPs
-  Default: 0
-    When enabled, this tunable makes ctdb try to keep public IP addresses
-    locked to specific nodes as far as possible. This makes it easier for
-    debugging since you can know that as long as all nodes are healthy
-    public IP X will always be hosted by node Y.
-
-    The cost of using deterministic IP address assignment is that it
-    disables part of the logic where ctdb tries to reduce the number of
-    public IP assignment changes in the cluster. This tunable may increase
-    the number of IP failover/failbacks that are performed on the cluster
-    by a small margin.
+  FetchCollapse
+  Default: 1
+    When many clients across many nodes try to access the same record
+    at the same time this can lead to a fetch storm where the record
+    becomes very active and bounces between nodes very fast. This
+    leads to high CPU utilization of the ctdbd daemon, trying to
+    bounce that record around very fast, and poor performance.
+
+    This parameter is used to activate a fetch-collapse. A
+    fetch-collapse is when we track which records we have requests in
+    flight so that we only keep one request in flight from a certain
+    node, even if multiple smbd processes are attempting to fetch
+    the record at the same time. This can improve performance and
+    reduce CPU utilization for certain workloads.
+
+    This timeout controls if we should collapse multiple fetch
+    operations of the same record into a single request and defer
+    all duplicates or not.
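Before reaching for FetchCollapse or the sticky-record tunables below,
it usually helps to confirm that a database really has hot records;
one way is the per-database statistics output (a sketch — the exact
command name and output fields differ between ctdb versions, so treat
this as an assumption):

    # Show per-database statistics for locking.tdb, including hot keys
    ctdb dbstatistics locking.tdb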
+
+  HopcountMakeSticky
+  Default: 50
+    If the database is set to 'STICKY' mode, using the 'ctdb
+    setdbsticky' command, any record that is seen as very hot and
+    migrating so fast that hopcount surpasses 50 is set to become a
+    STICKY record for StickyDuration seconds. This means that after
+    each migration the record will be kept on the node and prevented
+    from being migrated off the node.
+
+    This setting allows one to try to identify such records and
+    stop them from migrating across the cluster so fast. This will
+    improve performance for certain workloads, such as locking.tdb
+    if many clients are opening/closing the same file concurrently.
+
+  KeepaliveInterval
+  Default: 5
+    How often in seconds should the nodes send keep-alive packets to
+    each other.
+
+  KeepaliveLimit
+  Default: 5
+    After how many keepalive intervals without any traffic should
+    a node wait until marking the peer as DISCONNECTED.
+
+    If a node has hung, it can thus take
+    KeepaliveInterval*(KeepaliveLimit+1) seconds before we determine
+    that the node is DISCONNECTED and that we require a recovery. This
+    limit should not be set too high since we want a hung node to be
+    detected, and expunged from the cluster well before common CIFS
+    timeouts (45-90 seconds) kick in.

   LCP2PublicIPs
   Default: 1

@@ -281,10 +270,52 @@
-  ReclockPingPeriod
-  Default: x
-    Obsolete
+  LogLatencyMs
+  Default: 0
+    When set to non-zero, this will make the main daemon log any
+    operation that took longer than this value, in 'ms', to complete.
+    These include "how long a lockwait child process needed", "how
+    long it took to write to a persistent database" but also "how
+    long did it take to get a response to a CALL from a remote node".
+
+  MaxRedirectCount
+  Default: 3
+    If we are not the DMASTER and need to fetch a record across the
+    network we first send the request to the LMASTER after which the
+    record is passed onto the current DMASTER. If the DMASTER changes
+    before the request has reached that node, the request will be
+    passed onto the "next" DMASTER. For very hot records that migrate
+    rapidly across the cluster this can cause a request to "chase"
+    the record for many hops before it catches up with the record.
+
+    When chasing a record, this is how many hops we will chase
+    the record for before going back to the LMASTER to ask for
+    new guidance.
+
+  MonitorInterval
+  Default: 15
+    How often, in seconds, should ctdb run the event scripts to check
+    for a node's health.
+
+  MonitorTimeoutCount
+  Default: 20
+    How many monitor events in a row need to time out before a node
+    is flagged as UNHEALTHY. This setting is useful if scripts can
+    not be written so that they do not hang for benign reasons.
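The KeepaliveLimit entry above gives the worst-case failure-detection
window as KeepaliveInterval*(KeepaliveLimit+1); a small sketch that
computes it from the live values (the awk parsing of getvar output is
an assumption and may need adjusting for your version):

    interval=$(ctdb getvar KeepaliveInterval | awk '{print $NF}')
    limit=$(ctdb getvar KeepaliveLimit | awk '{print $NF}')
    # Worst case: a hung node stays undetected for this whole window
    echo "detection window: $(( interval * (limit + 1) )) seconds"

With the defaults (5 and 5) this gives 30 seconds, safely inside the
45-90 second CIFS timeouts mentioned above.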
@@ -292,52 +323,25 @@
   NoIPFailback
   Default: 0
-    When set to 1, ctdb will not perform failback of IP addresses when a node
-    becomes healthy. Ctdb WILL perform failover of public IP addresses when a
-    node becomes UNHEALTHY, but when the node becomes HEALTHY again, ctdb
-    will not fail the addresses back.
+    When set to 1, ctdb will not perform failback of IP addresses
+    when a node becomes healthy. Ctdb WILL perform failover of public
+    IP addresses when a node becomes UNHEALTHY, but when the node
+    becomes HEALTHY again, ctdb will not fail the addresses back.
+
+    Use with caution! Normally when a node becomes available to the
+    cluster ctdb will try to reassign public IP addresses onto the
+    new node as a way to distribute the workload evenly across the
+    cluster nodes. Ctdb tries to make sure that all running nodes
+    have approximately the same number of public addresses they host.
+
+    When you enable this tunable, CTDB will no longer attempt to
+    rebalance the cluster by failing IP addresses back to the new
+    nodes. An unbalanced cluster will therefore remain unbalanced
+    until there is manual intervention from the administrator. When
+    this parameter is set, you can manually fail public IP addresses
+    over to the new node(s) using the 'ctdb moveip' command.

-  DisableIPFailover
-  Default: 0
-    When enabled, ctdb will not perform failover or failback. Even if a
-    node fails while holding public IPs, ctdb will not recover the IPs or
-    assign them to another node.
-
-    When you enable this tunable, CTDB will no longer attempt to recover
-    the cluster by failing IP addresses over to other nodes. This leads to
-    a service outage until the administrator has manually performed failover
-    to replacement nodes using the 'ctdb moveip' command.
-
-  NoIPTakeover
-  Default: 0
-    When set to 1, ctdb will not allow IP addresses to be failed over
-    onto this node. Any IP addresses that the node currently hosts
-    will remain on the node but no new IP addresses can be failed over
-    to the node.

@@ -347,7 +351,7 @@
     If no nodes are healthy then by default ctdb will happily host
     public IPs on disabled (unhealthy or administratively disabled)
-    nodes. This can cause problems, for example  if the underlying
+    nodes. This can cause problems, for example if the underlying
     cluster filesystem is not mounted. When set to 1 on a node and
     that node is disabled, any IPs hosted by this node will be
     released and the node will not take over any IPs until it is no
     longer disabled.
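Several of the entries above refer to manual IP failover with
'ctdb moveip'; for illustration, a sketch with placeholder values
(the address must be one of the cluster's public IPs and the target
is a node number; depending on the version, DeterministicIPs may also
need to be off and NoIPFailback on for the move to stick — treat that
as an assumption and check your ctdb(1) man page):

    # Move public address 10.1.1.1 to the node with PNN 2
    ctdb moveip 10.1.1.1 2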
@@ -356,52 +360,13 @@
-  DBRecordCountWarn
-  Default: 100000
-    When set to non-zero, ctdb will log a warning when we try to recover a
-    database with more than this many records. This will produce a warning
-    if a database grows uncontrollably with orphaned records.
-
-  DBRecordSizeWarn
-  Default: 10000000
-    When set to non-zero, ctdb will log a warning when we try to recover a
-    database where a single record is bigger than this. This will produce
-    a warning if a database record grows uncontrollably with orphaned
-    sub-records.
-
-  DBSizeWarn
-  Default: 1000000000
-    When set to non-zero, ctdb will log a warning when we try to recover a
-    database bigger than this. This will produce
-    a warning if a database grows uncontrollably.
-
-  VerboseMemoryNames
-  Default: 0
-    This feature consumes additional memory. when used the talloc library
-    will create more verbose names for all talloc allocated objects.
-
-  RecdPingTimeout
-  Default: 60
-    If the main dameon has not heard a "ping" from the recovery dameon for
-    this many seconds, the main dameon will log a message that the recovery
-    daemon is potentially hung.
+  NoIPTakeover
+  Default: 0
+    When set to 1, ctdb will not allow IP addresses to be failed
+    over onto this node. Any IP addresses that the node currently
+    hosts will remain on the node but no new IP addresses can be
+    failed over to the node.

@@ -409,21 +374,19 @@
   RecdFailCount
   Default: 10
-    If the recovery daemon has failed to ping the main dameon for this many
-    consecutive intervals, the main daemon will consider the recovery daemon
-    as hung and will try to restart it to recover.
+    If the recovery daemon has failed to ping the main daemon for
+    this many consecutive intervals, the main daemon will consider
+    the recovery daemon as hung and will try to restart it to recover.

-  LogLatencyMs
-  Default: 0
-    When set to non-zero, this will make the main daemon log any operation that
-    took longer than this value, in 'ms', to complete.
-    These include "how long time a lockwait child process needed",
-    "how long time to write to a persistent database" but also
-    "how long did it take to get a response to a CALL from a remote node".
+  RecdPingTimeout
+  Default: 60
+    If the main daemon has not heard a "ping" from the recovery daemon
+    for this many seconds, the main daemon will log a message that
+    the recovery daemon is potentially hung.

@@ -431,10 +394,66 @@
   RecLockLatencyMs
   Default: 1000
-    When using a reclock file for split brain prevention, if set to non-zero
-    this tunable will make the recovery dameon log a message if the fcntl()
-    call to lock/testlock the recovery file takes longer than this number of
-    ms.
+    When using a reclock file for split brain prevention, if set
+    to non-zero this tunable will make the recovery daemon log a
+    message if the fcntl() call to lock/testlock the recovery file
+    takes longer than this number of ms.
+
+  RecoverInterval
+  Default: 1
+    How frequently in seconds should the recovery daemon perform
+    the consistency checks that determine if we need to perform a
+    recovery or not.
+
+  RecoverPDBBySeqNum
+  Default: 1
+    When set to zero, database recovery for persistent databases is
+    record-by-record and the recovery process simply collects the most
+    recent version of every individual record.
+
+    When set to non-zero, persistent databases will instead be
+    recovered as a whole db and not by individual records. The
+    node that contains the highest value stored in the record
+    "__db_sequence_number__" is selected and the copy of that node's
+    database is used as the recovered database.
+
+    By default, recovery of persistent databases is done using the
+    __db_sequence_number__ record.
+
+  RecoverTimeout
+  Default: 20
+    This is the default setting for timeouts for controls when sent
+    from the recovery daemon. We allow longer control timeouts from
+    the recovery daemon than from normal use since the recovery
+    daemon often uses controls that can take a lot longer than normal
+    controls.
+
+  RecoveryBanPeriod
+  Default: 300
+    If a node causes repetitive recovery failures, it will eventually
+    become banned from the cluster. This controls how long the culprit
+    node will be banned from the cluster before it is allowed to try
+    to join the cluster again. Don't set this too small. A node gets
+    banned for a reason and it is usually due to real problems with
+    the node.
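Bans do not only arise from RecoveryBanPeriod; an administrator can
also impose and lift them by hand, which is handy when testing ban
behaviour (a sketch; -n picks the node the command is sent to):

    # Ban node 3 for 300 seconds, matching the RecoveryBanPeriod default
    ctdb -n 3 ban 300

    # Lift the ban ahead of time
    ctdb -n 3 unban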
@@ -447,6 +466,142 @@
+  RecoveryGracePeriod
+  Default: 120
+    During recoveries, if a node has not caused recovery failures
+    during the last grace period, any record of past occasions where
+    the node caused recovery failures will be forgiven. This resets
+    the ban-counter back to zero for that node.
+
+  RepackLimit
+  Default: 10000
+    During vacuuming, if the number of freelist records is more than
+    RepackLimit, then databases are repacked to get rid of the
+    freelist records and avoid fragmentation.
+
+    Databases are repacked only if both RepackLimit and VacuumLimit
+    are exceeded.
+
+  RerecoveryTimeout
+  Default: 10
+    Once a recovery has completed, no additional recoveries are
+    permitted until this timeout has expired.
+
+  Samba3AvoidDeadlocks
+  Default: 0
+    Enable code that prevents deadlocks with Samba (only for Samba
+    3.x).
+
+    This should be set to 1 when using Samba version 3.x to enable
+    special code in CTDB to avoid deadlock with Samba version 3.x.
+    This code is not required for Samba version 4.x and must not be
+    enabled for Samba 4.x.
+
+  SeqnumInterval
+  Default: 1000
+    Some databases have seqnum tracking enabled, so that samba will
+    be able to detect asynchronously when there have been updates
+    to the database. Every time a database is updated its sequence
+    number is increased.
+
+    This tunable is used to specify in 'ms' how frequently ctdb
+    will send out updates to remote nodes to inform them that the
+    sequence number is increased.
+
+  StatHistoryInterval
+  Default: 1
+    Granularity of the statistics collected in the statistics
+    history. This is reported by the 'ctdb stats' command.
+
+  StickyDuration
+  Default: 600
+    Once a record has been found to be fetch-lock hot and has been
+    flagged to become STICKY, this is for how long, in seconds,
+    the record will be flagged as a STICKY record.
+
+  StickyPindown
+  Default: 200
+    Once a STICKY record has been migrated onto a node, it will be
+    pinned down on that node for this number of ms. Any request from
+    other nodes to migrate the record off the node will be deferred
+    until the pindown timer expires.
+
+  TakeoverTimeout
+  Default: 9
+    This is how many seconds we allow controls to take for IP
+    failover events.
+
+  TickleUpdateInterval
+  Default: 20
+    How often will ctdb record and store the "tickle" information
+    used to kickstart stalled tcp connections after a recovery.
+
+  TraverseTimeout
+  Default: 20
+    This setting controls how long we allow a traverse process to run.
+    After this timeout triggers, the main ctdb daemon will abort
+    the traverse if it has not yet finished.
+
+  VacuumFastPathCount
+  Default: 60
+    When a record is deleted, it is marked for deletion during
+    vacuuming. The vacuuming process usually processes this list to
+    purge the records from the database. If the number of records
+    marked for deletion is more than VacuumFastPathCount, then the
+    vacuuming process will scan the complete database for empty
+    records instead of using the list of records marked for deletion.

   VacuumInterval
   Default: 10
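RepackLimit (above) and VacuumLimit (below) act as a conjunction,
which is easy to misread; a sketch spelling the condition out with
illustrative shell variables (the counters themselves are internal to
ctdb and not exposed under these names):

    # Repacking happens during vacuuming only when BOTH limits are exceeded:
    if [ "$freelist_records" -gt 10000 ] && [ "$deleted_records" -gt 5000 ]; then
        echo "this database would be repacked on the next vacuuming run"
    fi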
@@ -456,6 +611,20 @@
+  VacuumLimit
+  Default: 5000
+    During vacuuming, if the number of deleted records is more than
+    VacuumLimit, then databases are repacked to avoid fragmentation.
+
+    Databases are repacked only if both RepackLimit and VacuumLimit
+    are exceeded.

   VacuumMaxRunTime
   Default: 120

@@ -467,178 +636,15 @@
-  RepackLimit
-  Default: 10000
-    During vacuuming, if the number of freelist records are more
-    than RepackLimit, then databases are
-    repacked to get rid of the freelist records to avoid
-    fragmentation.
-
-    Databases are repacked only if both
-    RepackLimit and
-    VacuumLimit are exceeded.
-
-  VacuumLimit
-  Default: 5000
-    During vacuuming, if the number of deleted records are more
-    than VacuumLimit, then databases are
-    repacked to avoid fragmentation.
-
-    Databases are repacked only if both
-    RepackLimit and
-    VacuumLimit are exceeded.
-
-  VacuumFastPathCount
-  Default: 60
-    When a record is deleted, it is marked for deletion during
-    vacuuming. Vacuuming process usually processes this list to purge
-    the records from the database. If the number of records marked
-    for deletion are more than VacuumFastPathCount, then vacuuming
-    process will scan the complete database for empty records instead
-    of using the list of records marked for deletion.
-
-  DeferredAttachTO
-  Default: 120
-    When databases are frozen we do not allow clients to attach to the
-    databases. Instead of returning an error immediately to the application
-    the attach request from the client is deferred until the database
-    becomes available again at which stage we respond to the client.
-
-    This timeout controls how long we will defer the request from the client
-    before timing it out and returning an error to the client.
-
-  HopcountMakeSticky
-  Default: 50
-    If the database is set to 'STICKY' mode, using the 'ctdb setdbsticky'
-    command, any record that is seen as very hot and migrating so fast that
-    hopcount surpasses 50 is set to become a STICKY record for StickyDuration
-    seconds. This means that after each migration the record will be kept on
-    the node and prevented from being migrated off the node.
-
-    This setting allows one to try to identify such records and stop them from
-    migrating across the cluster so fast. This will improve performance for
-    certain workloads, such as locking.tdb if many clients are opening/closing
-    the same file concurrently.
-
-  StickyDuration
-  Default: 600
-    Once a record has been found to be fetch-lock hot and has been flagged to
-    become STICKY, this is for how long, in seconds, the record will be
-    flagged as a STICKY record.
-
-  StickyPindown
-  Default: 200
-    Once a STICKY record has been migrated onto a node, it will be pinned down
-    on that node for this number of ms. Any request from other nodes to migrate
-    the record off the node will be deferred until the pindown timer expires.
-
-  StatHistoryInterval
-  Default: 1
-    Granularity of the statistics collected in the statistics history.
-
-  AllowClientDBAttach
-  Default: 1
-    When set to 0, clients are not allowed to attach to any databases.
-    This can be used to temporarily block any new processes from attaching
-    to and accessing the databases.
-
-  RecoverPDBBySeqNum
-  Default: 1
-    When set to zero, database recovery for persistent databases
-    is record-by-record and recovery process simply collects the
-    most recent version of every individual record.
-
-    When set to non-zero, persistent databases will instead be
-    recovered as a whole db and not by individual records. The
-    node that contains the highest value stored in the record
-    "__db_sequence_number__" is selected and the copy of that
-    nodes database is used as the recovered database.
-
-    By default, recovery of persistent databses is done using
-    __db_sequence_number__ record.
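The sticky-record tunables only have an effect on databases that have
been put in sticky mode; a sketch of turning it on for a hot database
and adjusting the related knobs (the values are the documented
defaults, and per-database stickiness is set with the setdbsticky
command mentioned in the HopcountMakeSticky entry):

    # Enable sticky handling of hot records for locking.tdb
    ctdb setdbsticky locking.tdb

    # Keep hot records flagged STICKY for 600s, pinned for 200ms per hop
    ctdb setvar StickyDuration 600
    ctdb setvar StickyPindown 200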
-
-  FetchCollapse
-  Default: 1
-    When many clients across many nodes try to access the same record at the
-    same time this can lead to a fetch storm where the record becomes very
-    active and bounces between nodes very fast. This leads to high CPU
-    utilization of the ctdbd daemon, trying to bounce that record around
-    very fast, and poor performance.
-
-    This parameter is used to activate a fetch-collapse. A fetch-collapse
-    is when we track which records we have requests in flight so that we only
-    keep one request in flight from a certain node, even if multiple smbd
-    processes are attemtping to fetch the record at the same time. This
-    can improve performance and reduce CPU utilization for certain
-    workloads.
-
-    This timeout controls if we should collapse multiple fetch operations
-    of the same record into a single request and defer all duplicates or not.
-
-  Samba3AvoidDeadlocks
-  Default: 0
-    Enable code that prevents deadlocks with Samba (only for Samba 3.x).
-
-    This should be set to 1 when using Samba version 3.x to enable special
-    code in CTDB to avoid deadlock with Samba version 3.x. This code
-    is not required for Samba version 4.x and must not be enabled for
-    Samba 4.x.
+  VerboseMemoryNames
+  Default: 0
+    This feature consumes additional memory. When used, the talloc
+    library will create more verbose names for all talloc allocated
+    objects.
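Finally, note that values changed with setvar are not persistent
across a ctdbd restart; depending on the version and packaging,
tunables are commonly pinned at startup from the ctdb configuration
file (the CTDB_SET_ convention shown here is an assumption — check
your distribution's ctdb configuration documentation):

    # e.g. in /etc/sysconfig/ctdb or /etc/default/ctdb
    CTDB_SET_DBRecordCountWarn=200000
    CTDB_SET_EnableBans=1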