Cluster: configurable replicas migration barrier.
It is now possible to configure the minimum number of other working slaves a master must be left with in order for one of its slaves to migrate to an orphaned master.
This commit is contained in:
parent
3ff1bb4b2e
commit
a7d30681c9
19
redis.conf
19
redis.conf
@ -551,6 +551,25 @@ lua-time-limit 5000
|
|||||||
#
|
#
|
||||||
# cluster-node-timeout 15000
|
# cluster-node-timeout 15000
|
||||||
|
|
||||||
|
# Cluster slaves are able to migrate to orphaned masters, that is, masters
|
||||||
|
# that are left without working slaves. This improves the cluster ability
|
||||||
|
# to resist failures as otherwise an orphaned master can't be failed over
|
||||||
|
# in case of failure if it has no working slaves.
|
||||||
|
#
|
||||||
|
# Slaves migrate to orphaned masters only if there are still at least a
|
||||||
|
# given number of other working slaves for their old master. This number
|
||||||
|
# is the "migration barrier". A migration barrier of 1 means that a slave
|
||||||
|
# will migrate only if there is at least 1 other working slave for its master
|
||||||
|
# and so forth. It usually reflects the number of slaves you want for every
|
||||||
|
# master in your cluster.
|
||||||
|
#
|
||||||
|
# Default is 1 (slaves migrate only if their masters remain with at least
|
||||||
|
# one slave). To disable migration just set it to a very large value.
|
||||||
|
# A value of 0 can be set but is useful only for debugging and dangerous
|
||||||
|
# in production.
|
||||||
|
#
|
||||||
|
# cluster-migration-barrier 1
|
||||||
|
|
||||||
# In order to setup your cluster make sure to read the documentation
|
# In order to setup your cluster make sure to read the documentation
|
||||||
# available at http://redis.io web site.
|
# available at http://redis.io web site.
|
||||||
|
|
||||||
|
@ -2141,12 +2141,13 @@ void clusterHandleSlaveMigration(int max_slaves) {
|
|||||||
/* Step 1: Don't migrate if the cluster state is not ok. */
|
/* Step 1: Don't migrate if the cluster state is not ok. */
|
||||||
if (server.cluster->state != REDIS_CLUSTER_OK) return;
|
if (server.cluster->state != REDIS_CLUSTER_OK) return;
|
||||||
|
|
||||||
/* Step 2: Don't migrate if my master has just me as working slave. */
|
/* Step 2: Don't migrate if my master will not be left with at least
|
||||||
|
* 'migration-barrier' slaves after my migration. */
|
||||||
if (mymaster == NULL) return;
|
if (mymaster == NULL) return;
|
||||||
for (j = 0; j < mymaster->numslaves; j++)
|
for (j = 0; j < mymaster->numslaves; j++)
|
||||||
if (!nodeFailed(mymaster->slaves[j]) &&
|
if (!nodeFailed(mymaster->slaves[j]) &&
|
||||||
!nodeTimedOut(mymaster->slaves[j])) okslaves++;
|
!nodeTimedOut(mymaster->slaves[j])) okslaves++;
|
||||||
if (okslaves == 1) return;
|
if (okslaves <= server.cluster_migration_barrier) return;
|
||||||
|
|
||||||
/* Step 3: Idenitfy a candidate for migration, and check if among the
|
/* Step 3: Idenitfy a candidate for migration, and check if among the
|
||||||
* masters with the greatest number of ok slaves, I'm the one with the
|
* masters with the greatest number of ok slaves, I'm the one with the
|
||||||
|
@ -21,6 +21,7 @@
|
|||||||
#define REDIS_CLUSTER_SLAVE_VALIDITY_MULT 10 /* Slave data validity. */
|
#define REDIS_CLUSTER_SLAVE_VALIDITY_MULT 10 /* Slave data validity. */
|
||||||
#define REDIS_CLUSTER_FAILOVER_AUTH_RETRY_MULT 4 /* Auth request retry time. */
|
#define REDIS_CLUSTER_FAILOVER_AUTH_RETRY_MULT 4 /* Auth request retry time. */
|
||||||
#define REDIS_CLUSTER_FAILOVER_DELAY 5 /* Seconds */
|
#define REDIS_CLUSTER_FAILOVER_DELAY 5 /* Seconds */
|
||||||
|
#define REDIS_CLUSTER_DEFAULT_MIGRATION_BARRIER 1
|
||||||
|
|
||||||
struct clusterNode;
|
struct clusterNode;
|
||||||
|
|
||||||
|
14
src/config.c
14
src/config.c
@ -422,6 +422,14 @@ void loadServerConfigFromString(char *config) {
|
|||||||
if (server.cluster_node_timeout <= 0) {
|
if (server.cluster_node_timeout <= 0) {
|
||||||
err = "cluster node timeout must be 1 or greater"; goto loaderr;
|
err = "cluster node timeout must be 1 or greater"; goto loaderr;
|
||||||
}
|
}
|
||||||
|
} else if (!strcasecmp(argv[0],"cluster-migration-barrier")
|
||||||
|
&& argc == 2)
|
||||||
|
{
|
||||||
|
server.cluster_migration_barrier = atoi(argv[1]);
|
||||||
|
if (server.cluster_migration_barrier < 0) {
|
||||||
|
err = "cluster migration barrier must be positive";
|
||||||
|
goto loaderr;
|
||||||
|
}
|
||||||
} else if (!strcasecmp(argv[0],"lua-time-limit") && argc == 2) {
|
} else if (!strcasecmp(argv[0],"lua-time-limit") && argc == 2) {
|
||||||
server.lua_time_limit = strtoll(argv[1],NULL,10);
|
server.lua_time_limit = strtoll(argv[1],NULL,10);
|
||||||
} else if (!strcasecmp(argv[0],"slowlog-log-slower-than") &&
|
} else if (!strcasecmp(argv[0],"slowlog-log-slower-than") &&
|
||||||
@ -875,6 +883,10 @@ void configSetCommand(redisClient *c) {
|
|||||||
if (getLongLongFromObject(o,&ll) == REDIS_ERR ||
|
if (getLongLongFromObject(o,&ll) == REDIS_ERR ||
|
||||||
ll <= 0) goto badfmt;
|
ll <= 0) goto badfmt;
|
||||||
server.cluster_node_timeout = ll;
|
server.cluster_node_timeout = ll;
|
||||||
|
} else if (!strcasecmp(c->argv[2]->ptr,"cluster-migration-barrier")) {
|
||||||
|
if (getLongLongFromObject(o,&ll) == REDIS_ERR ||
|
||||||
|
ll < 0) goto badfmt;
|
||||||
|
server.cluster_migration_barrier = ll;
|
||||||
} else {
|
} else {
|
||||||
addReplyErrorFormat(c,"Unsupported CONFIG parameter: %s",
|
addReplyErrorFormat(c,"Unsupported CONFIG parameter: %s",
|
||||||
(char*)c->argv[2]->ptr);
|
(char*)c->argv[2]->ptr);
|
||||||
@ -975,6 +987,7 @@ void configGetCommand(redisClient *c) {
|
|||||||
config_get_numerical_field("min-slaves-max-lag",server.repl_min_slaves_max_lag);
|
config_get_numerical_field("min-slaves-max-lag",server.repl_min_slaves_max_lag);
|
||||||
config_get_numerical_field("hz",server.hz);
|
config_get_numerical_field("hz",server.hz);
|
||||||
config_get_numerical_field("cluster-node-timeout",server.cluster_node_timeout);
|
config_get_numerical_field("cluster-node-timeout",server.cluster_node_timeout);
|
||||||
|
config_get_numerical_field("cluster-migration-barrier",server.cluster_migration_barrier);
|
||||||
|
|
||||||
/* Bool (yes/no) values */
|
/* Bool (yes/no) values */
|
||||||
config_get_bool_field("no-appendfsync-on-rewrite",
|
config_get_bool_field("no-appendfsync-on-rewrite",
|
||||||
@ -1742,6 +1755,7 @@ int rewriteConfig(char *path) {
|
|||||||
rewriteConfigYesNoOption(state,"cluster-enabled",server.cluster_enabled,0);
|
rewriteConfigYesNoOption(state,"cluster-enabled",server.cluster_enabled,0);
|
||||||
rewriteConfigStringOption(state,"cluster-config-file",server.cluster_configfile,REDIS_DEFAULT_CLUSTER_CONFIG_FILE);
|
rewriteConfigStringOption(state,"cluster-config-file",server.cluster_configfile,REDIS_DEFAULT_CLUSTER_CONFIG_FILE);
|
||||||
rewriteConfigNumericalOption(state,"cluster-node-timeout",server.cluster_node_timeout,REDIS_CLUSTER_DEFAULT_NODE_TIMEOUT);
|
rewriteConfigNumericalOption(state,"cluster-node-timeout",server.cluster_node_timeout,REDIS_CLUSTER_DEFAULT_NODE_TIMEOUT);
|
||||||
|
rewriteConfigNumericalOption(state,"cluster-migration-barrier",server.cluster_migration_barrier,REDIS_CLUSTER_DEFAULT_MIGRATION_BARRIER);
|
||||||
rewriteConfigNumericalOption(state,"slowlog-log-slower-than",server.slowlog_log_slower_than,REDIS_SLOWLOG_LOG_SLOWER_THAN);
|
rewriteConfigNumericalOption(state,"slowlog-log-slower-than",server.slowlog_log_slower_than,REDIS_SLOWLOG_LOG_SLOWER_THAN);
|
||||||
rewriteConfigNumericalOption(state,"slowlog-max-len",server.slowlog_max_len,REDIS_SLOWLOG_MAX_LEN);
|
rewriteConfigNumericalOption(state,"slowlog-max-len",server.slowlog_max_len,REDIS_SLOWLOG_MAX_LEN);
|
||||||
rewriteConfigNotifykeyspaceeventsOption(state);
|
rewriteConfigNotifykeyspaceeventsOption(state);
|
||||||
|
@ -1394,6 +1394,7 @@ void initServerConfig() {
|
|||||||
server.repl_min_slaves_max_lag = REDIS_DEFAULT_MIN_SLAVES_MAX_LAG;
|
server.repl_min_slaves_max_lag = REDIS_DEFAULT_MIN_SLAVES_MAX_LAG;
|
||||||
server.cluster_enabled = 0;
|
server.cluster_enabled = 0;
|
||||||
server.cluster_node_timeout = REDIS_CLUSTER_DEFAULT_NODE_TIMEOUT;
|
server.cluster_node_timeout = REDIS_CLUSTER_DEFAULT_NODE_TIMEOUT;
|
||||||
|
server.cluster_migration_barrier = REDIS_CLUSTER_DEFAULT_MIGRATION_BARRIER;
|
||||||
server.cluster_configfile = zstrdup(REDIS_DEFAULT_CLUSTER_CONFIG_FILE);
|
server.cluster_configfile = zstrdup(REDIS_DEFAULT_CLUSTER_CONFIG_FILE);
|
||||||
server.lua_caller = NULL;
|
server.lua_caller = NULL;
|
||||||
server.lua_time_limit = REDIS_LUA_TIME_LIMIT;
|
server.lua_time_limit = REDIS_LUA_TIME_LIMIT;
|
||||||
|
@ -791,6 +791,7 @@ struct redisServer {
|
|||||||
mstime_t cluster_node_timeout; /* Cluster node timeout. */
|
mstime_t cluster_node_timeout; /* Cluster node timeout. */
|
||||||
char *cluster_configfile; /* Cluster auto-generated config file name. */
|
char *cluster_configfile; /* Cluster auto-generated config file name. */
|
||||||
struct clusterState *cluster; /* State of the cluster */
|
struct clusterState *cluster; /* State of the cluster */
|
||||||
|
int cluster_migration_barrier; /* Cluster replicas migration barrier. */
|
||||||
/* Scripting */
|
/* Scripting */
|
||||||
lua_State *lua; /* The Lua interpreter. We use just one for all clients */
|
lua_State *lua; /* The Lua interpreter. We use just one for all clients */
|
||||||
redisClient *lua_client; /* The "fake client" to query Redis from Lua */
|
redisClient *lua_client; /* The "fake client" to query Redis from Lua */
|
||||||
|
Loading…
Reference in New Issue
Block a user