Sentinel: Fix Config Dependency and Rewrite Sequence (#8271)

This commit fixes a well known and an annoying issue in Sentinel mode.

Cause of this issue:
Currently, Redis rewrite process works well in server mode, however in sentinel mode,
the sentinel config has variant semantics for different configurations, in example configuration
https://github.com/redis/redis/blob/unstable/sentinel.conf, we put comments on these.
However the rewrite process only treat the sentinel config as a single option. During rewrite
process, it will mess up with the lines and comments.

Approaches:
In order to solve this issue, we need to differentiate different subconfig options in sentinel separately,
for example, sentinel monitor <master-name> <ip> <redis-port> <quorum>
we can treat it as sentinel monitor option, instead of the sentinel option.

This commit also fixes the dependency issue when putting configurations in sentinel.conf.
For example before this commit,we must put
`sentinel monitor <master-name> <ip> <redis-port> <quorum>` before
`sentinel auth-pass <master-name> <password>` for a single master,
otherwise the server cannot start and will return error. This commit fixes this issue, as long as
the monitoring master was configured, no matter the sequence is, the sentinel can start and run properly.
This commit is contained in:
Wen Hui 2021-01-26 02:31:54 -05:00 committed by GitHub
parent 70789cf4bb
commit 1aad55b66f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 278 additions and 29 deletions

View File

@ -624,8 +624,7 @@ void loadServerConfigFromString(char *config) {
err = "sentinel directive while not in sentinel mode";
goto loaderr;
}
err = sentinelHandleConfiguration(argv+1,argc-1);
if (err) goto loaderr;
queueSentinelConfig(argv+1,argc-1,linenum,lines[i]);
}
} else {
err = "Bad directive or wrong number of arguments"; goto loaderr;
@ -1221,7 +1220,16 @@ struct rewriteConfigState *rewriteConfigReadOldFile(char *path) {
sdsfree(argv[0]);
argv[0] = alt;
}
rewriteConfigAddLineNumberToOption(state,argv[0],linenum);
/* If this is sentinel config, we use sentinel "sentinel <config>" as option
to avoid messing up the sequence. */
if (server.sentinel_mode && argc > 1 && !strcasecmp(argv[0],"sentinel")) {
sds sentinelOption = sdsempty();
sentinelOption = sdscatfmt(sentinelOption,"%S %S",argv[0],argv[1]);
rewriteConfigAddLineNumberToOption(state,sentinelOption,linenum);
sdsfree(sentinelOption);
} else {
rewriteConfigAddLineNumberToOption(state,argv[0],linenum);
}
sdsfreesplitres(argv,argc);
}
fclose(fp);

View File

@ -474,6 +474,20 @@ struct redisCommand sentinelcmds[] = {
{"command",commandCommand,-1, "random @connection", 0,NULL,0,0,0,0,0,0}
};
/* this array is used for sentinel config lookup, which need to be loaded
* before monitoring masters config to avoid dependency issues */
const char *preMonitorCfgName[] = {
"announce-ip",
"announce-port",
"deny-scripts-reconfig",
"sentinel-user",
"sentinel-pass",
"current-epoch",
"myid",
"resolve-hostnames",
"announce-hostnames"
};
/* This function overwrites a few normal Redis config default with Sentinel
* specific defaults. */
void initSentinelConfig(void) {
@ -481,6 +495,8 @@ void initSentinelConfig(void) {
server.protected_mode = 0; /* Sentinel must be exposed. */
}
void freeSentinelLoadQueueEntry(void *item);
/* Perform the Sentinel mode initialization. */
void initSentinel(void) {
unsigned int j;
@ -520,6 +536,7 @@ void initSentinel(void) {
sentinel.sentinel_auth_pass = NULL;
sentinel.sentinel_auth_user = NULL;
memset(sentinel.myid,0,sizeof(sentinel.myid));
server.sentinel_config = NULL;
}
/* This function gets called when the server is in Sentinel mode, started,
@ -1669,7 +1686,137 @@ const char *sentinelCheckCreateInstanceErrors(int role) {
}
}
/* init function for server.sentinel_config */
void initializeSentinelConfig() {
server.sentinel_config = zmalloc(sizeof(struct sentinelConfig));
server.sentinel_config->monitor_cfg = listCreate();
server.sentinel_config->pre_monitor_cfg = listCreate();
server.sentinel_config->post_monitor_cfg = listCreate();
listSetFreeMethod(server.sentinel_config->monitor_cfg,freeSentinelLoadQueueEntry);
listSetFreeMethod(server.sentinel_config->pre_monitor_cfg,freeSentinelLoadQueueEntry);
listSetFreeMethod(server.sentinel_config->post_monitor_cfg,freeSentinelLoadQueueEntry);
}
/* destroy function for server.sentinel_config */
void freeSentinelConfig() {
/* release these three config queues since we will not use it anymore */
listRelease(server.sentinel_config->pre_monitor_cfg);
listRelease(server.sentinel_config->monitor_cfg);
listRelease(server.sentinel_config->post_monitor_cfg);
zfree(server.sentinel_config);
server.sentinel_config = NULL;
}
/* Search config name in pre monitor config name array, return 1 if found,
* 0 if not found. */
int searchPreMonitorCfgName(const char *name) {
for (unsigned int i = 0; i < sizeof(preMonitorCfgName)/sizeof(preMonitorCfgName[0]); i++) {
if (!strcasecmp(preMonitorCfgName[i],name)) return 1;
}
return 0;
}
/* free method for sentinelLoadQueueEntry when release the list */
void freeSentinelLoadQueueEntry(void *item) {
struct sentinelLoadQueueEntry *entry = item;
sdsfreesplitres(entry->argv,entry->argc);
sdsfree(entry->line);
zfree(entry);
}
/* This function is used for queuing sentinel configuration, the main
* purpose of this function is to delay parsing the sentinel config option
* in order to avoid the order dependent issue from the config. */
void queueSentinelConfig(sds *argv, int argc, int linenum, sds line) {
int i;
struct sentinelLoadQueueEntry *entry;
/* initialize sentinel_config for the first call */
if (server.sentinel_config == NULL) initializeSentinelConfig();
entry = zmalloc(sizeof(struct sentinelLoadQueueEntry));
entry->argv = zmalloc(sizeof(char*)*argc);
entry->argc = argc;
entry->linenum = linenum;
entry->line = sdsdup(line);
for (i = 0; i < argc; i++) {
entry->argv[i] = sdsdup(argv[i]);
}
/* Separate config lines with pre monitor config, monitor config and
* post monitor config, in order to parsing config dependencies
* correctly. */
if (!strcasecmp(argv[0],"monitor")) {
listAddNodeTail(server.sentinel_config->monitor_cfg,entry);
} else if (searchPreMonitorCfgName(argv[0])) {
listAddNodeTail(server.sentinel_config->pre_monitor_cfg,entry);
} else{
listAddNodeTail(server.sentinel_config->post_monitor_cfg,entry);
}
}
/* This function is used for loading the sentinel configuration from
* pre_monitor_cfg, monitor_cfg and post_monitor_cfg list */
void loadSentinelConfigFromQueue(void) {
const char *err = NULL;
listIter li;
listNode *ln;
int linenum = 0;
sds line = NULL;
/* if there is no sentinel_config entry, we can return immediately */
if (server.sentinel_config == NULL) return;
/* loading from pre monitor config queue first to avoid dependency issues */
listRewind(server.sentinel_config->pre_monitor_cfg,&li);
while((ln = listNext(&li))) {
struct sentinelLoadQueueEntry *entry = ln->value;
err = sentinelHandleConfiguration(entry->argv,entry->argc);
if (err) {
linenum = entry->linenum;
line = entry->line;
goto loaderr;
}
}
/* loading from monitor config queue */
listRewind(server.sentinel_config->monitor_cfg,&li);
while((ln = listNext(&li))) {
struct sentinelLoadQueueEntry *entry = ln->value;
err = sentinelHandleConfiguration(entry->argv,entry->argc);
if (err) {
linenum = entry->linenum;
line = entry->line;
goto loaderr;
}
}
/* loading from the post monitor config queue */
listRewind(server.sentinel_config->post_monitor_cfg,&li);
while((ln = listNext(&li))) {
struct sentinelLoadQueueEntry *entry = ln->value;
err = sentinelHandleConfiguration(entry->argv,entry->argc);
if (err) {
linenum = entry->linenum;
line = entry->line;
goto loaderr;
}
}
/* free sentinel_config when config loading is finished */
freeSentinelConfig();
return;
loaderr:
fprintf(stderr, "\n*** FATAL CONFIG FILE ERROR (Redis %s) ***\n",
REDIS_VERSION);
fprintf(stderr, "Reading the configuration file, at line %d\n", linenum);
fprintf(stderr, ">>> '%s'\n", line);
fprintf(stderr, "%s\n", err);
exit(1);
}
const char *sentinelHandleConfiguration(char **argv, int argc) {
sentinelRedisInstance *ri;
if (!strcasecmp(argv[0],"monitor") && argc == 5) {
@ -1830,12 +1977,12 @@ void rewriteConfigSentinelOption(struct rewriteConfigState *state) {
/* sentinel unique ID. */
line = sdscatprintf(sdsempty(), "sentinel myid %s", sentinel.myid);
rewriteConfigRewriteLine(state,"sentinel",line,1);
rewriteConfigRewriteLine(state,"sentinel myid",line,1);
/* sentinel deny-scripts-reconfig. */
line = sdscatprintf(sdsempty(), "sentinel deny-scripts-reconfig %s",
sentinel.deny_scripts_reconfig ? "yes" : "no");
rewriteConfigRewriteLine(state,"sentinel",line,
rewriteConfigRewriteLine(state,"sentinel deny-scripts-reconfig",line,
sentinel.deny_scripts_reconfig != SENTINEL_DEFAULT_DENY_SCRIPTS_RECONFIG);
/* For every master emit a "sentinel monitor" config entry. */
@ -1850,14 +1997,16 @@ void rewriteConfigSentinelOption(struct rewriteConfigState *state) {
line = sdscatprintf(sdsempty(),"sentinel monitor %s %s %d %d",
master->name, master_addr->ip, master_addr->port,
master->quorum);
rewriteConfigRewriteLine(state,"sentinel",line,1);
rewriteConfigRewriteLine(state,"sentinel monitor",line,1);
/* rewriteConfigMarkAsProcessed is handled after the loop */
/* sentinel down-after-milliseconds */
if (master->down_after_period != SENTINEL_DEFAULT_DOWN_AFTER) {
line = sdscatprintf(sdsempty(),
"sentinel down-after-milliseconds %s %ld",
master->name, (long) master->down_after_period);
rewriteConfigRewriteLine(state,"sentinel",line,1);
rewriteConfigRewriteLine(state,"sentinel down-after-milliseconds",line,1);
/* rewriteConfigMarkAsProcessed is handled after the loop */
}
/* sentinel failover-timeout */
@ -1865,7 +2014,9 @@ void rewriteConfigSentinelOption(struct rewriteConfigState *state) {
line = sdscatprintf(sdsempty(),
"sentinel failover-timeout %s %ld",
master->name, (long) master->failover_timeout);
rewriteConfigRewriteLine(state,"sentinel",line,1);
rewriteConfigRewriteLine(state,"sentinel failover-timeout",line,1);
/* rewriteConfigMarkAsProcessed is handled after the loop */
}
/* sentinel parallel-syncs */
@ -1873,7 +2024,8 @@ void rewriteConfigSentinelOption(struct rewriteConfigState *state) {
line = sdscatprintf(sdsempty(),
"sentinel parallel-syncs %s %d",
master->name, master->parallel_syncs);
rewriteConfigRewriteLine(state,"sentinel",line,1);
rewriteConfigRewriteLine(state,"sentinel parallel-syncs",line,1);
/* rewriteConfigMarkAsProcessed is handled after the loop */
}
/* sentinel notification-script */
@ -1881,7 +2033,8 @@ void rewriteConfigSentinelOption(struct rewriteConfigState *state) {
line = sdscatprintf(sdsempty(),
"sentinel notification-script %s %s",
master->name, master->notification_script);
rewriteConfigRewriteLine(state,"sentinel",line,1);
rewriteConfigRewriteLine(state,"sentinel notification-script",line,1);
/* rewriteConfigMarkAsProcessed is handled after the loop */
}
/* sentinel client-reconfig-script */
@ -1889,7 +2042,8 @@ void rewriteConfigSentinelOption(struct rewriteConfigState *state) {
line = sdscatprintf(sdsempty(),
"sentinel client-reconfig-script %s %s",
master->name, master->client_reconfig_script);
rewriteConfigRewriteLine(state,"sentinel",line,1);
rewriteConfigRewriteLine(state,"sentinel client-reconfig-script",line,1);
/* rewriteConfigMarkAsProcessed is handled after the loop */
}
/* sentinel auth-pass & auth-user */
@ -1897,27 +2051,32 @@ void rewriteConfigSentinelOption(struct rewriteConfigState *state) {
line = sdscatprintf(sdsempty(),
"sentinel auth-pass %s %s",
master->name, master->auth_pass);
rewriteConfigRewriteLine(state,"sentinel",line,1);
rewriteConfigRewriteLine(state,"sentinel auth-pass",line,1);
/* rewriteConfigMarkAsProcessed is handled after the loop */
}
if (master->auth_user) {
line = sdscatprintf(sdsempty(),
"sentinel auth-user %s %s",
master->name, master->auth_user);
rewriteConfigRewriteLine(state,"sentinel",line,1);
rewriteConfigRewriteLine(state,"sentinel auth-user",line,1);
/* rewriteConfigMarkAsProcessed is handled after the loop */
}
/* sentinel config-epoch */
line = sdscatprintf(sdsempty(),
"sentinel config-epoch %s %llu",
master->name, (unsigned long long) master->config_epoch);
rewriteConfigRewriteLine(state,"sentinel",line,1);
rewriteConfigRewriteLine(state,"sentinel config-epoch",line,1);
/* rewriteConfigMarkAsProcessed is handled after the loop */
/* sentinel leader-epoch */
line = sdscatprintf(sdsempty(),
"sentinel leader-epoch %s %llu",
master->name, (unsigned long long) master->leader_epoch);
rewriteConfigRewriteLine(state,"sentinel",line,1);
rewriteConfigRewriteLine(state,"sentinel leader-epoch",line,1);
/* rewriteConfigMarkAsProcessed is handled after the loop */
/* sentinel known-slave */
di2 = dictGetIterator(master->slaves);
@ -1937,7 +2096,8 @@ void rewriteConfigSentinelOption(struct rewriteConfigState *state) {
line = sdscatprintf(sdsempty(),
"sentinel known-replica %s %s %d",
master->name, slave_addr->ip, slave_addr->port);
rewriteConfigRewriteLine(state,"sentinel",line,1);
rewriteConfigRewriteLine(state,"sentinel known-replica",line,1);
/* rewriteConfigMarkAsProcessed is handled after the loop */
}
dictReleaseIterator(di2);
@ -1949,7 +2109,8 @@ void rewriteConfigSentinelOption(struct rewriteConfigState *state) {
line = sdscatprintf(sdsempty(),
"sentinel known-sentinel %s %s %d %s",
master->name, ri->addr->ip, ri->addr->port, ri->runid);
rewriteConfigRewriteLine(state,"sentinel",line,1);
rewriteConfigRewriteLine(state,"sentinel known-sentinel",line,1);
/* rewriteConfigMarkAsProcessed is handled after the loop */
}
dictReleaseIterator(di2);
@ -1961,7 +2122,8 @@ void rewriteConfigSentinelOption(struct rewriteConfigState *state) {
line = sdscatprintf(sdsempty(),
"sentinel rename-command %s %s %s",
master->name, oldname, newname);
rewriteConfigRewriteLine(state,"sentinel",line,1);
rewriteConfigRewriteLine(state,"sentinel rename-command",line,1);
/* rewriteConfigMarkAsProcessed is handled after the loop */
}
dictReleaseIterator(di2);
}
@ -1969,36 +2131,62 @@ void rewriteConfigSentinelOption(struct rewriteConfigState *state) {
/* sentinel current-epoch is a global state valid for all the masters. */
line = sdscatprintf(sdsempty(),
"sentinel current-epoch %llu", (unsigned long long) sentinel.current_epoch);
rewriteConfigRewriteLine(state,"sentinel",line,1);
rewriteConfigRewriteLine(state,"sentinel current-epoch",line,1);
/* sentinel announce-ip. */
if (sentinel.announce_ip) {
line = sdsnew("sentinel announce-ip ");
line = sdscatrepr(line, sentinel.announce_ip, sdslen(sentinel.announce_ip));
rewriteConfigRewriteLine(state,"sentinel",line,1);
rewriteConfigRewriteLine(state,"sentinel announce-ip",line,1);
} else {
rewriteConfigMarkAsProcessed(state,"sentinel announce-ip");
}
/* sentinel announce-port. */
if (sentinel.announce_port) {
line = sdscatprintf(sdsempty(),"sentinel announce-port %d",
sentinel.announce_port);
rewriteConfigRewriteLine(state,"sentinel",line,1);
rewriteConfigRewriteLine(state,"sentinel announce-port",line,1);
} else {
rewriteConfigMarkAsProcessed(state,"sentinel announce-port");
}
/* sentinel sentinel-user. */
if (sentinel.sentinel_auth_user) {
line = sdscatprintf(sdsempty(), "sentinel sentinel-user %s", sentinel.sentinel_auth_user);
rewriteConfigRewriteLine(state,"sentinel",line,1);
rewriteConfigRewriteLine(state,"sentinel sentinel-user",line,1);
} else {
rewriteConfigMarkAsProcessed(state,"sentinel sentinel-user");
}
/* sentinel sentinel-pass. */
if (sentinel.sentinel_auth_pass) {
line = sdscatprintf(sdsempty(), "sentinel sentinel-pass %s", sentinel.sentinel_auth_pass);
rewriteConfigRewriteLine(state,"sentinel",line,1);
rewriteConfigRewriteLine(state,"sentinel sentinel-pass",line,1);
} else {
rewriteConfigMarkAsProcessed(state,"sentinel sentinel-pass");
}
dictReleaseIterator(di);
/* NOTE: the purpose here is in case due to the state change, the config rewrite
does not handle the configs, however, previously the config was set in the config file,
rewriteConfigMarkAsProcessed should be put here to mark it as processed in order to
delete the old config entry.
*/
rewriteConfigMarkAsProcessed(state,"sentinel monitor");
rewriteConfigMarkAsProcessed(state,"sentinel down-after-milliseconds");
rewriteConfigMarkAsProcessed(state,"sentinel failover-timeout");
rewriteConfigMarkAsProcessed(state,"sentinel parallel-syncs");
rewriteConfigMarkAsProcessed(state,"sentinel notification-script");
rewriteConfigMarkAsProcessed(state,"sentinel client-reconfig-script");
rewriteConfigMarkAsProcessed(state,"sentinel auth-pass");
rewriteConfigMarkAsProcessed(state,"sentinel auth-user");
rewriteConfigMarkAsProcessed(state,"sentinel config-epoch");
rewriteConfigMarkAsProcessed(state,"sentinel leader-epoch");
rewriteConfigMarkAsProcessed(state,"sentinel known-replica");
rewriteConfigMarkAsProcessed(state,"sentinel known-sentinel");
rewriteConfigMarkAsProcessed(state,"sentinel rename-command");
}
/* This function uses the config rewriting Redis engine in order to persist

View File

@ -5851,6 +5851,7 @@ int main(int argc, char **argv) {
exit(1);
}
loadServerConfig(server.configfile, config_from_stdin, options);
if (server.sentinel_mode) loadSentinelConfigFromQueue();
sdsfree(options);
}

View File

@ -942,6 +942,19 @@ struct moduleLoadQueueEntry {
robj **argv;
};
struct sentinelLoadQueueEntry {
int argc;
sds *argv;
int linenum;
sds line;
};
struct sentinelConfig {
list *pre_monitor_cfg;
list *monitor_cfg;
list *post_monitor_cfg;
};
struct sharedObjectsStruct {
robj *crlf, *ok, *err, *emptybulk, *czero, *cone, *pong, *space,
*colon, *queued, *null[4], *nullarray[4], *emptymap[4], *emptyset[4],
@ -1556,6 +1569,8 @@ struct redisServer {
char *bio_cpulist; /* cpu affinity list of bio thread. */
char *aof_rewrite_cpulist; /* cpu affinity list of aof rewrite process. */
char *bgsave_cpulist; /* cpu affinity list of bgsave process. */
/* Sentinel config */
struct sentinelConfig *sentinel_config; /* sentinel config to load at startup time. */
};
typedef struct pubsubPattern {
@ -2238,6 +2253,7 @@ void appendServerSaveParams(time_t seconds, int changes);
void resetServerSaveParams(void);
struct rewriteConfigState; /* Forward declaration to export API. */
void rewriteConfigRewriteLine(struct rewriteConfigState *state, const char *option, sds line, int force);
void rewriteConfigMarkAsProcessed(struct rewriteConfigState *state, const char *option);
int rewriteConfig(char *path, int force_all);
void initConfigValues();
@ -2333,6 +2349,8 @@ void initSentinelConfig(void);
void initSentinel(void);
void sentinelTimer(void);
const char *sentinelHandleConfiguration(char **argv, int argc);
void queueSentinelConfig(sds *argv, int argc, int linenum, sds line);
void loadSentinelConfigFromQueue(void);
void sentinelIsRunning(void);
/* redis-check-rdb & aof */

View File

@ -59,10 +59,9 @@ proc exec_instance {type dirname cfgfile} {
}
# Spawn a redis or sentinel instance, depending on 'type'.
proc spawn_instance {type base_port count {conf {}}} {
proc spawn_instance {type base_port count {conf {}} {base_conf_file ""}} {
for {set j 0} {$j < $count} {incr j} {
set port [find_available_port $base_port $::redis_port_count]
# Create a directory for this instance.
set dirname "${type}_${j}"
lappend ::dirs $dirname
@ -71,7 +70,13 @@ proc spawn_instance {type base_port count {conf {}}} {
# Write the instance config file.
set cfgfile [file join $dirname $type.conf]
set cfg [open $cfgfile w]
if {$base_conf_file ne ""} {
file copy -- $base_conf_file $cfgfile
set cfg [open $cfgfile a+]
} else {
set cfg [open $cfgfile w]
}
if {$::tls} {
puts $cfg "tls-port $port"
puts $cfg "tls-replication yes"

View File

@ -10,7 +10,7 @@ set ::tlsdir "../../tls"
proc main {} {
parse_options
spawn_instance sentinel $::sentinel_base_port $::instances_count [list "sentinel deny-scripts-reconfig no"]
spawn_instance sentinel $::sentinel_base_port $::instances_count [list "sentinel deny-scripts-reconfig no"] "../tests/includes/sentinel.conf"
spawn_instance redis $::redis_base_port $::instances_count
run_tests
cleanup

View File

@ -1,5 +1,5 @@
# Check the basic monitoring and failover capabilities.
source "../tests/includes/start-init-tests.tcl"
source "../tests/includes/init-tests.tcl"
if {$::simulate_error} {

View File

@ -0,0 +1,11 @@
# assume master is down after being unresponsive for 20s
sentinel down-after-milliseconds setmaster 20000
# reconfigure one slave at a time
sentinel parallel-syncs setmaster 2
# wait for 4m before assuming failover went wrong
sentinel failover-timeout setmaster 240000
# monitoring set
sentinel monitor setmaster 10.0.0.1 30000 2

View File

@ -0,0 +1,18 @@
test "(start-init) Flush config and compare rewrite config file lines" {
foreach_sentinel_id id {
assert_match "OK" [S $id SENTINEL FLUSHCONFIG]
set file1 ../tests/includes/sentinel.conf
set file2 [file join "sentinel_${id}" "sentinel.conf"]
set fh1 [open $file1 r]
set fh2 [open $file2 r]
while {[gets $fh1 line1]} {
if {[gets $fh2 line2]} {
assert [string equal $line1 $line2]
} else {
fail "sentinel config file rewrite sequence changed"
}
}
close $fh1
close $fh2
}
}