[MEDIUM] add a maintenance mode to servers

This is a first attempt to add a maintenance mode on servers, using
the stat socket (in admin level).

It can be done with the following command :
   - disable server <backend>/<server>
   - enable  server <backend>/<server>

In this mode, no more checks will be performed on the server and it
will be marked as a special DOWN state (MAINT).

If some servers were tracking it, they'll go DOWN until the server
leaves the maintenance mode. The stats page and the CSV export also
display this special state.

This can be used to disable the server in haproxy before doing some
operations on this server itself. This is a good complement to the
"http-check disable-on-404" keyword and works in TCP mode.
This commit is contained in:
Cyril Bont 2010-01-31 22:34:03 +01:00 committed by Willy Tarreau
parent 844a7e76d2
commit cd19e51b05
5 changed files with 195 additions and 29 deletions

View File

@ -7656,6 +7656,28 @@ clear counters all
server. This has the same effect as restarting. This command is restricted
and can only be issued on sockets configured for level "admin".
disable server <backend>/<server>
Mark the server DOWN for maintenance. In this mode, no more checks will be
performed on the server until it leaves maintenance.
If the server is tracked by other servers, those servers will be set to DOWN
during the maintenance.
Both the backend and the server may be specified either by their name or by
their numeric ID, prefixed with a dash ('#').
This command is restricted and can only be issued on sockets configured for
level "admin".
enable server <backend>/<server>
If the server was previously marked as DOWN for maintenance, this marks the
server UP and checks are re-enabled.
Both the backend and the server may be specified either by their name or by
their numeric ID, prefixed with a dash ('#').
This command is restricted and can only be issued on sockets configured for
level "admin".
get weight <backend>/<server>
Report the current weight and the initial weight of server <server> in
backend <backend> or an error if either doesn't exist. The initial weight is

View File

@ -27,6 +27,8 @@
const char *get_check_status_description(short check_status);
const char *get_check_status_info(short check_status);
void set_server_down(struct server *s);
void set_server_up(struct server *s);
struct task *process_chk(struct task *t);
int start_checks();
void health_adjust(struct server *s, short status);

View File

@ -47,7 +47,7 @@
#define SRV_CHECKED 0x0010 /* this server needs to be checked */
#define SRV_GOINGDOWN 0x0020 /* this server says that it's going down (404) */
#define SRV_WARMINGUP 0x0040 /* this server is warming up after a failure */
/* unused: 0x0080 */
#define SRV_MAINTAIN 0x0080 /* this server is in maintenance mode */
#define SRV_TPROXY_ADDR 0x0100 /* bind to this non-local address to reach this server */
#define SRV_TPROXY_CIP 0x0200 /* bind to the client's IP address to reach this server */
#define SRV_TPROXY_CLI 0x0300 /* bind to the client's IP+port to reach this server */

View File

@ -359,12 +359,16 @@ static int check_for_pending(struct server *s)
* possible to other servers. It automatically recomputes the number of
* servers, but not the map.
*/
static void set_server_down(struct server *s)
void set_server_down(struct server *s)
{
struct server *srv;
struct chunk msg;
int xferred;
if (s->state & SRV_MAINTAIN) {
s->health = s->rise;
}
if (s->health == s->rise || s->tracked) {
int srv_was_paused = s->state & SRV_GOINGDOWN;
@ -380,14 +384,19 @@ static void set_server_down(struct server *s)
chunk_init(&msg, trash, sizeof(trash));
chunk_printf(&msg,
"%sServer %s/%s is DOWN", s->state & SRV_BACKUP ? "Backup " : "",
s->proxy->id, s->id);
server_status_printf(&msg, s,
((!s->tracked && !(s->proxy->options2 & PR_O2_LOGHCHKS))?SSP_O_HCHK:0),
xferred);
if (s->state & SRV_MAINTAIN) {
chunk_printf(&msg,
"%sServer %s/%s is DOWN for maintenance", s->state & SRV_BACKUP ? "Backup " : "",
s->proxy->id, s->id);
} else {
chunk_printf(&msg,
"%sServer %s/%s is DOWN", s->state & SRV_BACKUP ? "Backup " : "",
s->proxy->id, s->id);
server_status_printf(&msg, s,
((!s->tracked && !(s->proxy->options2 & PR_O2_LOGHCHKS))?SSP_O_HCHK:0),
xferred);
}
Warning("%s.\n", trash);
/* we don't send an alert if the server was previously paused */
@ -403,18 +412,24 @@ static void set_server_down(struct server *s)
if (s->state & SRV_CHECKED)
for(srv = s->tracknext; srv; srv = srv->tracknext)
set_server_down(srv);
if (! (srv->state & SRV_MAINTAIN))
/* Only notify tracking servers that are not already in maintenance. */
set_server_down(srv);
}
s->health = 0; /* failure */
}
static void set_server_up(struct server *s) {
void set_server_up(struct server *s) {
struct server *srv;
struct chunk msg;
int xferred;
if (s->state & SRV_MAINTAIN) {
s->health = s->rise;
}
if (s->health == s->rise || s->tracked) {
if (s->proxy->srv_bck == 0 && s->proxy->srv_act == 0) {
if (s->proxy->last_change < now.tv_sec) // ignore negative times
@ -448,20 +463,30 @@ static void set_server_up(struct server *s) {
chunk_init(&msg, trash, sizeof(trash));
chunk_printf(&msg,
"%sServer %s/%s is UP", s->state & SRV_BACKUP ? "Backup " : "",
s->proxy->id, s->id);
if (s->state & SRV_MAINTAIN) {
chunk_printf(&msg,
"%sServer %s/%s is UP (leaving maintenance)", s->state & SRV_BACKUP ? "Backup " : "",
s->proxy->id, s->id);
} else {
chunk_printf(&msg,
"%sServer %s/%s is UP", s->state & SRV_BACKUP ? "Backup " : "",
s->proxy->id, s->id);
server_status_printf(&msg, s,
((!s->tracked && !(s->proxy->options2 & PR_O2_LOGHCHKS))?SSP_O_HCHK:0),
xferred);
server_status_printf(&msg, s,
((!s->tracked && !(s->proxy->options2 & PR_O2_LOGHCHKS))?SSP_O_HCHK:0),
xferred);
}
Warning("%s.\n", trash);
send_log(s->proxy, LOG_NOTICE, "%s.\n", trash);
if (s->state & SRV_CHECKED)
for(srv = s->tracknext; srv; srv = srv->tracknext)
set_server_up(srv);
if (! (srv->state & SRV_MAINTAIN))
/* Only notify tracking servers if they're not in maintenance. */
set_server_up(srv);
s->state &= ~SRV_MAINTAIN;
}
if (s->health >= s->rise)
@ -1007,7 +1032,7 @@ struct task *process_chk(struct task *t)
/* we don't send any health-checks when the proxy is stopped or when
* the server should not be checked.
*/
if (!(s->state & SRV_CHECKED) || s->proxy->state == PR_STSTOPPED) {
if (!(s->state & SRV_CHECKED) || s->proxy->state == PR_STSTOPPED || (s->state & SRV_MAINTAIN)) {
while (tick_is_expired(t->expire, now_ms))
t->expire = tick_add(t->expire, MS_TO_TICKS(s->inter));
return t;

View File

@ -65,6 +65,8 @@ const char stats_sock_usage_msg[] =
" get weight : report a server's current weight\n"
" set weight : change a server's weight\n"
" set timeout : change a timeout setting\n"
" disable server : set a server in maintenance mode\n"
" enable server : re-enable a server that was previously in maintenance mode\n"
"";
const char stats_permission_denied_msg[] =
@ -529,7 +531,102 @@ int stats_sock_parse_request(struct stream_interface *si, char *line)
return 0;
}
}
else { /* not "show" nor "clear" nor "get" nor "set" */
else if (strcmp(args[0], "enable") == 0) {
if (strcmp(args[1], "server") == 0) {
struct proxy *px;
struct server *sv;
if (s->listener->perm.ux.level < ACCESS_LVL_ADMIN) {
s->data_ctx.cli.msg = stats_permission_denied_msg;
si->st0 = STAT_CLI_PRINT;
return 1;
}
/* split "backend/server" and make <line> point to server */
for (line = args[2]; *line; line++)
if (*line == '/') {
*line++ = '\0';
break;
}
if (!*line || !*args[2]) {
s->data_ctx.cli.msg = "Require 'backend/server'.\n";
si->st0 = STAT_CLI_PRINT;
return 1;
}
if (!get_backend_server(args[2], line, &px, &sv)) {
s->data_ctx.cli.msg = px ? "No such server.\n" : "No such backend.\n";
si->st0 = STAT_CLI_PRINT;
return 1;
}
if (sv->state & SRV_MAINTAIN) {
/* The server is really in maintenance, we can change the server state */
if (sv->tracked) {
/* If this server tracks the status of another one,
* we must restore the good status.
*/
if (sv->tracked->state & SRV_RUNNING) {
set_server_up(sv);
} else {
sv->state &= ~SRV_MAINTAIN;
set_server_down(sv);
}
} else {
set_server_up(sv);
}
}
return 1;
}
else { /* unknown "enable" parameter */
return 0;
}
}
else if (strcmp(args[0], "disable") == 0) {
if (strcmp(args[1], "server") == 0) {
struct proxy *px;
struct server *sv;
if (s->listener->perm.ux.level < ACCESS_LVL_ADMIN) {
s->data_ctx.cli.msg = stats_permission_denied_msg;
si->st0 = STAT_CLI_PRINT;
return 1;
}
/* split "backend/server" and make <line> point to server */
for (line = args[2]; *line; line++)
if (*line == '/') {
*line++ = '\0';
break;
}
if (!*line || !*args[2]) {
s->data_ctx.cli.msg = "Require 'backend/server'.\n";
si->st0 = STAT_CLI_PRINT;
return 1;
}
if (!get_backend_server(args[2], line, &px, &sv)) {
s->data_ctx.cli.msg = px ? "No such server.\n" : "No such backend.\n";
si->st0 = STAT_CLI_PRINT;
return 1;
}
if (! (sv->state & SRV_MAINTAIN)) {
/* Not already in maintenance, we can change the server state */
sv->state |= SRV_MAINTAIN;
set_server_down(sv);
}
return 1;
}
else { /* unknown "disable" parameter */
return 0;
}
}
else { /* not "show" nor "clear" nor "get" nor "set" nor "enable" nor "disable" */
return 0;
}
return 1;
@ -1015,6 +1112,7 @@ int stats_dump_http(struct session *s, struct buffer *rep, struct uri_auth *uri)
".backup4 {background: #c060ff;}\n" /* NOLB state shows same as going down */
".backup5 {background: #90b0e0;}\n" /* NOLB state shows same as going down */
".backup6 {background: #e0e0e0;}\n"
".maintain {background: #c07820;}\n"
".rls {letter-spacing: 0.2em; margin-right: 1px;}\n" /* right letter spacing (used for grouping digits) */
"\n"
"a.px:link {color: #ffff40; text-decoration: none;}"
@ -1083,6 +1181,8 @@ int stats_dump_http(struct session *s, struct buffer *rep, struct uri_auth *uri)
"</tr><tr>\n"
"<td class=\"active0\"></td><td class=\"noborder\">active or backup DOWN &nbsp;</td>"
"<td class=\"active6\"></td><td class=\"noborder\">not checked </td>"
"</tr><tr>\n"
"<td class=\"maintain\"></td><td class=\"noborder\" colspan=\"3\">active or backup DOWN for maintenance (MAINT) &nbsp;</td>"
"</tr></table>\n"
"Note: UP with load-balancing disabled is reported as \"NOLB\"."
"</td>"
@ -1608,10 +1708,18 @@ int stats_dump_proxy(struct session *s, struct proxy *px, struct uri_auth *uri)
"UP %d/%d &darr;", "UP",
"NOLB %d/%d &darr;", "NOLB",
"<i>no check</i>" };
chunk_printf(&msg,
/* name */
"<tr class=\"%s%d\"><td class=ac",
(sv->state & SRV_BACKUP) ? "backup" : "active", sv_state);
if (sv->state & SRV_MAINTAIN) {
chunk_printf(&msg,
/* name */
"<tr class=\"maintain\"><td class=ac"
);
}
else {
chunk_printf(&msg,
/* name */
"<tr class=\"%s%d\"><td class=ac",
(sv->state & SRV_BACKUP) ? "backup" : "active", sv_state);
}
if (uri->flags&ST_SHLGNDS) {
char str[INET6_ADDRSTRLEN];
@ -1693,7 +1801,12 @@ int stats_dump_proxy(struct session *s, struct proxy *px, struct uri_auth *uri)
/* status, lest check */
chunk_printf(&msg, "<td class=ac>");
if (svs->state & SRV_CHECKED) {
if (sv->state & SRV_MAINTAIN) {
chunk_printf(&msg, "%s ",
human_time(now.tv_sec - sv->last_change, 1));
chunk_printf(&msg, "MAINT");
}
else if (svs->state & SRV_CHECKED) {
chunk_printf(&msg, "%s ",
human_time(now.tv_sec - sv->last_change, 1));
@ -1801,10 +1914,14 @@ int stats_dump_proxy(struct session *s, struct proxy *px, struct uri_auth *uri)
sv->counters.retries, sv->counters.redispatches);
/* status */
chunk_printf(&msg,
srv_hlt_st[sv_state],
(sv->state & SRV_RUNNING) ? (sv->health - sv->rise + 1) : (sv->health),
(sv->state & SRV_RUNNING) ? (sv->fall) : (sv->rise));
if (sv->state & SRV_MAINTAIN) {
chunk_printf(&msg, "MAINT,");
} else {
chunk_printf(&msg,
srv_hlt_st[sv_state],
(svs->state & SRV_RUNNING) ? (svs->health - svs->rise + 1) : (svs->health),
(svs->state & SRV_RUNNING) ? (svs->fall) : (svs->rise));
}
chunk_printf(&msg,
/* weight, active, backup */