MEDIUM: Add external check
Add an external check which makes use of an external process to check the status of a server.
This commit is contained in:
parent
ccaabcdfca
commit
98637e5bff
@ -448,6 +448,7 @@ The following keywords are supported in the "global" section :
|
||||
- chroot
|
||||
- crt-base
|
||||
- daemon
|
||||
- external-check
|
||||
- gid
|
||||
- group
|
||||
- log
|
||||
@ -547,6 +548,11 @@ daemon
|
||||
operation. It is equivalent to the command line "-D" argument. It can be
|
||||
disabled by the command line "-db" argument.
|
||||
|
||||
external-check
|
||||
Allows the use of an external agent to perform health checks.
|
||||
This is disabled by default as a security precaution.
|
||||
See "option external-check".
|
||||
|
||||
gid <number>
|
||||
Changes the process' group ID to <number>. It is recommended that the group
|
||||
ID is dedicated to HAProxy or to a small set of similar daemons. HAProxy must
|
||||
@ -1327,6 +1333,7 @@ option httplog X X X X
|
||||
option http_proxy (*) X X X X
|
||||
option independent-streams (*) X X X X
|
||||
option ldap-check X - X X
|
||||
option external-check X - X X
|
||||
option log-health-checks (*) X - X X
|
||||
option log-separate-errors (*) X X X -
|
||||
option logasap (*) X X X -
|
||||
@ -1351,6 +1358,8 @@ option tcp-smart-connect (*) X - X X
|
||||
option tcpka X X X X
|
||||
option tcplog X X X X
|
||||
option transparent (*) X - X X
|
||||
external-check command X - X X
|
||||
external-check path X - X X
|
||||
persist rdp-cookie X - X X
|
||||
rate-limit sessions X X X -
|
||||
redirect - X X X
|
||||
@ -4477,6 +4486,20 @@ option ldap-check
|
||||
See also : "option httpchk"
|
||||
|
||||
|
||||
option external-check
|
||||
Use external processes for server health checks
|
||||
May be used in sections : defaults | frontend | listen | backend
|
||||
yes | no | yes | yes
|
||||
|
||||
It is possible to test the health of a server using an external command.
|
||||
This is achieved by running the executable set using "external-check
|
||||
command".
|
||||
|
||||
Requires the "external-check" global to be set.
|
||||
|
||||
See also : "external-check", "external-check command", "external-check path"
|
||||
|
||||
|
||||
option log-health-checks
|
||||
no option log-health-checks
|
||||
Enable or disable logging of health checks status updates
|
||||
@ -5213,6 +5236,54 @@ no option transparent
|
||||
"transparent" option of the "bind" keyword.
|
||||
|
||||
|
||||
external-check command <command>
|
||||
Executable to run when performing an external-check
|
||||
May be used in sections : defaults | frontend | listen | backend
|
||||
yes | no | yes | yes
|
||||
|
||||
Arguments :
|
||||
<command> is the external command to run
|
||||
|
||||
The PATH environment variable used when executing the
|
||||
command may be set using "external-check path".
|
||||
|
||||
The arguments passed to the to the command are:
|
||||
|
||||
proxy_address proxy_port server_address server_port
|
||||
|
||||
The proxy_address and proxy_port are derived from the first listener
|
||||
that is either IPv4, IPv6 or a UNIX socket. It is an error for no such
|
||||
listeners to exist. In the case of a UNIX socket listener the
|
||||
proxy_address will be the path of the socket and the proxy_port will
|
||||
be the string "NOT_USED".
|
||||
|
||||
If the command executed and exits with a zero status then the check is
|
||||
considered to have passed, otherwise the check is considered to have
|
||||
failed.
|
||||
|
||||
Example :
|
||||
external-check command /bin/true
|
||||
|
||||
See also : "external-check", "option external-check", "external-check path"
|
||||
|
||||
|
||||
external-check path <path>
|
||||
The value of the PATH environment variable used when running an external-check
|
||||
May be used in sections : defaults | frontend | listen | backend
|
||||
yes | no | yes | yes
|
||||
|
||||
Arguments :
|
||||
<path> is the path used when executing external command to run
|
||||
|
||||
The default path is "".
|
||||
|
||||
Example :
|
||||
external-check path "/usr/bin:/bin"
|
||||
|
||||
See also : "external-check", "option external-check",
|
||||
"external-check command"
|
||||
|
||||
|
||||
persist rdp-cookie
|
||||
persist rdp-cookie(<name>)
|
||||
Enable RDP cookie-based persistence
|
||||
|
@ -128,6 +128,7 @@
|
||||
#define DEF_AGENT_FALLTIME 1
|
||||
#define DEF_AGENT_RISETIME 1
|
||||
#define DEF_CHECK_REQ "OPTIONS / HTTP/1.0\r\n"
|
||||
#define DEF_CHECK_PATH ""
|
||||
#define DEF_SMTP_CHECK_REQ "HELO localhost\r\n"
|
||||
#define DEF_LDAP_CHECK_REQ "\x30\x0c\x02\x01\x01\x60\x07\x02\x01\x03\x04\x00\x80\x00"
|
||||
#define DEF_REDIS_CHECK_REQ "*1\r\n$4\r\nPING\r\n"
|
||||
|
@ -73,6 +73,10 @@ enum {
|
||||
HCHK_STATUS_L7OKCD, /* L7 check conditionally passed */
|
||||
HCHK_STATUS_L7STS, /* L7 response error, for example HTTP 5xx */
|
||||
|
||||
HCHK_STATUS_PROCERR, /* External process check failure */
|
||||
HCHK_STATUS_PROCTOUT, /* External process check timeout */
|
||||
HCHK_STATUS_PROCOK, /* External process check passed */
|
||||
|
||||
HCHK_STATUS_SIZE
|
||||
};
|
||||
|
||||
@ -145,6 +149,9 @@ struct check {
|
||||
int rise, fall; /* time in iterations */
|
||||
int type; /* Check type, one of PR_O2_*_CHK */
|
||||
struct server *server; /* back-pointer to server */
|
||||
char **argv; /* the arguments to use if running a process-based check */
|
||||
char **envp; /* the environment to use if running a process-based check */
|
||||
struct pid_list *curpid; /* entry in pid_list used for current process-based test, or -1 if not in test */
|
||||
};
|
||||
|
||||
struct check_status {
|
||||
|
@ -79,6 +79,7 @@ struct global {
|
||||
#endif
|
||||
int uid;
|
||||
int gid;
|
||||
int external_check;
|
||||
int nbproc;
|
||||
int maxconn, hardmaxconn;
|
||||
#ifdef USE_OPENSSL
|
||||
|
@ -164,7 +164,8 @@ enum pr_mode {
|
||||
#define PR_O2_SSL3_CHK 0x70000000 /* use SSLv3 CLIENT_HELLO packets for server health */
|
||||
#define PR_O2_LB_AGENT_CHK 0x80000000 /* use a TCP connection to obtain a metric of server health */
|
||||
#define PR_O2_TCPCHK_CHK 0x90000000 /* use TCPCHK check for server health */
|
||||
/* unused: 0xA0000000 to 0xF000000, reserved for health checks */
|
||||
#define PR_O2_EXT_CHK 0xA0000000 /* use external command for server health */
|
||||
/* unused: 0xB0000000 to 0xF000000, reserved for health checks */
|
||||
#define PR_O2_CHK_ANY 0xF0000000 /* Mask to cover any check */
|
||||
/* end of proxy->options2 */
|
||||
|
||||
@ -342,6 +343,8 @@ struct proxy {
|
||||
struct list tcpcheck_rules; /* tcp-check send / expect rules */
|
||||
char *check_req; /* HTTP or SSL request to use for PR_O_HTTP_CHK|PR_O_SSL3_CHK */
|
||||
int check_len; /* Length of the HTTP or SSL3 request */
|
||||
char *check_command; /* Command to use for external agent checks */
|
||||
char *check_path; /* PATH environment to use for external agent checks */
|
||||
char *expect_str; /* http-check expected content : string or text version of the regex */
|
||||
struct my_regex *expect_regex; /* http-check expected content */
|
||||
struct chunk errmsg[HTTP_ERR_SIZE]; /* default or customized error messages for known errors */
|
||||
|
@ -123,6 +123,14 @@ enum srv_admin {
|
||||
#define SRV_SSL_O_NO_TLS_TICKETS 0x0100 /* disable session resumption tickets */
|
||||
#endif
|
||||
|
||||
struct pid_list {
|
||||
struct list list;
|
||||
pid_t pid;
|
||||
struct task *t;
|
||||
int status;
|
||||
int exited;
|
||||
};
|
||||
|
||||
/* A tree occurrence is a descriptor of a place in a tree, with a pointer back
|
||||
* to the server itself.
|
||||
*/
|
||||
|
@ -856,6 +856,9 @@ int cfg_parse_global(const char *file, int linenum, char **args, int kwm)
|
||||
}
|
||||
global.gid = atol(args[1]);
|
||||
}
|
||||
else if (!strcmp(args[0], "external-check")) {
|
||||
global.external_check = 1;
|
||||
}
|
||||
/* user/group name handling */
|
||||
else if (!strcmp(args[0], "user")) {
|
||||
struct passwd *ha_user;
|
||||
@ -2102,6 +2105,11 @@ int cfg_parse_listen(const char *file, int linenum, char **args, int kwm)
|
||||
curproxy->conf.used_listener_id = EB_ROOT;
|
||||
curproxy->conf.used_server_id = EB_ROOT;
|
||||
|
||||
if (defproxy.check_path)
|
||||
curproxy->check_path = strdup(defproxy.check_path);
|
||||
if (defproxy.check_command)
|
||||
curproxy->check_command = strdup(defproxy.check_command);
|
||||
|
||||
goto out;
|
||||
}
|
||||
else if (!strcmp(args[0], "defaults")) { /* use this one to assign default values */
|
||||
@ -2110,6 +2118,8 @@ int cfg_parse_listen(const char *file, int linenum, char **args, int kwm)
|
||||
* config parsing to free all default values.
|
||||
*/
|
||||
free(defproxy.check_req);
|
||||
free(defproxy.check_command);
|
||||
free(defproxy.check_path);
|
||||
free(defproxy.cookie_name);
|
||||
free(defproxy.rdp_cookie_name);
|
||||
free(defproxy.cookie_domain);
|
||||
@ -2617,6 +2627,41 @@ int cfg_parse_listen(const char *file, int linenum, char **args, int kwm)
|
||||
err_code |= ERR_ALERT | ERR_FATAL;
|
||||
}
|
||||
}/* end else if (!strcmp(args[0], "cookie")) */
|
||||
else if (!strcmp(args[0], "external-check")) {
|
||||
if (*(args[1]) == 0) {
|
||||
Alert("parsing [%s:%d] : missing argument after '%s'.\n",
|
||||
file, linenum, args[0]);
|
||||
err_code |= ERR_ALERT | ERR_FATAL;
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (!strcmp(args[1], "command")) {
|
||||
if (*(args[1]) == 0) {
|
||||
Alert("parsing [%s:%d] : missing argument after '%s'.\n",
|
||||
file, linenum, args[1]);
|
||||
err_code |= ERR_ALERT | ERR_FATAL;
|
||||
goto out;
|
||||
}
|
||||
free(curproxy->check_command);
|
||||
curproxy->check_command = strdup(args[2]);
|
||||
}
|
||||
else if (!strcmp(args[1], "path")) {
|
||||
if (*(args[1]) == 0) {
|
||||
Alert("parsing [%s:%d] : missing argument after '%s'.\n",
|
||||
file, linenum, args[1]);
|
||||
err_code |= ERR_ALERT | ERR_FATAL;
|
||||
goto out;
|
||||
}
|
||||
free(curproxy->check_path);
|
||||
curproxy->check_path = strdup(args[2]);
|
||||
}
|
||||
else {
|
||||
Alert("parsing [%s:%d] : external-check: unknown argument '%s'.\n",
|
||||
file, linenum, args[1]);
|
||||
err_code |= ERR_ALERT | ERR_FATAL;
|
||||
goto out;
|
||||
}
|
||||
}/* end else if (!strcmp(args[0], "external-check")) */
|
||||
else if (!strcmp(args[0], "persist")) { /* persist */
|
||||
if (*(args[1]) == 0) {
|
||||
Alert("parsing [%s:%d] : missing persist method.\n",
|
||||
@ -4024,6 +4069,13 @@ stats_error_parsing:
|
||||
curproxy->options2 &= ~PR_O2_CHK_ANY;
|
||||
curproxy->options2 |= PR_O2_TCPCHK_CHK;
|
||||
}
|
||||
else if (!strcmp(args[1], "external-check")) {
|
||||
/* excute an external command to check servers' health */
|
||||
free(curproxy->check_req);
|
||||
curproxy->check_req = NULL;
|
||||
curproxy->options2 &= ~PR_O2_CHK_ANY;
|
||||
curproxy->options2 |= PR_O2_EXT_CHK;
|
||||
}
|
||||
else if (!strcmp(args[1], "forwardfor")) {
|
||||
int cur_arg;
|
||||
|
||||
@ -6081,6 +6133,48 @@ int check_config_validity()
|
||||
}
|
||||
}
|
||||
|
||||
if ((curproxy->options2 & PR_O2_CHK_ANY) == PR_O2_EXT_CHK) {
|
||||
if (!global.external_check) {
|
||||
Alert("Proxy '%s' : '%s' unable to find required 'global.external-check'.\n",
|
||||
curproxy->id, "option external-check");
|
||||
cfgerr++;
|
||||
}
|
||||
if (!curproxy->check_command) {
|
||||
Alert("Proxy '%s' : '%s' unable to find required 'external-check command'.\n",
|
||||
curproxy->id, "option external-check");
|
||||
cfgerr++;
|
||||
}
|
||||
}
|
||||
|
||||
if (curproxy->check_command) {
|
||||
int clear = 0;
|
||||
if ((curproxy->options2 & PR_O2_CHK_ANY) != PR_O2_EXT_CHK) {
|
||||
Warning("config : '%s' will be ignored for %s '%s' (requires 'option external-check').\n",
|
||||
"external-check command", proxy_type_str(curproxy), curproxy->id);
|
||||
err_code |= ERR_WARN;
|
||||
clear = 1;
|
||||
}
|
||||
if (curproxy->check_command[0] != '/' && !curproxy->check_path) {
|
||||
Alert("Proxy '%s': '%s' does not have a leading '/' and 'external-command path' is not set.\n",
|
||||
curproxy->id, "external-check command");
|
||||
cfgerr++;
|
||||
}
|
||||
if (clear) {
|
||||
free(curproxy->check_command);
|
||||
curproxy->check_command = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
if (curproxy->check_path) {
|
||||
if ((curproxy->options2 & PR_O2_CHK_ANY) != PR_O2_EXT_CHK) {
|
||||
Warning("config : '%s' will be ignored for %s '%s' (requires 'option external-check').\n",
|
||||
"external-check path", proxy_type_str(curproxy), curproxy->id);
|
||||
err_code |= ERR_WARN;
|
||||
free(curproxy->check_path);
|
||||
curproxy->check_path = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
/* if a default backend was specified, let's find it */
|
||||
if (curproxy->defbe.name) {
|
||||
struct proxy *target;
|
||||
|
458
src/checks.c
458
src/checks.c
@ -22,6 +22,7 @@
|
||||
#include <unistd.h>
|
||||
#include <sys/socket.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/wait.h>
|
||||
#include <netinet/in.h>
|
||||
#include <netinet/tcp.h>
|
||||
#include <arpa/inet.h>
|
||||
@ -88,6 +89,10 @@ static const struct check_status check_statuses[HCHK_STATUS_SIZE] = {
|
||||
[HCHK_STATUS_L7OKD] = { CHK_RES_PASSED, "L7OK", "Layer7 check passed" },
|
||||
[HCHK_STATUS_L7OKCD] = { CHK_RES_CONDPASS, "L7OKC", "Layer7 check conditionally passed" },
|
||||
[HCHK_STATUS_L7STS] = { CHK_RES_FAILED, "L7STS", "Layer7 wrong status" },
|
||||
|
||||
[HCHK_STATUS_PROCERR] = { CHK_RES_FAILED, "PROCERR", "External check error" },
|
||||
[HCHK_STATUS_PROCTOUT] = { CHK_RES_FAILED, "PROCTOUT", "External check timeout" },
|
||||
[HCHK_STATUS_PROCOK] = { CHK_RES_FAILED, "PROCOK", "External check passed" },
|
||||
};
|
||||
|
||||
static const struct analyze_status analyze_statuses[HANA_STATUS_SIZE] = { /* 0: ignore, 1: error, 2: OK */
|
||||
@ -1351,7 +1356,7 @@ static struct task *server_warmup(struct task *t)
|
||||
}
|
||||
|
||||
/*
|
||||
* establish a server health-check.
|
||||
* establish a server health-check that makes use of a connection.
|
||||
*
|
||||
* It can return one of :
|
||||
* - SN_ERR_NONE if everything's OK and tcpcheck_main() was not called
|
||||
@ -1365,7 +1370,7 @@ static struct task *server_warmup(struct task *t)
|
||||
* Note that we try to prevent the network stack from sending the ACK during the
|
||||
* connect() when a pure TCP check is used (without PROXY protocol).
|
||||
*/
|
||||
static int connect_chk(struct task *t)
|
||||
static int connect_conn_chk(struct task *t)
|
||||
{
|
||||
struct check *check = t->context;
|
||||
struct server *s = check->server;
|
||||
@ -1448,11 +1453,436 @@ static int connect_chk(struct task *t)
|
||||
return ret;
|
||||
}
|
||||
|
||||
static struct list pid_list = LIST_HEAD_INIT(pid_list);
|
||||
static struct pool_head *pool2_pid_list;
|
||||
|
||||
void block_sigchld(void)
|
||||
{
|
||||
sigset_t set;
|
||||
sigemptyset(&set);
|
||||
sigaddset(&set, SIGCHLD);
|
||||
assert(sigprocmask(SIG_SETMASK, &set, NULL) == 0);
|
||||
}
|
||||
|
||||
void unblock_sigchld(void)
|
||||
{
|
||||
sigset_t set;
|
||||
sigemptyset(&set);
|
||||
assert(sigprocmask(SIG_SETMASK, &set, NULL) == 0);
|
||||
}
|
||||
|
||||
/* Call with SIGCHLD blocked */
|
||||
static struct pid_list *pid_list_add(pid_t pid, struct task *t)
|
||||
{
|
||||
struct pid_list *elem;
|
||||
struct check *check = t->context;
|
||||
|
||||
elem = pool_alloc2(pool2_pid_list);
|
||||
if (!elem)
|
||||
return NULL;
|
||||
elem->pid = pid;
|
||||
elem->t = t;
|
||||
elem->exited = 0;
|
||||
check->curpid = elem;
|
||||
LIST_INIT(&elem->list);
|
||||
LIST_ADD(&pid_list, &elem->list);
|
||||
return elem;
|
||||
}
|
||||
|
||||
/* Blocks blocks and then unblocks SIGCHLD */
|
||||
static void pid_list_del(struct pid_list *elem)
|
||||
{
|
||||
struct check *check;
|
||||
|
||||
if (!elem)
|
||||
return;
|
||||
|
||||
block_sigchld();
|
||||
LIST_DEL(&elem->list);
|
||||
unblock_sigchld();
|
||||
if (!elem->exited)
|
||||
kill(elem->pid, SIGTERM);
|
||||
|
||||
check = elem->t->context;
|
||||
check->curpid = NULL;
|
||||
pool_free2(pool2_pid_list, elem);
|
||||
}
|
||||
|
||||
/* Called from inside SIGCHLD handler, SIGCHLD is blocked */
|
||||
static void pid_list_expire(pid_t pid, int status)
|
||||
{
|
||||
struct pid_list *elem;
|
||||
|
||||
list_for_each_entry(elem, &pid_list, list) {
|
||||
if (elem->pid == pid) {
|
||||
elem->t->expire = now_ms;
|
||||
elem->status = status;
|
||||
elem->exited = 1;
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void sigchld_handler(int signal)
|
||||
{
|
||||
pid_t pid;
|
||||
int status;
|
||||
while ((pid = waitpid(0, &status, WNOHANG)) > 0)
|
||||
pid_list_expire(pid, status);
|
||||
}
|
||||
|
||||
static int init_pid_list(void) {
|
||||
struct sigaction action = {
|
||||
.sa_handler = sigchld_handler,
|
||||
.sa_flags = SA_NOCLDSTOP
|
||||
};
|
||||
|
||||
if (pool2_pid_list != NULL)
|
||||
/* Nothing to do */
|
||||
return 0;
|
||||
|
||||
if (sigaction(SIGCHLD, &action, NULL)) {
|
||||
Alert("Failed to set signal handler for external health checks: %s. Aborting.\n",
|
||||
strerror(errno));
|
||||
return 1;
|
||||
}
|
||||
|
||||
pool2_pid_list = create_pool("pid_list", sizeof(struct pid_list), MEM_F_SHARED);
|
||||
if (pool2_pid_list == NULL) {
|
||||
Alert("Failed to allocate memory pool for external health checks: %s. Aborting.\n",
|
||||
strerror(errno));
|
||||
return 1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
static int prepare_external_check(struct check *check)
|
||||
{
|
||||
struct server *s = check->server;
|
||||
struct proxy *px = s->proxy;
|
||||
struct listener *listener = NULL, *l;
|
||||
int i;
|
||||
const char *err_fmt = "Starting [%s:%s] check: out of memory.\n";
|
||||
const char *path = px->check_path ? px->check_path : DEF_CHECK_PATH;
|
||||
char host[46];
|
||||
char serv[6];
|
||||
|
||||
list_for_each_entry(l, &px->conf.listeners, by_fe)
|
||||
/* Use the first INET, INET6 or UNIX listener */
|
||||
if (l->addr.ss_family == AF_INET ||
|
||||
l->addr.ss_family == AF_INET6 ||
|
||||
l->addr.ss_family == AF_UNIX) {
|
||||
listener = l;
|
||||
break;
|
||||
}
|
||||
|
||||
if (!listener) {
|
||||
err_fmt = "Starting [%s:%s] check: no listener.\n";
|
||||
goto err;
|
||||
}
|
||||
|
||||
check->curpid = NULL;
|
||||
|
||||
check->envp = calloc(2, sizeof(check->argv));
|
||||
if (!check->envp)
|
||||
goto err;
|
||||
check->envp[0] = malloc(strlen("PATH=") + strlen(path) + 1);
|
||||
if (!check->envp[0])
|
||||
goto err;
|
||||
strcpy(check->envp[0], "PATH=");
|
||||
strcpy(check->envp[0] + strlen(check->envp[0]), path);
|
||||
check->envp[1] = NULL;
|
||||
|
||||
check->argv = calloc(6, sizeof(check->argv));
|
||||
if (!check->argv)
|
||||
goto err;
|
||||
|
||||
check->argv[0] = px->check_command;
|
||||
|
||||
if (listener->addr.ss_family == AF_INET ||
|
||||
listener->addr.ss_family == AF_INET6) {
|
||||
addr_to_str(&listener->addr, host, sizeof(host));
|
||||
check->argv[1] = strdup(host);
|
||||
port_to_str(&listener->addr, serv, sizeof(serv));
|
||||
check->argv[2] = strdup(serv);
|
||||
} else if (listener->addr.ss_family == AF_UNIX) {
|
||||
const struct sockaddr_un *un;
|
||||
|
||||
un = (struct sockaddr_un *)&listener->addr;
|
||||
check->argv[1] = strdup(un->sun_path);
|
||||
check->argv[2] = strdup("NOT_USED");
|
||||
} else {
|
||||
goto err;
|
||||
}
|
||||
|
||||
addr_to_str(&s->addr, host, sizeof(host));
|
||||
check->argv[3] = strdup(host);
|
||||
port_to_str(&s->addr, serv, sizeof(serv));
|
||||
check->argv[4] = strdup(serv);
|
||||
|
||||
for (i = 0; i < 5; i++)
|
||||
if (!check->argv[i])
|
||||
goto err;
|
||||
|
||||
return 0;
|
||||
err:
|
||||
if (check->envp) {
|
||||
free(check->envp[1]);
|
||||
free(check->envp);
|
||||
check->envp = NULL;
|
||||
}
|
||||
|
||||
if (check->argv) {
|
||||
for (i = 1; i < 5; i++)
|
||||
free(check->argv[i]);
|
||||
free(check->argv);
|
||||
check->argv = NULL;
|
||||
}
|
||||
Alert(err_fmt, px->id, s->id);
|
||||
return -1;
|
||||
}
|
||||
|
||||
/*
|
||||
* manages a server health-check. Returns
|
||||
* establish a server health-check that makes use of a process.
|
||||
*
|
||||
* It can return one of :
|
||||
* - SN_ERR_NONE if everything's OK
|
||||
* - SN_ERR_SRVTO if there are no more servers
|
||||
* - SN_ERR_SRVCL if the connection was refused by the server
|
||||
* - SN_ERR_PRXCOND if the connection has been limited by the proxy (maxconn)
|
||||
* - SN_ERR_RESOURCE if a system resource is lacking (eg: fd limits, ports, ...)
|
||||
* - SN_ERR_INTERNAL for any other purely internal errors
|
||||
* Additionnally, in the case of SN_ERR_RESOURCE, an emergency log will be emitted.
|
||||
*
|
||||
* Blocks and then unblocks SIGCHLD
|
||||
*/
|
||||
static int connect_proc_chk(struct task *t)
|
||||
{
|
||||
struct check *check = t->context;
|
||||
struct server *s = check->server;
|
||||
struct proxy *px = s->proxy;
|
||||
int status;
|
||||
pid_t pid;
|
||||
|
||||
if (!check->argv) {
|
||||
status = prepare_external_check(check);
|
||||
if (status < 0)
|
||||
return SN_ERR_RESOURCE;
|
||||
}
|
||||
|
||||
status = SN_ERR_RESOURCE;
|
||||
|
||||
block_sigchld();
|
||||
|
||||
pid = fork();
|
||||
if (pid < 0) {
|
||||
Alert("Failed to fork process for external health check: %s. Aborting.\n",
|
||||
strerror(errno));
|
||||
set_server_check_status(check, HCHK_STATUS_SOCKERR, strerror(errno));
|
||||
goto out;
|
||||
}
|
||||
if (pid == 0) {
|
||||
/* Child */
|
||||
extern char **environ;
|
||||
environ = check->envp;
|
||||
execvp(px->check_command, check->argv);
|
||||
Alert("Failed to exec process for external health check: %s. Aborting.\n",
|
||||
strerror(errno));
|
||||
exit(-1);
|
||||
}
|
||||
|
||||
/* Parent */
|
||||
if (check->result == CHK_RES_UNKNOWN) {
|
||||
if (pid_list_add(pid, t) != NULL) {
|
||||
t->expire = tick_add(now_ms, MS_TO_TICKS(check->inter));
|
||||
|
||||
if (px->timeout.check && px->timeout.connect) {
|
||||
int t_con = tick_add(now_ms, px->timeout.connect);
|
||||
t->expire = tick_first(t->expire, t_con);
|
||||
}
|
||||
status = SN_ERR_NONE;
|
||||
goto out;
|
||||
}
|
||||
else {
|
||||
set_server_check_status(check, HCHK_STATUS_SOCKERR, strerror(errno));
|
||||
}
|
||||
kill(pid, SIGTERM); /* process creation error */
|
||||
}
|
||||
else
|
||||
set_server_check_status(check, HCHK_STATUS_SOCKERR, strerror(errno));
|
||||
|
||||
out:
|
||||
unblock_sigchld();
|
||||
return status;
|
||||
}
|
||||
|
||||
/*
|
||||
* establish a server health-check.
|
||||
*
|
||||
* It can return one of :
|
||||
* - SN_ERR_NONE if everything's OK
|
||||
* - SN_ERR_SRVTO if there are no more servers
|
||||
* - SN_ERR_SRVCL if the connection was refused by the server
|
||||
* - SN_ERR_PRXCOND if the connection has been limited by the proxy (maxconn)
|
||||
* - SN_ERR_RESOURCE if a system resource is lacking (eg: fd limits, ports, ...)
|
||||
* - SN_ERR_INTERNAL for any other purely internal errors
|
||||
* Additionnally, in the case of SN_ERR_RESOURCE, an emergency log will be emitted.
|
||||
*/
|
||||
static int connect_chk(struct task *t)
|
||||
{
|
||||
struct check *check = t->context;
|
||||
|
||||
if (check->type == PR_O2_EXT_CHK)
|
||||
return connect_proc_chk(t);
|
||||
return connect_conn_chk(t);
|
||||
}
|
||||
|
||||
/*
|
||||
* manages a server health-check that uses a process. Returns
|
||||
* the time the task accepts to wait, or TIME_ETERNITY for infinity.
|
||||
*/
|
||||
static struct task *process_chk(struct task *t)
|
||||
static struct task *process_chk_proc(struct task *t)
|
||||
{
|
||||
struct check *check = t->context;
|
||||
struct server *s = check->server;
|
||||
struct connection *conn = check->conn;
|
||||
int rv;
|
||||
int ret;
|
||||
int expired = tick_is_expired(t->expire, now_ms);
|
||||
|
||||
if (!(check->state & CHK_ST_INPROGRESS)) {
|
||||
/* no check currently running */
|
||||
if (!expired) /* woke up too early */
|
||||
return t;
|
||||
|
||||
/* we don't send any health-checks when the proxy is
|
||||
* stopped, the server should not be checked or the check
|
||||
* is disabled.
|
||||
*/
|
||||
if (((check->state & (CHK_ST_ENABLED | CHK_ST_PAUSED)) != CHK_ST_ENABLED) ||
|
||||
s->proxy->state == PR_STSTOPPED)
|
||||
goto reschedule;
|
||||
|
||||
/* we'll initiate a new check */
|
||||
set_server_check_status(check, HCHK_STATUS_START, NULL);
|
||||
|
||||
check->state |= CHK_ST_INPROGRESS;
|
||||
|
||||
ret = connect_chk(t);
|
||||
switch (ret) {
|
||||
case SN_ERR_UP:
|
||||
return t;
|
||||
case SN_ERR_NONE:
|
||||
/* we allow up to min(inter, timeout.connect) for a connection
|
||||
* to establish but only when timeout.check is set
|
||||
* as it may be to short for a full check otherwise
|
||||
*/
|
||||
t->expire = tick_add(now_ms, MS_TO_TICKS(check->inter));
|
||||
|
||||
if (s->proxy->timeout.check && s->proxy->timeout.connect) {
|
||||
int t_con = tick_add(now_ms, s->proxy->timeout.connect);
|
||||
t->expire = tick_first(t->expire, t_con);
|
||||
}
|
||||
|
||||
goto reschedule;
|
||||
|
||||
case SN_ERR_SRVTO: /* ETIMEDOUT */
|
||||
case SN_ERR_SRVCL: /* ECONNREFUSED, ENETUNREACH, ... */
|
||||
conn->flags |= CO_FL_ERROR;
|
||||
chk_report_conn_err(conn, errno, 0);
|
||||
break;
|
||||
case SN_ERR_PRXCOND:
|
||||
case SN_ERR_RESOURCE:
|
||||
case SN_ERR_INTERNAL:
|
||||
conn->flags |= CO_FL_ERROR;
|
||||
chk_report_conn_err(conn, 0, 0);
|
||||
break;
|
||||
}
|
||||
|
||||
/* here, we have seen a synchronous error, no fd was allocated */
|
||||
|
||||
check->state &= ~CHK_ST_INPROGRESS;
|
||||
check_notify_failure(check);
|
||||
|
||||
/* we allow up to min(inter, timeout.connect) for a connection
|
||||
* to establish but only when timeout.check is set
|
||||
* as it may be to short for a full check otherwise
|
||||
*/
|
||||
while (tick_is_expired(t->expire, now_ms)) {
|
||||
int t_con;
|
||||
|
||||
t_con = tick_add(t->expire, s->proxy->timeout.connect);
|
||||
t->expire = tick_add(t->expire, MS_TO_TICKS(check->inter));
|
||||
|
||||
if (s->proxy->timeout.check)
|
||||
t->expire = tick_first(t->expire, t_con);
|
||||
}
|
||||
}
|
||||
else {
|
||||
/* there was a test running.
|
||||
* First, let's check whether there was an uncaught error,
|
||||
* which can happen on connect timeout or error.
|
||||
*/
|
||||
if (check->result == CHK_RES_UNKNOWN) {
|
||||
/* good connection is enough for pure TCP check */
|
||||
struct pid_list *elem = check->curpid;
|
||||
int status = HCHK_STATUS_UNKNOWN;
|
||||
|
||||
if (elem->exited) {
|
||||
status = elem->status; /* Save in case the process exits between use below */
|
||||
if (!WIFEXITED(status))
|
||||
check->code = -1;
|
||||
else
|
||||
check->code = WEXITSTATUS(status);
|
||||
if (!WIFEXITED(status) || WEXITSTATUS(status))
|
||||
status = HCHK_STATUS_PROCERR;
|
||||
else
|
||||
status = HCHK_STATUS_PROCOK;
|
||||
} else if (expired) {
|
||||
status = HCHK_STATUS_PROCTOUT;
|
||||
Warning("kill %d\n", elem->pid);
|
||||
kill(elem->pid, SIGTERM);
|
||||
}
|
||||
set_server_check_status(check, status, NULL);
|
||||
}
|
||||
|
||||
if (check->result == CHK_RES_FAILED) {
|
||||
/* a failure or timeout detected */
|
||||
check_notify_failure(check);
|
||||
}
|
||||
else if (check->result == CHK_RES_CONDPASS) {
|
||||
/* check is OK but asks for stopping mode */
|
||||
check_notify_stopping(check);
|
||||
}
|
||||
else if (check->result == CHK_RES_PASSED) {
|
||||
/* a success was detected */
|
||||
check_notify_success(check);
|
||||
}
|
||||
check->state &= ~CHK_ST_INPROGRESS;
|
||||
|
||||
pid_list_del(check->curpid);
|
||||
|
||||
rv = 0;
|
||||
if (global.spread_checks > 0) {
|
||||
rv = srv_getinter(check) * global.spread_checks / 100;
|
||||
rv -= (int) (2 * rv * (rand() / (RAND_MAX + 1.0)));
|
||||
}
|
||||
t->expire = tick_add(now_ms, MS_TO_TICKS(srv_getinter(check) + rv));
|
||||
}
|
||||
|
||||
reschedule:
|
||||
while (tick_is_expired(t->expire, now_ms))
|
||||
t->expire = tick_add(t->expire, MS_TO_TICKS(check->inter));
|
||||
return t;
|
||||
}
|
||||
|
||||
/*
|
||||
* manages a server health-check that uses a connection. Returns
|
||||
* the time the task accepts to wait, or TIME_ETERNITY for infinity.
|
||||
*/
|
||||
static struct task *process_chk_conn(struct task *t)
|
||||
{
|
||||
struct check *check = t->context;
|
||||
struct server *s = check->server;
|
||||
@ -1596,6 +2026,19 @@ static struct task *process_chk(struct task *t)
|
||||
return t;
|
||||
}
|
||||
|
||||
/*
|
||||
* manages a server health-check. Returns
|
||||
* the time the task accepts to wait, or TIME_ETERNITY for infinity.
|
||||
*/
|
||||
static struct task *process_chk(struct task *t)
|
||||
{
|
||||
struct check *check = t->context;
|
||||
|
||||
if (check->type == PR_O2_EXT_CHK)
|
||||
return process_chk_proc(t);
|
||||
return process_chk_conn(t);
|
||||
}
|
||||
|
||||
static int start_check_task(struct check *check, int mininter,
|
||||
int nbcheck, int srvpos)
|
||||
{
|
||||
@ -1686,6 +2129,13 @@ int start_checks() {
|
||||
* the number of servers, weighted by the server's position in the list.
|
||||
*/
|
||||
for (px = proxy; px; px = px->next) {
|
||||
if ((px->options2 & PR_O2_CHK_ANY) == PR_O2_EXT_CHK) {
|
||||
if (init_pid_list()) {
|
||||
Alert("Starting [%s] check: out of memory.\n", px->id);
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
for (s = px->srv; s; s = s->next) {
|
||||
/* A task for the main check */
|
||||
if (s->check.state & CHK_ST_CONFIGURED) {
|
||||
|
Loading…
x
Reference in New Issue
Block a user