From dcac41806239d4f3b2de5e4d1bacf9c03ca99efb Mon Sep 17 00:00:00 2001 From: Christopher Faulet Date: Tue, 15 Jun 2021 16:17:17 +0200 Subject: [PATCH] BUG/MEDIUM: resolvers: Add a task on servers to check SRV resolution status When a server relies on a SRV resolution, a task is created to clean it up (fqdn/port and address) when the SRV resolution is considered as outdated (based on the resolvers 'timeout' value). It is only possible if the server inherits outdated info from a state file and is no longer selected to be attached to a SRV item. Note that most of time, a server is attached to a SRV item. Thus when the item becomes obsolete, the server is cleaned up. It is important to have such task to be sure the server will be free again to have a chance to be resolved again with fresh information. Of course, this patch is a workaround to solve a design issue. But there is no other obvious way to fix it without rewritting all the resolvers part. And it must be backportable. This patch relies on following commits: * MINOR: resolvers: Clean server in a dedicated function when removing a SRV item * MINOR: resolvers: Remove server from named_servers tree when removing a SRV item All the series must be backported as far as 2.2 after some observation period. Backports to 2.0 and 1.8 must be evaluated. --- include/haproxy/server-t.h | 1 + src/resolvers.c | 46 +++++++++++++++++++++++++++++++++----- src/server.c | 1 + src/server_state.c | 3 ++- 4 files changed, 44 insertions(+), 7 deletions(-) diff --git a/include/haproxy/server-t.h b/include/haproxy/server-t.h index e866efaa8..ef91ee9c7 100644 --- a/include/haproxy/server-t.h +++ b/include/haproxy/server-t.h @@ -368,6 +368,7 @@ struct server { struct list srv_rec_item; /* to attach server to a srv record item */ struct list ip_rec_item; /* to attach server to a A or AAAA record item */ struct ebpt_node host_dn; /* hostdn store for srvrq and state file matching*/ + struct task *srvrq_check; /* Task testing SRV record expiration date for this server */ struct { const char *file; /* file where the section appears */ struct eb32_node id; /* place in the tree of used IDs */ diff --git a/src/resolvers.c b/src/resolvers.c index 8e18faa72..a068137ff 100644 --- a/src/resolvers.c +++ b/src/resolvers.c @@ -595,6 +595,26 @@ static void resolv_srvrq_cleanup_srv(struct server *srv) HA_SPIN_UNLOCK(SERVER_LOCK, &srv->lock); LIST_DELETE(&srv->srv_rec_item); LIST_APPEND(&srv->srvrq->attached_servers, &srv->srv_rec_item); + + srv->srvrq_check->expire = TICK_ETERNITY; +} + +/* Takes care to cleanup a server resolution when it is outdated. This only + * happens for a server relying on a SRV record. + */ +static struct task *resolv_srvrq_expire_task(struct task *t, void *context, unsigned int state) +{ + struct server *srv = context; + + if (!tick_is_expired(t->expire, now_ms)) + goto end; + + HA_SPIN_LOCK(DNS_LOCK, &srv->srvrq->resolvers); + resolv_srvrq_cleanup_srv(srv); + HA_SPIN_UNLOCK(DNS_LOCK, &srv->srvrq->resolvers); + + end: + return t; } /* Checks for any obsolete record, also identify any SRV request, and try to @@ -727,6 +747,7 @@ srv_found: if (srv) { /* re-enable DNS resolution for this server by default */ srv->flags &= ~SRV_F_NO_RESOLUTION; + srv->srvrq_check->expire = TICK_ETERNITY; /* Check if an Additional Record is associated to this SRV record. * Perform some sanity checks too to ensure the record can be used. @@ -2428,17 +2449,30 @@ static int resolvers_finalize_config(void) continue; } srv->resolvers = resolvers; + srv->srvrq_check = NULL; + if (srv->srvrq) { + if (!srv->srvrq->resolvers) { + srv->srvrq->resolvers = srv->resolvers; + if (resolv_link_resolution(srv->srvrq, OBJ_TYPE_SRVRQ, 0) == -1) { + ha_alert("%s '%s' : unable to set DNS resolution for server '%s'.\n", + proxy_type_str(px), px->id, srv->id); + err_code |= (ERR_ALERT|ERR_ABORT); + continue; + } + } - if (srv->srvrq && !srv->srvrq->resolvers) { - srv->srvrq->resolvers = srv->resolvers; - if (resolv_link_resolution(srv->srvrq, OBJ_TYPE_SRVRQ, 0) == -1) { - ha_alert("%s '%s' : unable to set DNS resolution for server '%s'.\n", + srv->srvrq_check = task_new(MAX_THREADS_MASK); + if (!srv->srvrq_check) { + ha_alert("%s '%s' : unable to create SRVRQ task for server '%s'.\n", proxy_type_str(px), px->id, srv->id); err_code |= (ERR_ALERT|ERR_ABORT); - continue; + goto err; } + srv->srvrq_check->process = resolv_srvrq_expire_task; + srv->srvrq_check->context = srv; + srv->srvrq_check->expire = TICK_ETERNITY; } - if (!srv->srvrq && resolv_link_resolution(srv, OBJ_TYPE_SERVER, 0) == -1) { + else if (resolv_link_resolution(srv, OBJ_TYPE_SERVER, 0) == -1) { ha_alert("%s '%s', unable to set DNS resolution for server '%s'.\n", proxy_type_str(px), px->id, srv->id); err_code |= (ERR_ALERT|ERR_ABORT); diff --git a/src/server.c b/src/server.c index 09a969703..f48024008 100644 --- a/src/server.c +++ b/src/server.c @@ -2201,6 +2201,7 @@ struct server *new_server(struct proxy *proxy) void free_server(struct server *srv) { task_destroy(srv->warmup); + task_destroy(srv->srvrq_check); free(srv->id); free(srv->cookie); diff --git a/src/server_state.c b/src/server_state.c index 7a478a2ec..18d9cd6bf 100644 --- a/src/server_state.c +++ b/src/server_state.c @@ -425,8 +425,9 @@ static void srv_state_srv_update(struct server *srv, int version, char **params) for (i = 0; tmp[i]; i++) tmp[i] = tolower(tmp[i]); - /* insert in tree */ + /* insert in tree and set the srvrq expiration date */ ebis_insert(&srv->srvrq->named_servers, &srv->host_dn); + task_schedule(srv->srvrq_check, tick_add(now_ms, srv->srvrq->resolvers->hold.timeout)); /* Unset SRV_F_MAPPORTS for SRV records. * SRV_F_MAPPORTS is unfortunately set by parse_server()