From c775838ad7a1f33dcd2f1fac01d1a805bb96bc1f Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Tue, 14 Feb 2017 17:28:17 +0100 Subject: [PATCH 01/14] resolved: make sure configured NTAs affect subdomains too This ensures that configured NTAs exclude not only the listed domain but also all domains below it from DNSSEC validation -- except if a positive trust anchor is defined below (as suggested by RFC7647, section 1.1) Fixes: #5048 --- src/resolve/resolved-dns-trust-anchor.c | 25 ++++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/src/resolve/resolved-dns-trust-anchor.c b/src/resolve/resolved-dns-trust-anchor.c index d8529f83173..7e08cba4e1f 100644 --- a/src/resolve/resolved-dns-trust-anchor.c +++ b/src/resolve/resolved-dns-trust-anchor.c @@ -547,10 +547,33 @@ int dns_trust_anchor_lookup_positive(DnsTrustAnchor *d, const DnsResourceKey *ke } int dns_trust_anchor_lookup_negative(DnsTrustAnchor *d, const char *name) { + int r; + assert(d); assert(name); - return set_contains(d->negative_by_name, name); + for (;;) { + /* If the domain is listed as-is in the NTA database, then that counts */ + if (set_contains(d->negative_by_name, name)) + return true; + + /* If the domain isn't listed as NTA, but is listed as positive trust anchor, then that counts. See RFC + * 7646, section 1.1 */ + if (hashmap_contains(d->positive_by_key, &DNS_RESOURCE_KEY_CONST(DNS_CLASS_IN, DNS_TYPE_DS, name))) + return false; + + if (hashmap_contains(d->positive_by_key, &DNS_RESOURCE_KEY_CONST(DNS_CLASS_IN, DNS_TYPE_KEY, name))) + return false; + + /* And now, let's look at the parent, and check that too */ + r = dns_name_parent(&name); + if (r < 0) + return r; + if (r == 0) + break; + } + + return false; } static int dns_trust_anchor_revoked_put(DnsTrustAnchor *d, DnsResourceRecord *rr) { From 97c2ea26456f21334ac164f330426dd518067f08 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Tue, 14 Feb 2017 17:54:30 +0100 Subject: [PATCH 02/14] resolved: fix NSEC proofs for missing TLDs For the wildcard NSEC check we need to generate an "asterisk" domain, by prepend the common ancestor with "*.". So far we did that with a simple strappenda() which is fine for most domains, but doesn't work if the common ancestor is the root domain as we usually write that as "." in normalized form, and "*." joined with "." is "*.." and not "*." as it should be. Hence, use the clean way out, let's just use dns_name_concat() which only exists precisely for this reason, to properly concatenate labels. There's a good chance this actually fixes #5029, as this NSEC proof is triggered by lookups in the TLD "example", which doesn't exist in the Internet. --- src/resolve/resolved-dns-dnssec.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/resolve/resolved-dns-dnssec.c b/src/resolve/resolved-dns-dnssec.c index 51327105d04..eddab58a818 100644 --- a/src/resolve/resolved-dns-dnssec.c +++ b/src/resolve/resolved-dns-dnssec.c @@ -1710,7 +1710,8 @@ static int dnssec_nsec_covers(DnsResourceRecord *rr, const char *name) { } static int dnssec_nsec_covers_wildcard(DnsResourceRecord *rr, const char *name) { - const char *common_suffix, *wc; + _cleanup_free_ char *wc = NULL; + const char *common_suffix; int r; assert(rr); @@ -1734,7 +1735,10 @@ static int dnssec_nsec_covers_wildcard(DnsResourceRecord *rr, const char *name) if (r <= 0) return r; - wc = strjoina("*.", common_suffix); + r = dns_name_concat("*", common_suffix, &wc); + if (r < 0) + return r; + return dns_name_between(dns_resource_key_name(rr->key), wc, rr->nsec.next_domain_name); } From ce7c8b20df36550ee7d30862b50afdea0d206907 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Tue, 14 Feb 2017 18:20:34 +0100 Subject: [PATCH 03/14] resolved: when the dns server feature level grace period elapses, flush caches The cache might contain all kinds of unauthenticated data that we really shouldn't be using if we upgrade our feature level and suddenly are able to get authenticated data again. Might fix: #4866 --- src/resolve/resolved-dns-server.c | 21 +++++++++++++++++++++ src/resolve/resolved-dns-server.h | 2 ++ 2 files changed, 23 insertions(+) diff --git a/src/resolve/resolved-dns-server.c b/src/resolve/resolved-dns-server.c index 072cbfca1ab..9f81798f6eb 100644 --- a/src/resolve/resolved-dns-server.c +++ b/src/resolve/resolved-dns-server.c @@ -415,6 +415,8 @@ DnsServerFeatureLevel dns_server_possible_feature_level(DnsServer *s) { dns_server_feature_level_to_string(s->possible_feature_level), dns_server_string(s)); + dns_server_flush_cache(s); + } else if (s->possible_feature_level <= s->verified_feature_level) s->possible_feature_level = s->verified_feature_level; else { @@ -792,6 +794,25 @@ DnssecMode dns_server_get_dnssec_mode(DnsServer *s) { return manager_get_dnssec_mode(s->manager); } +void dns_server_flush_cache(DnsServer *s) { + DnsServer *current; + DnsScope *scope; + + assert(s); + + /* Flush the cache of the scope this server belongs to */ + + current = s->link ? s->link->current_dns_server : s->manager->current_dns_server; + if (current != s) + return; + + scope = s->link ? s->link->unicast_scope : s->manager->unicast_scope; + if (!scope) + return; + + dns_cache_flush(&scope->cache); +} + static const char* const dns_server_type_table[_DNS_SERVER_TYPE_MAX] = { [DNS_SERVER_SYSTEM] = "system", [DNS_SERVER_FALLBACK] = "fallback", diff --git a/src/resolve/resolved-dns-server.h b/src/resolve/resolved-dns-server.h index 406282d864b..bc95d53c6a8 100644 --- a/src/resolve/resolved-dns-server.h +++ b/src/resolve/resolved-dns-server.h @@ -149,3 +149,5 @@ DnssecMode dns_server_get_dnssec_mode(DnsServer *s); DEFINE_TRIVIAL_CLEANUP_FUNC(DnsServer*, dns_server_unref); extern const struct hash_ops dns_server_hash_ops; + +void dns_server_flush_cache(DnsServer *s); From 941dd294507e1def8fd5e59c5bc3e3ed2b27b6b0 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Tue, 14 Feb 2017 19:25:47 +0100 Subject: [PATCH 04/14] resolved: automatically downgrade reply bits on send Doesn't really change anything, but makes things a bit simpler to read. --- src/resolve/resolved-dns-stub.c | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/src/resolve/resolved-dns-stub.c b/src/resolve/resolved-dns-stub.c index 7d43825960e..12936bc0158 100644 --- a/src/resolve/resolved-dns-stub.c +++ b/src/resolve/resolved-dns-stub.c @@ -94,9 +94,18 @@ static int dns_stub_finish_reply_packet( assert(p); - /* If the client didn't do EDNS, clamp the rcode to 4 bit */ - if (!add_opt && rcode > 0xF) - rcode = DNS_RCODE_SERVFAIL; + if (!add_opt) { + /* If the client can't to EDNS0, don't do DO either */ + edns0_do = false; + + /* If the client didn't do EDNS, clamp the rcode to 4 bit */ + if (rcode > 0xF) + rcode = DNS_RCODE_SERVFAIL; + } + + /* Don't set the AD bit unless DO is on, too */ + if (!edns0_do) + ad = false; DNS_PACKET_HEADER(p)->id = id; @@ -214,7 +223,7 @@ static void dns_stub_query_complete(DnsQuery *q) { q->answer_rcode, !!q->request_dns_packet->opt, DNS_PACKET_DO(q->request_dns_packet), - DNS_PACKET_DO(q->request_dns_packet) && dns_query_fully_authenticated(q)); + dns_query_fully_authenticated(q)); if (r < 0) { log_debug_errno(r, "Failed to finish reply packet: %m"); break; From 2b2d98c175f851b77706c4bee0deed99d36fa565 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Wed, 15 Feb 2017 15:13:32 +0100 Subject: [PATCH 05/14] resolved: propagate AD bit for NXDOMAIN into stub replies When we managed to prove non-existance of a name, then we should properly propagate this to clients by setting the AD bit on NXDOMAIN. See: #4621 --- src/resolve/resolved-dns-stub.c | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/src/resolve/resolved-dns-stub.c b/src/resolve/resolved-dns-stub.c index 12936bc0158..7afbfedfb07 100644 --- a/src/resolve/resolved-dns-stub.c +++ b/src/resolve/resolved-dns-stub.c @@ -171,7 +171,7 @@ static int dns_stub_send(Manager *m, DnsStream *s, DnsPacket *p, DnsPacket *repl return 0; } -static int dns_stub_send_failure(Manager *m, DnsStream *s, DnsPacket *p, int rcode) { +static int dns_stub_send_failure(Manager *m, DnsStream *s, DnsPacket *p, int rcode, bool authenticated) { _cleanup_(dns_packet_unrefp) DnsPacket *reply = NULL; int r; @@ -182,7 +182,7 @@ static int dns_stub_send_failure(Manager *m, DnsStream *s, DnsPacket *p, int rco if (r < 0) return log_debug_errno(r, "Failed to make failure packet: %m"); - r = dns_stub_finish_reply_packet(reply, DNS_PACKET_ID(p), rcode, !!p->opt, DNS_PACKET_DO(p), false); + r = dns_stub_finish_reply_packet(reply, DNS_PACKET_ID(p), rcode, !!p->opt, DNS_PACKET_DO(p), authenticated); if (r < 0) return log_debug_errno(r, "Failed to build failure packet: %m"); @@ -207,7 +207,7 @@ static void dns_stub_query_complete(DnsQuery *q) { r = dns_query_process_cname(q); if (r == -ELOOP) { - (void) dns_stub_send_failure(q->manager, q->request_dns_stream, q->request_dns_packet, DNS_RCODE_SERVFAIL); + (void) dns_stub_send_failure(q->manager, q->request_dns_stream, q->request_dns_packet, DNS_RCODE_SERVFAIL, false); break; } if (r < 0) { @@ -233,11 +233,11 @@ static void dns_stub_query_complete(DnsQuery *q) { break; case DNS_TRANSACTION_RCODE_FAILURE: - (void) dns_stub_send_failure(q->manager, q->request_dns_stream, q->request_dns_packet, q->answer_rcode); + (void) dns_stub_send_failure(q->manager, q->request_dns_stream, q->request_dns_packet, q->answer_rcode, dns_query_fully_authenticated(q)); break; case DNS_TRANSACTION_NOT_FOUND: - (void) dns_stub_send_failure(q->manager, q->request_dns_stream, q->request_dns_packet, DNS_RCODE_NXDOMAIN); + (void) dns_stub_send_failure(q->manager, q->request_dns_stream, q->request_dns_packet, DNS_RCODE_NXDOMAIN, dns_query_fully_authenticated(q)); break; case DNS_TRANSACTION_TIMEOUT: @@ -253,7 +253,7 @@ static void dns_stub_query_complete(DnsQuery *q) { case DNS_TRANSACTION_NO_TRUST_ANCHOR: case DNS_TRANSACTION_RR_TYPE_UNSUPPORTED: case DNS_TRANSACTION_NETWORK_DOWN: - (void) dns_stub_send_failure(q->manager, q->request_dns_stream, q->request_dns_packet, DNS_RCODE_SERVFAIL); + (void) dns_stub_send_failure(q->manager, q->request_dns_stream, q->request_dns_packet, DNS_RCODE_SERVFAIL, false); break; case DNS_TRANSACTION_NULL: @@ -300,52 +300,52 @@ static void dns_stub_process_query(Manager *m, DnsStream *s, DnsPacket *p) { if (in_addr_is_localhost(p->family, &p->sender) <= 0 || in_addr_is_localhost(p->family, &p->destination) <= 0) { log_error("Got packet on unexpected IP range, refusing."); - dns_stub_send_failure(m, s, p, DNS_RCODE_SERVFAIL); + dns_stub_send_failure(m, s, p, DNS_RCODE_SERVFAIL, false); goto fail; } r = dns_packet_extract(p); if (r < 0) { log_debug_errno(r, "Failed to extract resources from incoming packet, ignoring packet: %m"); - dns_stub_send_failure(m, s, p, DNS_RCODE_FORMERR); + dns_stub_send_failure(m, s, p, DNS_RCODE_FORMERR, false); goto fail; } if (!DNS_PACKET_VERSION_SUPPORTED(p)) { log_debug("Got EDNS OPT field with unsupported version number."); - dns_stub_send_failure(m, s, p, DNS_RCODE_BADVERS); + dns_stub_send_failure(m, s, p, DNS_RCODE_BADVERS, false); goto fail; } if (dns_type_is_obsolete(p->question->keys[0]->type)) { log_debug("Got message with obsolete key type, refusing."); - dns_stub_send_failure(m, s, p, DNS_RCODE_NOTIMP); + dns_stub_send_failure(m, s, p, DNS_RCODE_NOTIMP, false); goto fail; } if (dns_type_is_zone_transer(p->question->keys[0]->type)) { log_debug("Got request for zone transfer, refusing."); - dns_stub_send_failure(m, s, p, DNS_RCODE_NOTIMP); + dns_stub_send_failure(m, s, p, DNS_RCODE_NOTIMP, false); goto fail; } if (!DNS_PACKET_RD(p)) { /* If the "rd" bit is off (i.e. recursion was not requested), then refuse operation */ log_debug("Got request with recursion disabled, refusing."); - dns_stub_send_failure(m, s, p, DNS_RCODE_REFUSED); + dns_stub_send_failure(m, s, p, DNS_RCODE_REFUSED, false); goto fail; } if (DNS_PACKET_DO(p) && DNS_PACKET_CD(p)) { log_debug("Got request with DNSSEC CD bit set, refusing."); - dns_stub_send_failure(m, s, p, DNS_RCODE_NOTIMP); + dns_stub_send_failure(m, s, p, DNS_RCODE_NOTIMP, false); goto fail; } r = dns_query_new(m, &q, p->question, p->question, 0, SD_RESOLVED_PROTOCOLS_ALL|SD_RESOLVED_NO_SEARCH); if (r < 0) { log_error_errno(r, "Failed to generate query object: %m"); - dns_stub_send_failure(m, s, p, DNS_RCODE_SERVFAIL); + dns_stub_send_failure(m, s, p, DNS_RCODE_SERVFAIL, false); goto fail; } @@ -365,7 +365,7 @@ static void dns_stub_process_query(Manager *m, DnsStream *s, DnsPacket *p) { r = dns_query_go(q); if (r < 0) { log_error_errno(r, "Failed to start query: %m"); - dns_stub_send_failure(m, s, p, DNS_RCODE_SERVFAIL); + dns_stub_send_failure(m, s, p, DNS_RCODE_SERVFAIL, false); goto fail; } From cbb1aabb99c5898213cc8d7d942785dcc442581d Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Wed, 15 Feb 2017 15:29:05 +0100 Subject: [PATCH 06/14] resolved: when accepted a query candidate as final answer, propagate authentication bool even on failure Let's make sure that if we accept a query candidate, then let's also propagate the authenticated flag for it, so that we can properly report back to the clients whether lookups failed due to non-existance that can be proven. --- src/resolve/resolved-dns-query.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/src/resolve/resolved-dns-query.c b/src/resolve/resolved-dns-query.c index c58845c3b60..0dfe9320b5d 100644 --- a/src/resolve/resolved-dns-query.c +++ b/src/resolve/resolved-dns-query.c @@ -811,6 +811,7 @@ static void dns_query_accept(DnsQuery *q, DnsQueryCandidate *c) { q->answer = dns_answer_unref(q->answer); q->answer_rcode = 0; q->answer_dnssec_result = _DNSSEC_RESULT_INVALID; + q->answer_authenticated = false; q->answer_errno = c->error_code; } @@ -847,15 +848,18 @@ static void dns_query_accept(DnsQuery *q, DnsQueryCandidate *c) { continue; default: - /* Any kind of failure? Store the data away, - * if there's nothing stored yet. */ - + /* Any kind of failure? Store the data away, if there's nothing stored yet. */ if (state == DNS_TRANSACTION_SUCCESS) continue; + /* If there's already an authenticated negative reply stored, then prefer that over any unauthenticated one */ + if (q->answer_authenticated && !t->answer_authenticated) + continue; + q->answer = dns_answer_unref(q->answer); q->answer_rcode = t->answer_rcode; q->answer_dnssec_result = t->answer_dnssec_result; + q->answer_authenticated = t->answer_authenticated; q->answer_errno = t->answer_errno; state = t->state; From 97277567b8695e1c3f3eab65c9d1a9cf02d06fde Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Wed, 15 Feb 2017 16:01:53 +0100 Subject: [PATCH 07/14] resolved: when DNSSEC mode is disabled, don't go beyond EDNS0 feature level There's no point in talking to a server in DNSSEC mode when we don't actually want to verify anything. See: #5352 --- src/resolve/resolved-dns-server.c | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/src/resolve/resolved-dns-server.c b/src/resolve/resolved-dns-server.c index 9f81798f6eb..2df917e61e1 100644 --- a/src/resolve/resolved-dns-server.c +++ b/src/resolve/resolved-dns-server.c @@ -399,12 +399,24 @@ static bool dns_server_grace_period_expired(DnsServer *s) { } DnsServerFeatureLevel dns_server_possible_feature_level(DnsServer *s) { + DnsServerFeatureLevel best; + assert(s); - if (s->possible_feature_level != DNS_SERVER_FEATURE_LEVEL_BEST && - dns_server_grace_period_expired(s)) { + /* Determine the best feature level we care about. If DNSSEC mode is off there's no point in using anything + * better than EDNS0, hence don't even try. */ + best = dns_server_get_dnssec_mode(s) == DNSSEC_NO ? + DNS_SERVER_FEATURE_LEVEL_EDNS0 : + DNS_SERVER_FEATURE_LEVEL_BEST; - s->possible_feature_level = DNS_SERVER_FEATURE_LEVEL_BEST; + /* Clamp the feature level the highest level we care about. The DNSSEC mode might have changed since the last + * time, hence let's downgrade if we are still at a higher level. */ + if (s->possible_feature_level > best) + s->possible_feature_level = best; + + if (s->possible_feature_level < best && dns_server_grace_period_expired(s)) { + + s->possible_feature_level = best; dns_server_reset_counters(s); From dc349f5f7a833233aebd08e30e662d6298f854e3 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Wed, 15 Feb 2017 18:24:46 +0100 Subject: [PATCH 08/14] resolved: lengthen timeout for TCP transactions When we are doing a TCP transaction the kernel will automatically resend all packets for us, there's no need to do that ourselves. Hence: increase the timeout for TCP transactions substantially, to give the kernel enough time to connect to the peer, without interrupting it when we become impatient. --- src/resolve/resolved-dns-transaction.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/src/resolve/resolved-dns-transaction.c b/src/resolve/resolved-dns-transaction.c index 96874096636..6c0c7539c2e 100644 --- a/src/resolve/resolved-dns-transaction.c +++ b/src/resolve/resolved-dns-transaction.c @@ -31,6 +31,7 @@ #include "string-table.h" #define TRANSACTIONS_MAX 4096 +#define TRANSACTION_TCP_TIMEOUT_USEC (10U*USEC_PER_SEC) static void dns_transaction_reset_answer(DnsTransaction *t) { assert(t); @@ -1212,9 +1213,17 @@ static usec_t transaction_get_resend_timeout(DnsTransaction *t) { assert(t); assert(t->scope); + switch (t->scope->protocol) { case DNS_PROTOCOL_DNS: + + /* When we do TCP, grant a much longer timeout, as in this case there's no need for us to quickly + * resend, as the kernel does that anyway for us, and we really don't want to interrupt it in that + * needlessly. */ + if (t->stream) + return TRANSACTION_TCP_TIMEOUT_USEC; + assert(t->server); return t->server->resend_timeout; @@ -1579,7 +1588,7 @@ int dns_transaction_go(DnsTransaction *t) { r = dns_transaction_emit_udp(t); if (r == -EMSGSIZE) log_debug("Sending query via TCP since it is too large."); - if (r == -EAGAIN) + else if (r == -EAGAIN) log_debug("Sending query via TCP since server doesn't support UDP."); if (r == -EMSGSIZE || r == -EAGAIN) r = dns_transaction_open_tcp(t); From 201d99584ed7af8078bb243ce2587e5455074713 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Wed, 15 Feb 2017 18:28:23 +0100 Subject: [PATCH 09/14] resolved: cache SERVFAIL responses for 30s Some domains (such as us.ynuf.alipay.com) almost appear as if they actively want to sabotage our DNSSEC work. Specifically, they unconditionally return SERVFAIL on SOA lookups and always only after a 1s delay (at least). This is pretty bad for our validation logic, as we use SOA lookups to distuingish zones from non-terminal names. Moreover, SERVFAIL is an error that is typically returned if we send requests a server doesn't grok, and thus is reason for us to downgrade our protocol and try again. In case of these zones this means we'll accept the SERVFAIL response only after a full iterative downgrade to our lowest feature level: TCP. In combination with the 1s delays this has the effect of making us hit our transaction timeout way to easily. As first attempt to improve the situation: let's start caching SERVFAIL responses in our cache, after the full downgrade for a short period of time. Conceptually this is exposed as "weird rcode" caching, but for now we only consider SERVFAIL a "weird rcode" worthy of caching. Later on we might want to add more. --- src/resolve/resolved-dns-cache.c | 123 +++++++++++++++++++++++-------- 1 file changed, 93 insertions(+), 30 deletions(-) diff --git a/src/resolve/resolved-dns-cache.c b/src/resolve/resolved-dns-cache.c index c43a7865dc3..ee93147015a 100644 --- a/src/resolve/resolved-dns-cache.c +++ b/src/resolve/resolved-dns-cache.c @@ -34,6 +34,10 @@ /* We never keep any item longer than 2h in our cache */ #define CACHE_TTL_MAX_USEC (2 * USEC_PER_HOUR) +/* How long to cache strange rcodes, i.e. rcodes != SUCCESS and != NXDOMAIN (specifically: that's only SERVFAIL for + * now) */ +#define CACHE_TTL_STRANGE_RCODE_USEC (30 * USEC_PER_SEC) + typedef enum DnsCacheItemType DnsCacheItemType; typedef struct DnsCacheItem DnsCacheItem; @@ -41,12 +45,14 @@ enum DnsCacheItemType { DNS_CACHE_POSITIVE, DNS_CACHE_NODATA, DNS_CACHE_NXDOMAIN, + DNS_CACHE_RCODE, /* "strange" RCODE (effective only SERVFAIL for now) */ }; struct DnsCacheItem { DnsCacheItemType type; DnsResourceKey *key; DnsResourceRecord *rr; + int rcode; usec_t until; bool authenticated:1; @@ -60,6 +66,27 @@ struct DnsCacheItem { LIST_FIELDS(DnsCacheItem, by_key); }; +static const char *dns_cache_item_type_to_string(DnsCacheItem *item) { + assert(item); + + switch (item->type) { + + case DNS_CACHE_POSITIVE: + return "POSITIVE"; + + case DNS_CACHE_NODATA: + return "NODATA"; + + case DNS_CACHE_NXDOMAIN: + return "NXDOMAIN"; + + case DNS_CACHE_RCODE: + return dns_rcode_to_string(item->rcode); + } + + return NULL; +} + static void dns_cache_item_free(DnsCacheItem *i) { if (!i) return; @@ -484,7 +511,6 @@ static int dns_cache_put_negative( assert(c); assert(key); - assert(soa); assert(owner_address); /* Never cache pseudo RR keys. DNS_TYPE_ANY is particularly @@ -495,13 +521,17 @@ static int dns_cache_put_negative( if (dns_type_is_pseudo(key->type)) return 0; - if (nsec_ttl <= 0 || soa->soa.minimum <= 0 || soa->ttl <= 0) { - log_debug("Not caching negative entry with zero SOA/NSEC/NSEC3 TTL: %s", - dns_resource_key_to_string(key, key_str, sizeof key_str)); - return 0; - } + if (IN_SET(rcode, DNS_RCODE_SUCCESS, DNS_RCODE_NXDOMAIN)) { + if (!soa) + return 0; - if (!IN_SET(rcode, DNS_RCODE_SUCCESS, DNS_RCODE_NXDOMAIN)) + /* For negative replies, check if we have a TTL of a SOA */ + if (nsec_ttl <= 0 || soa->soa.minimum <= 0 || soa->ttl <= 0) { + log_debug("Not caching negative entry with zero SOA/NSEC/NSEC3 TTL: %s", + dns_resource_key_to_string(key, key_str, sizeof key_str)); + return 0; + } + } else if (rcode != DNS_RCODE_SERVFAIL) return 0; r = dns_cache_init(c); @@ -514,12 +544,17 @@ static int dns_cache_put_negative( if (!i) return -ENOMEM; - i->type = rcode == DNS_RCODE_SUCCESS ? DNS_CACHE_NODATA : DNS_CACHE_NXDOMAIN; - i->until = calculate_until(soa, nsec_ttl, timestamp, true); + i->type = + rcode == DNS_RCODE_SUCCESS ? DNS_CACHE_NODATA : + rcode == DNS_RCODE_NXDOMAIN ? DNS_CACHE_NXDOMAIN : DNS_CACHE_RCODE; + i->until = + i->type == DNS_CACHE_RCODE ? timestamp + CACHE_TTL_STRANGE_RCODE_USEC : + calculate_until(soa, nsec_ttl, timestamp, true); i->authenticated = authenticated; i->owner_family = owner_family; i->owner_address = *owner_address; i->prioq_idx = PRIOQ_IDX_NULL; + i->rcode = rcode; if (i->type == DNS_CACHE_NXDOMAIN) { /* NXDOMAIN entries should apply equally to all types, so we use ANY as @@ -543,7 +578,7 @@ static int dns_cache_put_negative( return r; log_debug("Added %s cache entry for %s "USEC_FMT"s", - i->type == DNS_CACHE_NODATA ? "NODATA" : "NXDOMAIN", + dns_cache_item_type_to_string(i), dns_resource_key_to_string(i->key, key_str, sizeof key_str), (i->until - timestamp) / USEC_PER_SEC); @@ -615,6 +650,7 @@ int dns_cache_put( const union in_addr_union *owner_address) { DnsResourceRecord *soa = NULL, *rr; + bool weird_rcode = false; DnsAnswerFlags flags; unsigned cache_keys; int r, ifindex; @@ -624,18 +660,28 @@ int dns_cache_put( dns_cache_remove_previous(c, key, answer); - /* We only care for positive replies and NXDOMAINs, on all - * other replies we will simply flush the respective entries, - * and that's it */ - if (!IN_SET(rcode, DNS_RCODE_SUCCESS, DNS_RCODE_NXDOMAIN)) - return 0; + /* We only care for positive replies and NXDOMAINs, on all other replies we will simply flush the respective + * entries, and that's it. (Well, with one further exception: since some DNS zones (akamai!) return SERVFAIL + * consistently for some lookups, and forwarders tend to propagate that we'll cache that too, but only for a + * short time.) */ - if (dns_answer_size(answer) <= 0) { - char key_str[DNS_RESOURCE_KEY_STRING_MAX]; + if (IN_SET(rcode, DNS_RCODE_SUCCESS, DNS_RCODE_NXDOMAIN)) { - log_debug("Not caching negative entry without a SOA record: %s", - dns_resource_key_to_string(key, key_str, sizeof key_str)); - return 0; + if (dns_answer_size(answer) <= 0) { + char key_str[DNS_RESOURCE_KEY_STRING_MAX]; + + log_debug("Not caching negative entry without a SOA record: %s", + dns_resource_key_to_string(key, key_str, sizeof key_str)); + return 0; + } + + } else { + /* Only cache SERVFAIL as "weird" rcode for now. We can add more later, should that turn out to be + * beneficial. */ + if (rcode != DNS_RCODE_SERVFAIL) + return 0; + + weird_rcode = true; } cache_keys = dns_answer_size(answer); @@ -690,19 +736,20 @@ int dns_cache_put( if (r > 0) return 0; - /* See https://tools.ietf.org/html/rfc2308, which say that a - * matching SOA record in the packet is used to enable - * negative caching. */ + /* See https://tools.ietf.org/html/rfc2308, which say that a matching SOA record in the packet is used to + * enable negative caching. We apply one exception though: if we are about to cache a weird rcode we do so + * regardless of a SOA. */ r = dns_answer_find_soa(answer, key, &soa, &flags); if (r < 0) goto fail; - if (r == 0) - return 0; - - /* Refuse using the SOA data if it is unsigned, but the key is - * signed */ - if (authenticated && (flags & DNS_ANSWER_AUTHENTICATED) == 0) + if (r == 0 && !weird_rcode) return 0; + if (r > 0) { + /* Refuse using the SOA data if it is unsigned, but the key is + * signed */ + if (authenticated && (flags & DNS_ANSWER_AUTHENTICATED) == 0) + return 0; + } r = dns_cache_put_negative( c, @@ -799,6 +846,7 @@ int dns_cache_lookup(DnsCache *c, DnsResourceKey *key, bool clamp_ttl, int *rcod DnsCacheItem *j, *first, *nsec = NULL; bool have_authenticated = false, have_non_authenticated = false; usec_t current; + int found_rcode = -1; assert(c); assert(key); @@ -842,6 +890,8 @@ int dns_cache_lookup(DnsCache *c, DnsResourceKey *key, bool clamp_ttl, int *rcod n++; } else if (j->type == DNS_CACHE_NXDOMAIN) nxdomain = true; + else if (j->type == DNS_CACHE_RCODE) + found_rcode = j->rcode; if (j->authenticated) have_authenticated = true; @@ -849,6 +899,19 @@ int dns_cache_lookup(DnsCache *c, DnsResourceKey *key, bool clamp_ttl, int *rcod have_non_authenticated = true; } + if (found_rcode >= 0) { + log_debug("RCODE %s cache hit for %s", + dns_rcode_to_string(found_rcode), + dns_resource_key_to_string(key, key_str, sizeof(key_str))); + + *ret = NULL; + *rcode = found_rcode; + *authenticated = false; + + c->n_hit++; + return 1; + } + if (nsec && !IN_SET(key->type, DNS_TYPE_NSEC, DNS_TYPE_DS)) { /* Note that we won't derive information for DS RRs from an NSEC, because we only cache NSEC RRs from * the lower-zone of a zone cut, but the DS RRs are on the upper zone. */ @@ -1042,7 +1105,7 @@ void dns_cache_dump(DnsCache *cache, FILE *f) { fputs(dns_resource_key_to_string(j->key, key_str, sizeof key_str), f); fputs(" -- ", f); - fputs(j->type == DNS_CACHE_NODATA ? "NODATA" : "NXDOMAIN", f); + fputs(dns_cache_item_type_to_string(j), f); fputc('\n', f); } } From 7d581a6576ffab89c23fe2862c22d2ee93d61559 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Wed, 15 Feb 2017 18:53:25 +0100 Subject: [PATCH 10/14] resolved: don't downgrade feature level if we get RCODE on UDP level Retrying a transaction via TCP is a good approach for mitigating packet loss. However, it's not a good away way to fix a bad RCODE if we already downgraded to UDP level for it. Hence, don't do this. This is a small tweak only, but shortens the time we spend on downgrading when a specific domain continously returns a bad rcode. --- src/resolve/resolved-dns-transaction.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/src/resolve/resolved-dns-transaction.c b/src/resolve/resolved-dns-transaction.c index 6c0c7539c2e..7d969ff0b74 100644 --- a/src/resolve/resolved-dns-transaction.c +++ b/src/resolve/resolved-dns-transaction.c @@ -911,9 +911,13 @@ void dns_transaction_process_reply(DnsTransaction *t, DnsPacket *p) { /* Request failed, immediately try again with reduced features */ - if (t->current_feature_level <= DNS_SERVER_FEATURE_LEVEL_WORST) { - /* This was already at the lowest possible feature level? If so, we can't downgrade - * this transaction anymore, hence let's process the response, and accept the rcode. */ + if (t->current_feature_level <= DNS_SERVER_FEATURE_LEVEL_UDP) { + /* This was already at UDP feature level? If so, it doesn't make sense to downgrade + * this transaction anymore, hence let's process the response, and accept the + * rcode. Note that we don't retry on TCP, since that's a suitable way to mitigate + * packet loss, but is not going to give us better rcodes should we actually have + * managed to get them already at UDP level. */ + log_debug("Server returned error: %s", dns_rcode_to_string(DNS_PACKET_RCODE(p))); break; } @@ -1136,7 +1140,7 @@ static int dns_transaction_emit_udp(DnsTransaction *t) { return r; if (t->current_feature_level < DNS_SERVER_FEATURE_LEVEL_UDP) - return -EAGAIN; + return -EAGAIN; /* Sorry, can't do UDP, try TCP! */ if (!dns_server_dnssec_supported(t->server) && dns_type_is_dnssec(t->key->type)) return -EOPNOTSUPP; From 1fdeaeb741bf06d1c2efe08c28b5f9d0a3a7a5ec Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Wed, 15 Feb 2017 19:54:32 +0100 Subject: [PATCH 11/14] resolved: show rcode in debug output for incoming replies This is the most important piece of information of replies, hence show this in the first log message about it. (Wireshark shows it too in the short summary, hence this definitely makes sense...) --- src/resolve/resolved-dns-transaction.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/resolve/resolved-dns-transaction.c b/src/resolve/resolved-dns-transaction.c index 7d969ff0b74..532169ff249 100644 --- a/src/resolve/resolved-dns-transaction.c +++ b/src/resolve/resolved-dns-transaction.c @@ -833,7 +833,7 @@ void dns_transaction_process_reply(DnsTransaction *t, DnsPacket *p) { * should hence not attempt to access the query or transaction * after calling this function. */ - log_debug("Processing incoming packet on transaction %" PRIu16".", t->id); + log_debug("Processing incoming packet on transaction %" PRIu16". (rcode=%s)", t->id, dns_rcode_to_string(DNS_PACKET_RCODE(p))); switch (t->scope->protocol) { From 2d4a4e141928f20035e83b59c44f465dbcc893df Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Wed, 15 Feb 2017 19:55:34 +0100 Subject: [PATCH 12/14] resolved: initialize all return values on successful exit of dns_cache_lookup() Following our coding style on success we should initialize all return parameters of a function. We missed to cases for dns_cache_lookup() (but covered all others), fix them too. --- src/resolve/resolved-dns-cache.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/resolve/resolved-dns-cache.c b/src/resolve/resolved-dns-cache.c index ee93147015a..46a25c2bee4 100644 --- a/src/resolve/resolved-dns-cache.c +++ b/src/resolve/resolved-dns-cache.c @@ -865,6 +865,8 @@ int dns_cache_lookup(DnsCache *c, DnsResourceKey *key, bool clamp_ttl, int *rcod *ret = NULL; *rcode = DNS_RCODE_SUCCESS; + *authenticated = false; + return 0; } @@ -879,6 +881,8 @@ int dns_cache_lookup(DnsCache *c, DnsResourceKey *key, bool clamp_ttl, int *rcod *ret = NULL; *rcode = DNS_RCODE_SUCCESS; + *authenticated = false; + return 0; } From 74a3ed740824a4ead40a733ee27f3ac236aa9f8e Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Wed, 15 Feb 2017 19:56:59 +0100 Subject: [PATCH 13/14] resolved: extend various timeouts MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Let's increase a number of timeouts as they apparently are too short for some real-world lookups. See: https://github.com/systemd/systemd/issues/4003#issuecomment-279842616 In particular we change the following timeouts: 1) The first UDP retry we increase 500ms → 750ms. This is a good idea, since some servers need relatively long responses for trivial lookups, and giving up our first attempt also has the effect of trying a different server for the next attempt which has the side effect that we'll run two down-grade iterations in parallel, on both servers. Hence, let's give servers a bit more time in the first iteration. 2) Permit 24 retries instead of just 16 per transactions. If we end up downgrading all the way down to UDP for a lookup we already need 5 iterations for that. If we want permit a couple of lost packages for each (let's say 4), then we already need 20 iterations. 3) Increase the overall query timeout on the service side to 60s (from 45s), simply because very long and slow DNSSEC + CNAME chains (such as us.ynuf.alipay.com) hit this boundary too easily. The client side timeout for the bus method call is increased to 90s, in order to have room for the dbus reply to go through --- src/resolve/resolve-tool.c | 2 +- src/resolve/resolved-dns-query.c | 2 +- src/resolve/resolved-dns-server.c | 2 +- src/resolve/resolved-dns-transaction.h | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/resolve/resolve-tool.c b/src/resolve/resolve-tool.c index 3cec36817aa..32537ce6e8a 100644 --- a/src/resolve/resolve-tool.c +++ b/src/resolve/resolve-tool.c @@ -38,7 +38,7 @@ #include "strv.h" #include "terminal-util.h" -#define DNS_CALL_TIMEOUT_USEC (45*USEC_PER_SEC) +#define DNS_CALL_TIMEOUT_USEC (90*USEC_PER_SEC) static int arg_family = AF_UNSPEC; static int arg_ifindex = 0; diff --git a/src/resolve/resolved-dns-query.c b/src/resolve/resolved-dns-query.c index 0dfe9320b5d..2b091e6c45d 100644 --- a/src/resolve/resolved-dns-query.c +++ b/src/resolve/resolved-dns-query.c @@ -28,7 +28,7 @@ #include "string-util.h" /* How long to wait for the query in total */ -#define QUERY_TIMEOUT_USEC (30 * USEC_PER_SEC) +#define QUERY_TIMEOUT_USEC (60 * USEC_PER_SEC) #define CNAME_MAX 8 #define QUERIES_MAX 2048 diff --git a/src/resolve/resolved-dns-server.c b/src/resolve/resolved-dns-server.c index 2df917e61e1..5498f7b9cbd 100644 --- a/src/resolve/resolved-dns-server.c +++ b/src/resolve/resolved-dns-server.c @@ -28,7 +28,7 @@ #include "string-util.h" /* After how much time to repeat classic DNS requests */ -#define DNS_TIMEOUT_MIN_USEC (500 * USEC_PER_MSEC) +#define DNS_TIMEOUT_MIN_USEC (750 * USEC_PER_MSEC) #define DNS_TIMEOUT_MAX_USEC (5 * USEC_PER_SEC) /* The amount of time to wait before retrying with a full feature set */ diff --git a/src/resolve/resolved-dns-transaction.h b/src/resolve/resolved-dns-transaction.h index c4fb51958db..a8d97738efc 100644 --- a/src/resolve/resolved-dns-transaction.h +++ b/src/resolve/resolved-dns-transaction.h @@ -178,7 +178,7 @@ DnsTransactionSource dns_transaction_source_from_string(const char *s) _pure_; #define MDNS_PROBING_INTERVAL_USEC (250 * USEC_PER_MSEC) /* Maximum attempts to send DNS requests, across all DNS servers */ -#define DNS_TRANSACTION_ATTEMPTS_MAX 16 +#define DNS_TRANSACTION_ATTEMPTS_MAX 24 /* Maximum attempts to send LLMNR requests, see RFC 4795 Section 2.7 */ #define LLMNR_TRANSACTION_ATTEMPTS_MAX 3 From 6993d26469ca8deee0eea2b806b3c415deaa2e25 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Wed, 15 Feb 2017 20:05:27 +0100 Subject: [PATCH 14/14] resolved: try to authenticate SOA on negative replies For caching negative replies we need the SOA TTL information. Hence, let's authenticate all auxiliary SOA RRs through DS requests on all negative requests. --- src/resolve/resolved-dns-transaction.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/src/resolve/resolved-dns-transaction.c b/src/resolve/resolved-dns-transaction.c index 532169ff249..ecd70686836 100644 --- a/src/resolve/resolved-dns-transaction.c +++ b/src/resolve/resolved-dns-transaction.c @@ -2009,8 +2009,18 @@ int dns_transaction_request_dnssec_keys(DnsTransaction *t) { r = dns_resource_key_match_rr(t->key, rr, NULL); if (r < 0) return r; - if (r == 0) - continue; + if (r == 0) { + /* Hmm, so this SOA RR doesn't match our original question. In this case, maybe this is + * a negative reply, and we need the a SOA RR's TTL in order to cache a negative entry? + * If so, we need to validate it, too. */ + + r = dns_answer_match_key(t->answer, t->key, NULL); + if (r < 0) + return r; + if (r > 0) /* positive reply, we won't need the SOA and hence don't need to validate + * it. */ + continue; + } r = dnssec_has_rrsig(t->answer, rr->key); if (r < 0)