mirror of
https://github.com/systemd/systemd-stable.git
synced 2025-02-12 21:57:27 +03:00
-1 was used everywhere, but -EBADF or -EBADFD started being used in various places. Let's make things consistent in the new style. Note that there are two candidates: EBADF 9 Bad file descriptor EBADFD 77 File descriptor in bad state Since we're initializating the fd, we're just assigning a value that means "no fd yet", so it's just a bad file descriptor, and the first errno fits better. If instead we had a valid file descriptor that became invalid because of some operation or state change, the other errno would fit better. In some places, initialization is dropped if unnecessary.
596 lines
21 KiB
C
596 lines
21 KiB
C
/* SPDX-License-Identifier: LGPL-2.1-or-later */
|
|
|
|
#include <netinet/tcp.h>
|
|
#include <unistd.h>
|
|
|
|
#include "alloc-util.h"
|
|
#include "fd-util.h"
|
|
#include "io-util.h"
|
|
#include "macro.h"
|
|
#include "missing_network.h"
|
|
#include "resolved-dns-stream.h"
|
|
#include "resolved-manager.h"
|
|
|
|
#define DNS_STREAMS_MAX 128
|
|
|
|
#define DNS_QUERIES_PER_STREAM 32
|
|
|
|
static void dns_stream_stop(DnsStream *s) {
|
|
assert(s);
|
|
|
|
s->io_event_source = sd_event_source_disable_unref(s->io_event_source);
|
|
s->timeout_event_source = sd_event_source_disable_unref(s->timeout_event_source);
|
|
s->fd = safe_close(s->fd);
|
|
|
|
/* Disconnect us from the server object if we are now not usable anymore */
|
|
dns_stream_detach(s);
|
|
}
|
|
|
|
static int dns_stream_update_io(DnsStream *s) {
|
|
uint32_t f = 0;
|
|
|
|
assert(s);
|
|
|
|
if (s->write_packet && s->n_written < sizeof(s->write_size) + s->write_packet->size)
|
|
f |= EPOLLOUT;
|
|
else if (!ordered_set_isempty(s->write_queue)) {
|
|
dns_packet_unref(s->write_packet);
|
|
s->write_packet = ordered_set_steal_first(s->write_queue);
|
|
s->write_size = htobe16(s->write_packet->size);
|
|
s->n_written = 0;
|
|
f |= EPOLLOUT;
|
|
}
|
|
|
|
/* Let's read a packet if we haven't queued any yet. Except if we already hit a limit of parallel
|
|
* queries for this connection. */
|
|
if ((!s->read_packet || s->n_read < sizeof(s->read_size) + s->read_packet->size) &&
|
|
set_size(s->queries) < DNS_QUERIES_PER_STREAM)
|
|
f |= EPOLLIN;
|
|
|
|
s->requested_events = f;
|
|
|
|
#if ENABLE_DNS_OVER_TLS
|
|
/* For handshake and clean closing purposes, TLS can override requested events */
|
|
if (s->dnstls_events != 0)
|
|
f = s->dnstls_events;
|
|
#endif
|
|
|
|
return sd_event_source_set_io_events(s->io_event_source, f);
|
|
}
|
|
|
|
static int dns_stream_complete(DnsStream *s, int error) {
|
|
_cleanup_(dns_stream_unrefp) _unused_ DnsStream *ref = dns_stream_ref(s); /* Protect stream while we process it */
|
|
|
|
assert(s);
|
|
assert(error >= 0);
|
|
|
|
/* Error is > 0 when the connection failed for some reason in the network stack. It's == 0 if we sent
|
|
* and received exactly one packet each (in the LLMNR client case). */
|
|
|
|
#if ENABLE_DNS_OVER_TLS
|
|
if (s->encrypted) {
|
|
int r;
|
|
|
|
r = dnstls_stream_shutdown(s, error);
|
|
if (r != -EAGAIN)
|
|
dns_stream_stop(s);
|
|
} else
|
|
#endif
|
|
dns_stream_stop(s);
|
|
|
|
dns_stream_detach(s);
|
|
|
|
if (s->complete)
|
|
s->complete(s, error);
|
|
else /* the default action if no completion function is set is to close the stream */
|
|
dns_stream_unref(s);
|
|
|
|
return 0;
|
|
}
|
|
|
|
static int dns_stream_identify(DnsStream *s) {
|
|
CMSG_BUFFER_TYPE(CMSG_SPACE(MAXSIZE(struct in_pktinfo, struct in6_pktinfo))
|
|
+ CMSG_SPACE(int) + /* for the TTL */
|
|
+ EXTRA_CMSG_SPACE /* kernel appears to require extra space */) control;
|
|
struct msghdr mh = {};
|
|
struct cmsghdr *cmsg;
|
|
socklen_t sl;
|
|
int r;
|
|
|
|
assert(s);
|
|
|
|
if (s->identified)
|
|
return 0;
|
|
|
|
/* Query the local side */
|
|
s->local_salen = sizeof(s->local);
|
|
r = getsockname(s->fd, &s->local.sa, &s->local_salen);
|
|
if (r < 0)
|
|
return -errno;
|
|
if (s->local.sa.sa_family == AF_INET6 && s->ifindex <= 0)
|
|
s->ifindex = s->local.in6.sin6_scope_id;
|
|
|
|
/* Query the remote side */
|
|
s->peer_salen = sizeof(s->peer);
|
|
r = getpeername(s->fd, &s->peer.sa, &s->peer_salen);
|
|
if (r < 0)
|
|
return -errno;
|
|
if (s->peer.sa.sa_family == AF_INET6 && s->ifindex <= 0)
|
|
s->ifindex = s->peer.in6.sin6_scope_id;
|
|
|
|
/* Check consistency */
|
|
assert(s->peer.sa.sa_family == s->local.sa.sa_family);
|
|
assert(IN_SET(s->peer.sa.sa_family, AF_INET, AF_INET6));
|
|
|
|
/* Query connection meta information */
|
|
sl = sizeof(control);
|
|
if (s->peer.sa.sa_family == AF_INET) {
|
|
r = getsockopt(s->fd, IPPROTO_IP, IP_PKTOPTIONS, &control, &sl);
|
|
if (r < 0)
|
|
return -errno;
|
|
} else if (s->peer.sa.sa_family == AF_INET6) {
|
|
|
|
r = getsockopt(s->fd, IPPROTO_IPV6, IPV6_2292PKTOPTIONS, &control, &sl);
|
|
if (r < 0)
|
|
return -errno;
|
|
} else
|
|
return -EAFNOSUPPORT;
|
|
|
|
mh.msg_control = &control;
|
|
mh.msg_controllen = sl;
|
|
|
|
CMSG_FOREACH(cmsg, &mh) {
|
|
|
|
if (cmsg->cmsg_level == IPPROTO_IPV6) {
|
|
assert(s->peer.sa.sa_family == AF_INET6);
|
|
|
|
switch (cmsg->cmsg_type) {
|
|
|
|
case IPV6_PKTINFO: {
|
|
struct in6_pktinfo *i = (struct in6_pktinfo*) CMSG_DATA(cmsg);
|
|
|
|
if (s->ifindex <= 0)
|
|
s->ifindex = i->ipi6_ifindex;
|
|
break;
|
|
}
|
|
|
|
case IPV6_HOPLIMIT:
|
|
s->ttl = *(int *) CMSG_DATA(cmsg);
|
|
break;
|
|
}
|
|
|
|
} else if (cmsg->cmsg_level == IPPROTO_IP) {
|
|
assert(s->peer.sa.sa_family == AF_INET);
|
|
|
|
switch (cmsg->cmsg_type) {
|
|
|
|
case IP_PKTINFO: {
|
|
struct in_pktinfo *i = (struct in_pktinfo*) CMSG_DATA(cmsg);
|
|
|
|
if (s->ifindex <= 0)
|
|
s->ifindex = i->ipi_ifindex;
|
|
break;
|
|
}
|
|
|
|
case IP_TTL:
|
|
s->ttl = *(int *) CMSG_DATA(cmsg);
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
/* The Linux kernel sets the interface index to the loopback
|
|
* device if the connection came from the local host since it
|
|
* avoids the routing table in such a case. Let's unset the
|
|
* interface index in such a case. */
|
|
if (s->ifindex == LOOPBACK_IFINDEX)
|
|
s->ifindex = 0;
|
|
|
|
/* If we don't know the interface index still, we look for the
|
|
* first local interface with a matching address. Yuck! */
|
|
if (s->ifindex <= 0)
|
|
s->ifindex = manager_find_ifindex(s->manager, s->local.sa.sa_family, sockaddr_in_addr(&s->local.sa));
|
|
|
|
if (s->protocol == DNS_PROTOCOL_LLMNR && s->ifindex > 0) {
|
|
/* Make sure all packets for this connection are sent on the same interface */
|
|
r = socket_set_unicast_if(s->fd, s->local.sa.sa_family, s->ifindex);
|
|
if (r < 0)
|
|
log_debug_errno(errno, "Failed to invoke IP_UNICAST_IF/IPV6_UNICAST_IF: %m");
|
|
}
|
|
|
|
s->identified = true;
|
|
|
|
return 0;
|
|
}
|
|
|
|
ssize_t dns_stream_writev(DnsStream *s, const struct iovec *iov, size_t iovcnt, int flags) {
|
|
ssize_t m;
|
|
|
|
assert(s);
|
|
assert(iov);
|
|
|
|
#if ENABLE_DNS_OVER_TLS
|
|
if (s->encrypted && !(flags & DNS_STREAM_WRITE_TLS_DATA))
|
|
return dnstls_stream_writev(s, iov, iovcnt);
|
|
#endif
|
|
|
|
if (s->tfo_salen > 0) {
|
|
struct msghdr hdr = {
|
|
.msg_iov = (struct iovec*) iov,
|
|
.msg_iovlen = iovcnt,
|
|
.msg_name = &s->tfo_address.sa,
|
|
.msg_namelen = s->tfo_salen
|
|
};
|
|
|
|
m = sendmsg(s->fd, &hdr, MSG_FASTOPEN);
|
|
if (m < 0) {
|
|
if (errno == EOPNOTSUPP) {
|
|
s->tfo_salen = 0;
|
|
if (connect(s->fd, &s->tfo_address.sa, s->tfo_salen) < 0)
|
|
return -errno;
|
|
|
|
return -EAGAIN;
|
|
}
|
|
if (errno == EINPROGRESS)
|
|
return -EAGAIN;
|
|
|
|
return -errno;
|
|
} else
|
|
s->tfo_salen = 0; /* connection is made */
|
|
} else {
|
|
m = writev(s->fd, iov, iovcnt);
|
|
if (m < 0)
|
|
return -errno;
|
|
}
|
|
|
|
return m;
|
|
}
|
|
|
|
static ssize_t dns_stream_read(DnsStream *s, void *buf, size_t count) {
|
|
ssize_t ss;
|
|
|
|
#if ENABLE_DNS_OVER_TLS
|
|
if (s->encrypted)
|
|
ss = dnstls_stream_read(s, buf, count);
|
|
else
|
|
#endif
|
|
{
|
|
ss = read(s->fd, buf, count);
|
|
if (ss < 0)
|
|
return -errno;
|
|
}
|
|
|
|
return ss;
|
|
}
|
|
|
|
static int on_stream_timeout(sd_event_source *es, usec_t usec, void *userdata) {
|
|
DnsStream *s = ASSERT_PTR(userdata);
|
|
|
|
return dns_stream_complete(s, ETIMEDOUT);
|
|
}
|
|
|
|
static DnsPacket *dns_stream_take_read_packet(DnsStream *s) {
|
|
assert(s);
|
|
|
|
/* Note, dns_stream_update() should be called after this is called. When this is called, the
|
|
* stream may be already full and the EPOLLIN flag is dropped from the stream IO event source.
|
|
* Even this makes a room to read in the stream, this does not call dns_stream_update(), hence
|
|
* EPOLLIN flag is not set automatically. So, to read further packets from the stream,
|
|
* dns_stream_update() must be called explicitly. Currently, this is only called from
|
|
* on_stream_io(), and there dns_stream_update() is called. */
|
|
|
|
if (!s->read_packet)
|
|
return NULL;
|
|
|
|
if (s->n_read < sizeof(s->read_size))
|
|
return NULL;
|
|
|
|
if (s->n_read < sizeof(s->read_size) + be16toh(s->read_size))
|
|
return NULL;
|
|
|
|
s->n_read = 0;
|
|
return TAKE_PTR(s->read_packet);
|
|
}
|
|
|
|
static int on_stream_io(sd_event_source *es, int fd, uint32_t revents, void *userdata) {
|
|
_cleanup_(dns_stream_unrefp) DnsStream *s = dns_stream_ref(userdata); /* Protect stream while we process it */
|
|
bool progressed = false;
|
|
int r;
|
|
|
|
assert(s);
|
|
|
|
#if ENABLE_DNS_OVER_TLS
|
|
if (s->encrypted) {
|
|
r = dnstls_stream_on_io(s, revents);
|
|
if (r == DNSTLS_STREAM_CLOSED)
|
|
return 0;
|
|
if (r == -EAGAIN)
|
|
return dns_stream_update_io(s);
|
|
if (r < 0)
|
|
return dns_stream_complete(s, -r);
|
|
|
|
r = dns_stream_update_io(s);
|
|
if (r < 0)
|
|
return r;
|
|
}
|
|
#endif
|
|
|
|
/* only identify after connecting */
|
|
if (s->tfo_salen == 0) {
|
|
r = dns_stream_identify(s);
|
|
if (r < 0)
|
|
return dns_stream_complete(s, -r);
|
|
}
|
|
|
|
if ((revents & EPOLLOUT) &&
|
|
s->write_packet &&
|
|
s->n_written < sizeof(s->write_size) + s->write_packet->size) {
|
|
|
|
struct iovec iov[] = {
|
|
IOVEC_MAKE(&s->write_size, sizeof(s->write_size)),
|
|
IOVEC_MAKE(DNS_PACKET_DATA(s->write_packet), s->write_packet->size),
|
|
};
|
|
|
|
IOVEC_INCREMENT(iov, ELEMENTSOF(iov), s->n_written);
|
|
|
|
ssize_t ss = dns_stream_writev(s, iov, ELEMENTSOF(iov), 0);
|
|
if (ss < 0) {
|
|
if (!ERRNO_IS_TRANSIENT(ss))
|
|
return dns_stream_complete(s, -ss);
|
|
} else {
|
|
progressed = true;
|
|
s->n_written += ss;
|
|
}
|
|
|
|
/* Are we done? If so, disable the event source for EPOLLOUT */
|
|
if (s->n_written >= sizeof(s->write_size) + s->write_packet->size) {
|
|
r = dns_stream_update_io(s);
|
|
if (r < 0)
|
|
return dns_stream_complete(s, -r);
|
|
}
|
|
}
|
|
|
|
while ((revents & (EPOLLIN|EPOLLHUP|EPOLLRDHUP)) &&
|
|
(!s->read_packet ||
|
|
s->n_read < sizeof(s->read_size) + s->read_packet->size)) {
|
|
|
|
if (s->n_read < sizeof(s->read_size)) {
|
|
ssize_t ss;
|
|
|
|
ss = dns_stream_read(s, (uint8_t*) &s->read_size + s->n_read, sizeof(s->read_size) - s->n_read);
|
|
if (ss < 0) {
|
|
if (!ERRNO_IS_TRANSIENT(ss))
|
|
return dns_stream_complete(s, -ss);
|
|
break;
|
|
} else if (ss == 0)
|
|
return dns_stream_complete(s, ECONNRESET);
|
|
else {
|
|
progressed = true;
|
|
s->n_read += ss;
|
|
}
|
|
}
|
|
|
|
if (s->n_read >= sizeof(s->read_size)) {
|
|
|
|
if (be16toh(s->read_size) < DNS_PACKET_HEADER_SIZE)
|
|
return dns_stream_complete(s, EBADMSG);
|
|
|
|
if (s->n_read < sizeof(s->read_size) + be16toh(s->read_size)) {
|
|
ssize_t ss;
|
|
|
|
if (!s->read_packet) {
|
|
r = dns_packet_new(&s->read_packet, s->protocol, be16toh(s->read_size), DNS_PACKET_SIZE_MAX);
|
|
if (r < 0)
|
|
return dns_stream_complete(s, -r);
|
|
|
|
s->read_packet->size = be16toh(s->read_size);
|
|
s->read_packet->ipproto = IPPROTO_TCP;
|
|
s->read_packet->family = s->peer.sa.sa_family;
|
|
s->read_packet->ttl = s->ttl;
|
|
s->read_packet->ifindex = s->ifindex;
|
|
s->read_packet->timestamp = now(CLOCK_BOOTTIME);
|
|
|
|
if (s->read_packet->family == AF_INET) {
|
|
s->read_packet->sender.in = s->peer.in.sin_addr;
|
|
s->read_packet->sender_port = be16toh(s->peer.in.sin_port);
|
|
s->read_packet->destination.in = s->local.in.sin_addr;
|
|
s->read_packet->destination_port = be16toh(s->local.in.sin_port);
|
|
} else {
|
|
assert(s->read_packet->family == AF_INET6);
|
|
s->read_packet->sender.in6 = s->peer.in6.sin6_addr;
|
|
s->read_packet->sender_port = be16toh(s->peer.in6.sin6_port);
|
|
s->read_packet->destination.in6 = s->local.in6.sin6_addr;
|
|
s->read_packet->destination_port = be16toh(s->local.in6.sin6_port);
|
|
|
|
if (s->read_packet->ifindex == 0)
|
|
s->read_packet->ifindex = s->peer.in6.sin6_scope_id;
|
|
if (s->read_packet->ifindex == 0)
|
|
s->read_packet->ifindex = s->local.in6.sin6_scope_id;
|
|
}
|
|
}
|
|
|
|
ss = dns_stream_read(s,
|
|
(uint8_t*) DNS_PACKET_DATA(s->read_packet) + s->n_read - sizeof(s->read_size),
|
|
sizeof(s->read_size) + be16toh(s->read_size) - s->n_read);
|
|
if (ss < 0) {
|
|
if (!ERRNO_IS_TRANSIENT(ss))
|
|
return dns_stream_complete(s, -ss);
|
|
break;
|
|
} else if (ss == 0)
|
|
return dns_stream_complete(s, ECONNRESET);
|
|
else
|
|
s->n_read += ss;
|
|
}
|
|
|
|
/* Are we done? If so, call the packet handler and re-enable EPOLLIN for the
|
|
* event source if necessary. */
|
|
_cleanup_(dns_packet_unrefp) DnsPacket *p = dns_stream_take_read_packet(s);
|
|
if (p) {
|
|
assert(s->on_packet);
|
|
r = s->on_packet(s, p);
|
|
if (r < 0)
|
|
return r;
|
|
|
|
r = dns_stream_update_io(s);
|
|
if (r < 0)
|
|
return dns_stream_complete(s, -r);
|
|
|
|
s->packet_received = true;
|
|
|
|
/* If we just disabled the read event, stop reading */
|
|
if (!FLAGS_SET(s->requested_events, EPOLLIN))
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
/* Complete the stream if finished reading and writing one packet, and there's nothing
|
|
* else left to write. */
|
|
if (s->type == DNS_STREAM_LLMNR_SEND && s->packet_received &&
|
|
!FLAGS_SET(s->requested_events, EPOLLOUT))
|
|
return dns_stream_complete(s, 0);
|
|
|
|
/* If we did something, let's restart the timeout event source */
|
|
if (progressed && s->timeout_event_source) {
|
|
r = sd_event_source_set_time_relative(s->timeout_event_source, DNS_STREAM_ESTABLISHED_TIMEOUT_USEC);
|
|
if (r < 0)
|
|
log_warning_errno(errno, "Couldn't restart TCP connection timeout, ignoring: %m");
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
static DnsStream *dns_stream_free(DnsStream *s) {
|
|
DnsPacket *p;
|
|
|
|
assert(s);
|
|
|
|
dns_stream_stop(s);
|
|
|
|
if (s->manager) {
|
|
LIST_REMOVE(streams, s->manager->dns_streams, s);
|
|
s->manager->n_dns_streams[s->type]--;
|
|
}
|
|
|
|
#if ENABLE_DNS_OVER_TLS
|
|
if (s->encrypted)
|
|
dnstls_stream_free(s);
|
|
#endif
|
|
|
|
ORDERED_SET_FOREACH(p, s->write_queue)
|
|
dns_packet_unref(ordered_set_remove(s->write_queue, p));
|
|
|
|
dns_packet_unref(s->write_packet);
|
|
dns_packet_unref(s->read_packet);
|
|
dns_server_unref(s->server);
|
|
|
|
ordered_set_free(s->write_queue);
|
|
|
|
return mfree(s);
|
|
}
|
|
|
|
DEFINE_TRIVIAL_REF_UNREF_FUNC(DnsStream, dns_stream, dns_stream_free);
|
|
|
|
int dns_stream_new(
|
|
Manager *m,
|
|
DnsStream **ret,
|
|
DnsStreamType type,
|
|
DnsProtocol protocol,
|
|
int fd,
|
|
const union sockaddr_union *tfo_address,
|
|
int (on_packet)(DnsStream*, DnsPacket*),
|
|
int (complete)(DnsStream*, int), /* optional */
|
|
usec_t connect_timeout_usec) {
|
|
|
|
_cleanup_(dns_stream_unrefp) DnsStream *s = NULL;
|
|
int r;
|
|
|
|
assert(m);
|
|
assert(ret);
|
|
assert(type >= 0);
|
|
assert(type < _DNS_STREAM_TYPE_MAX);
|
|
assert(protocol >= 0);
|
|
assert(protocol < _DNS_PROTOCOL_MAX);
|
|
assert(fd >= 0);
|
|
assert(on_packet);
|
|
|
|
if (m->n_dns_streams[type] > DNS_STREAMS_MAX)
|
|
return -EBUSY;
|
|
|
|
s = new(DnsStream, 1);
|
|
if (!s)
|
|
return -ENOMEM;
|
|
|
|
*s = (DnsStream) {
|
|
.n_ref = 1,
|
|
.fd = -EBADF,
|
|
.protocol = protocol,
|
|
.type = type,
|
|
};
|
|
|
|
r = ordered_set_ensure_allocated(&s->write_queue, &dns_packet_hash_ops);
|
|
if (r < 0)
|
|
return r;
|
|
|
|
r = sd_event_add_io(m->event, &s->io_event_source, fd, EPOLLIN, on_stream_io, s);
|
|
if (r < 0)
|
|
return r;
|
|
|
|
(void) sd_event_source_set_description(s->io_event_source, "dns-stream-io");
|
|
|
|
r = sd_event_add_time_relative(
|
|
m->event,
|
|
&s->timeout_event_source,
|
|
CLOCK_BOOTTIME,
|
|
connect_timeout_usec, 0,
|
|
on_stream_timeout, s);
|
|
if (r < 0)
|
|
return r;
|
|
|
|
(void) sd_event_source_set_description(s->timeout_event_source, "dns-stream-timeout");
|
|
|
|
LIST_PREPEND(streams, m->dns_streams, s);
|
|
m->n_dns_streams[type]++;
|
|
s->manager = m;
|
|
|
|
s->fd = fd;
|
|
s->on_packet = on_packet;
|
|
s->complete = complete;
|
|
|
|
if (tfo_address) {
|
|
s->tfo_address = *tfo_address;
|
|
s->tfo_salen = tfo_address->sa.sa_family == AF_INET6 ? sizeof(tfo_address->in6) : sizeof(tfo_address->in);
|
|
}
|
|
|
|
*ret = TAKE_PTR(s);
|
|
|
|
return 0;
|
|
}
|
|
|
|
int dns_stream_write_packet(DnsStream *s, DnsPacket *p) {
|
|
int r;
|
|
|
|
assert(s);
|
|
assert(p);
|
|
|
|
r = ordered_set_put(s->write_queue, p);
|
|
if (r < 0)
|
|
return r;
|
|
|
|
dns_packet_ref(p);
|
|
|
|
return dns_stream_update_io(s);
|
|
}
|
|
|
|
void dns_stream_detach(DnsStream *s) {
|
|
assert(s);
|
|
|
|
if (!s->server)
|
|
return;
|
|
|
|
if (s->server->stream != s)
|
|
return;
|
|
|
|
dns_server_unref_stream(s->server);
|
|
}
|