Dmitriy Matrenichev 53f5489130
fix: increase host dns packet ttl for pods
This PR fixes incorrect packet TTL if `forwardKubeDNSToHost` is enabled.

Credits go to Julian Wiedmann.
Closes #8698.

Signed-off-by: Dmitriy Matrenichev <dmitry.matrenichev@siderolabs.com>
2024-05-15 17:40:32 +03:00

446 lines
11 KiB
Go

// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this
// file, You can obtain one at http://mozilla.org/MPL/2.0/.
// Package dns provides dns server implementation.
package dns
import (
"context"
"errors"
"fmt"
"io"
"math/rand/v2"
"net"
"net/netip"
"slices"
"strings"
"sync"
"sync/atomic"
"syscall"
"time"
"github.com/coredns/coredns/plugin"
"github.com/coredns/coredns/plugin/cache"
"github.com/coredns/coredns/plugin/pkg/proxy"
"github.com/coredns/coredns/request"
"github.com/miekg/dns"
"go.uber.org/zap"
"golang.org/x/sys/unix"
)
// Cache is a [dns.Handler] to [plugin.Handler] adapter.
type Cache struct {
cache *cache.Cache
logger *zap.Logger
}
// NewCache creates a new Cache.
func NewCache(next plugin.Handler, l *zap.Logger) *Cache {
c := cache.NewCache("zones", "view")
c.Next = next
return &Cache{cache: c, logger: l}
}
// ServeDNS implements [dns.Handler].
func (c *Cache) ServeDNS(wr dns.ResponseWriter, msg *dns.Msg) {
_, err := c.cache.ServeDNS(context.Background(), wr, msg)
if err != nil {
// we should probably call newProxy.Healthcheck() if there are too many errors
c.logger.Warn("error serving dns request", zap.Error(err))
}
}
// Handler is a dns proxy selector.
type Handler struct {
mx sync.RWMutex
dests []*proxy.Proxy
logger *zap.Logger
}
// NewHandler creates a new Handler.
func NewHandler(logger *zap.Logger) *Handler {
return &Handler{
logger: logger,
}
}
// Name implements plugin.Handler.
func (h *Handler) Name() string {
return "Handler"
}
// ServeDNS implements plugin.Handler.
func (h *Handler) ServeDNS(ctx context.Context, wrt dns.ResponseWriter, msg *dns.Msg) (int, error) {
h.mx.RLock()
defer h.mx.RUnlock()
req := request.Request{W: wrt, Req: msg}
h.logger.Debug("dns request", zap.Stringer("data", msg))
upstreams := slices.Clone(h.dests)
if len(upstreams) == 0 {
emptyProxyErr := new(dns.Msg).SetRcode(req.Req, dns.RcodeServerFailure)
err := wrt.WriteMsg(emptyProxyErr)
if err != nil {
// We can't do much here, but at least log the error.
h.logger.Warn("failed to write 'no destination available' error dns response", zap.Error(err))
}
return dns.RcodeServerFailure, errors.New("no destination available")
}
rand.Shuffle(len(upstreams), func(i, j int) { upstreams[i], upstreams[j] = upstreams[j], upstreams[i] })
var (
resp *dns.Msg
err error
)
for _, ups := range upstreams {
resp, err = ups.Connect(ctx, req, proxy.Options{})
if errors.Is(err, proxy.ErrCachedClosed) { // Remote side closed conn, can only happen with TCP.
continue
}
if err == nil {
break
}
continue
}
if err != nil {
return dns.RcodeServerFailure, err
}
if !req.Match(resp) {
resp = new(dns.Msg).SetRcode(req.Req, dns.RcodeFormatError)
err = wrt.WriteMsg(resp)
if err != nil {
// We can't do much here, but at least log the error.
h.logger.Warn("failed to write non-matched response", zap.Error(err))
}
h.logger.Warn("dns response didn't match", zap.Stringer("data", resp))
return dns.RcodeFormatError, nil
}
err = wrt.WriteMsg(resp)
if err != nil {
// We can't do much here, but at least log the error.
h.logger.Warn("error writing dns response", zap.Error(err))
}
h.logger.Debug("dns response", zap.Stringer("data", resp))
return dns.RcodeSuccess, nil
}
// SetProxy sets destination dns proxy servers.
func (h *Handler) SetProxy(prxs []*proxy.Proxy) bool {
h.mx.Lock()
defer h.mx.Unlock()
if slices.Equal(h.dests, prxs) {
return false
}
h.dests = prxs
return true
}
// Stop stops and clears dns proxy selector.
func (h *Handler) Stop() { h.SetProxy(nil) }
// NewNodeHandler creates a new NodeHandler.
func NewNodeHandler(next plugin.Handler, hostMapper HostMapper, logger *zap.Logger) *NodeHandler {
return &NodeHandler{next: next, mapper: hostMapper, logger: logger}
}
// HostMapper is a name to node mapper.
type HostMapper interface {
ResolveAddr(ctx context.Context, qType uint16, name string) []netip.Addr
}
// NodeHandler try to resolve dns request to a node. If required node is not found, it will move to the next handler.
type NodeHandler struct {
next plugin.Handler
mapper HostMapper
logger *zap.Logger
enabled atomic.Bool
}
// Name implements plugin.Handler.
func (h *NodeHandler) Name() string {
return "NodeHandler"
}
// ServeDNS implements plugin.Handler.
func (h *NodeHandler) ServeDNS(ctx context.Context, wrt dns.ResponseWriter, msg *dns.Msg) (int, error) {
if !h.enabled.Load() {
return h.next.ServeDNS(ctx, wrt, msg)
}
idx := slices.IndexFunc(msg.Question, func(q dns.Question) bool { return q.Qtype == dns.TypeA || q.Qtype == dns.TypeAAAA })
if idx == -1 {
return h.next.ServeDNS(ctx, wrt, msg)
}
req := request.Request{W: wrt, Req: msg}
// Check if the request is for a node.
result := h.mapper.ResolveAddr(ctx, req.QType(), req.Name())
if len(result) == 0 {
return h.next.ServeDNS(ctx, wrt, msg)
}
resp := new(dns.Msg).SetReply(req.Req)
resp.Authoritative = true
resp.Answer = mapAnswers(result, req.Name())
err := wrt.WriteMsg(resp)
if err != nil {
// We can't do much here, but at least log the error.
h.logger.Warn("error writing dns response in node handler", zap.Error(err))
}
return dns.RcodeSuccess, nil
}
func mapAnswers(addrs []netip.Addr, name string) []dns.RR {
var result []dns.RR
for _, addr := range addrs {
switch {
case addr.Is4():
result = append(result, &dns.A{
Hdr: dns.RR_Header{
Name: name,
Rrtype: dns.TypeA,
Class: dns.ClassINET,
Ttl: nodeDNSResponseTTL,
},
A: addr.AsSlice(),
})
case addr.Is6():
result = append(result, &dns.AAAA{
Hdr: dns.RR_Header{
Name: name,
Rrtype: dns.TypeAAAA,
Class: dns.ClassINET,
Ttl: nodeDNSResponseTTL,
},
AAAA: addr.AsSlice(),
})
}
}
return result
}
const nodeDNSResponseTTL = 10
// SetEnabled sets the handler enabled state.
func (h *NodeHandler) SetEnabled(enabled bool) {
h.enabled.Store(enabled)
}
// ServerOptions is a Server options.
type ServerOptions struct {
Listener net.Listener
PacketConn net.PacketConn
Handler dns.Handler
ReadTimeout time.Duration
WriteTimeout time.Duration
IdleTimeout func() time.Duration
MaxTCPQueries int
Logger *zap.Logger
}
// NewServer creates a new Server.
func NewServer(opts ServerOptions) *Server {
return &Server{
srv: &dns.Server{
Listener: opts.Listener,
PacketConn: opts.PacketConn,
Handler: opts.Handler,
ReadTimeout: opts.ReadTimeout,
WriteTimeout: opts.WriteTimeout,
IdleTimeout: opts.IdleTimeout,
MaxTCPQueries: opts.MaxTCPQueries,
},
logger: opts.Logger,
}
}
// Server is a dns server.
type Server struct {
srv *dns.Server
logger *zap.Logger
}
// Start starts the dns server. Returns a function to stop the server.
func (s *Server) Start(onDone func(err error)) (stop func(), stopped <-chan struct{}) {
done := make(chan struct{})
fn := sync.OnceFunc(func() {
for {
err := s.srv.Shutdown()
if err != nil {
if strings.Contains(err.Error(), "server not started") {
// There a possible scenario where `go func()` not yet reached `ActivateAndServe` and yielded CPU
// time to another goroutine and then this closure reached `Shutdown`. In that case
// `ActivateAndServe` will actually start after `Shutdown` and this closure will block forever
// because `go func()` will never exit and close `done` channel.
continue
}
s.logger.Error("error shutting down dns server", zap.Error(err))
}
break
}
closer := io.Closer(s.srv.Listener)
if closer == nil {
closer = s.srv.PacketConn
}
if closer != nil {
err := closer.Close()
if err != nil && !errors.Is(err, net.ErrClosed) {
s.logger.Error("error closing dns server listener", zap.Error(err))
} else {
s.logger.Debug("dns server listener closed")
}
}
<-done
})
go func() {
defer close(done)
onDone(s.srv.ActivateAndServe())
}()
return fn, done
}
// NewTCPListener creates a new TCP listener.
func NewTCPListener(network, addr string, control ControlFn) (net.Listener, error) {
network, ok := networkNames[network]
if !ok {
return nil, fmt.Errorf("unsupported network: %s", network)
}
lc := net.ListenConfig{Control: control}
return lc.Listen(context.Background(), network, addr)
}
// NewUDPPacketConn creates a new UDP packet connection.
func NewUDPPacketConn(network, addr string, control ControlFn) (net.PacketConn, error) {
network, ok := networkNames[network]
if !ok {
return nil, fmt.Errorf("unsupported network: %s", network)
}
lc := net.ListenConfig{Control: control}
return lc.ListenPacket(context.Background(), network, addr)
}
// ControlFn is an alias to [net.ListenConfig.Control] function.
type ControlFn = func(string, string, syscall.RawConn) error
// MakeControl creates a control function for setting socket options.
func MakeControl(network string, forwardEnabled bool) (ControlFn, error) {
maxHops := 1
if forwardEnabled {
maxHops = 2
}
var options []controlOptions
switch network {
case "tcp", "tcp4":
options = []controlOptions{
{unix.IPPROTO_IP, unix.IP_RECVTTL, maxHops, "failed to set IP_RECVTTL"},
{unix.IPPROTO_TCP, unix.TCP_FASTOPEN, 5, "failed to set TCP_FASTOPEN"}, // tcp specific stuff from systemd
{unix.IPPROTO_TCP, unix.TCP_NODELAY, 1, "failed to set TCP_NODELAY"}, // tcp specific stuff from systemd
{unix.IPPROTO_IP, unix.IP_TTL, maxHops, "failed to set IP_TTL"},
}
case "tcp6":
options = []controlOptions{
{unix.IPPROTO_IPV6, unix.IPV6_RECVHOPLIMIT, maxHops, "failed to set IPV6_RECVHOPLIMIT"},
{unix.IPPROTO_TCP, unix.TCP_FASTOPEN, 5, "failed to set TCP_FASTOPEN"}, // tcp specific stuff from systemd
{unix.IPPROTO_TCP, unix.TCP_NODELAY, 1, "failed to set TCP_NODELAY"}, // tcp specific stuff from systemd
{unix.IPPROTO_IPV6, unix.IPV6_UNICAST_HOPS, maxHops, "failed to set IPV6_UNICAST_HOPS"},
}
case "udp", "udp4":
options = []controlOptions{
{unix.IPPROTO_IP, unix.IP_RECVTTL, maxHops, "failed to set IP_RECVTTL"},
{unix.IPPROTO_IP, unix.IP_TTL, maxHops, "failed to set IP_TTL"},
}
case "udp6":
options = []controlOptions{
{unix.IPPROTO_IPV6, unix.IPV6_RECVHOPLIMIT, maxHops, "failed to set IPV6_RECVHOPLIMIT"},
{unix.IPPROTO_IPV6, unix.IPV6_UNICAST_HOPS, maxHops, "failed to set IPV6_UNICAST_HOPS"},
}
default:
return nil, fmt.Errorf("unsupported network: %s", network)
}
return func(_ string, _ string, c syscall.RawConn) error {
var resErr error
err := c.Control(func(fd uintptr) {
for _, opt := range options {
opErr := unix.SetsockoptInt(int(fd), opt.level, opt.opt, opt.val)
if opErr != nil {
resErr = fmt.Errorf(opt.errorMessage+": %w", opErr)
return
}
}
})
if err != nil {
return fmt.Errorf("failed in control call: %w", err)
}
if resErr != nil {
return fmt.Errorf("failed to set socket options: %w", resErr)
}
return nil
}, nil
}
type controlOptions struct {
level int
opt int
val int
errorMessage string
}
var networkNames = map[string]string{
"tcp": "tcp4",
"tcp4": "tcp4",
"tcp6": "tcp6",
"udp": "udp4",
"udp4": "udp4",
"udp6": "udp6",
}