fix: don't wait for the hostname in maintenance mode

Fixes #6119

With the new stable default hostname feature, any default hostname is
disabled until the machine config is available.

Talos enters maintenance mode when the default config source is empty,
so it doesn't have any machine config available at the moment the
maintenance service is started.

The hostname might be set via different sources, e.g. kernel args or
DHCP, before the machine config is available, but if none of these
sources is available, the hostname won't be set at all.

This change stops waiting for the hostname and skips setting any DNS
names in the maintenance-mode certificate SANs if the hostname is not
available.

Also adds a regression test via new `--disable-dhcp-hostname` flag to
`talosctl cluster create`.

Signed-off-by: Andrey Smirnov <andrey.smirnov@talos-systems.com>
This commit is contained in:
Andrey Smirnov 2022-08-23 00:36:10 +04:00
parent b15a639246
commit 2f2d97b6b5
No known key found for this signature in database
GPG Key ID: 7B26396447AB6DFD
9 changed files with 99 additions and 31 deletions

View File

@ -397,6 +397,14 @@ local integration_kubespan = Step("e2e-kubespan", target="e2e-qemu", privileged=
"IMAGE_REGISTRY": local_registry,
"WITH_CONFIG_PATCH": '[{"op": "replace", "path": "/cluster/discovery/registries/kubernetes/disabled", "value": false}]', # use Kubernetes discovery backend
});
local integration_default_hostname = Step("e2e-default-hostname", target="e2e-qemu", privileged=true, depends_on=[integration_kubespan], environment={
# regression test: make sure Talos works in maintenance mode when no hostname is set
"SHORT_INTEGRATION_TEST": "yes",
"IMAGE_REGISTRY": local_registry,
"VIA_MAINTENANCE_MODE": "true",
"DISABLE_DHCP_HOSTNAME": "true",
});
local integration_qemu_encrypted_vip = Step("e2e-encrypted-vip", target="e2e-qemu", privileged=true, depends_on=[load_artifacts], environment={
"WITH_DISK_ENCRYPTION": "true",
"WITH_VIRTUAL_IP": "true",
@ -452,7 +460,7 @@ local integration_pipelines = [
Pipeline('integration-provision-1', default_pipeline_steps + [integration_provision_tests_prepare, integration_provision_tests_track_1]) + integration_trigger(['integration-provision', 'integration-provision-1']),
Pipeline('integration-provision-2', default_pipeline_steps + [integration_provision_tests_prepare, integration_provision_tests_track_2]) + integration_trigger(['integration-provision', 'integration-provision-2']),
Pipeline('integration-misc', default_pipeline_steps + [integration_extensions
, integration_cilium, integration_bios, integration_disk_image, integration_control_plane_port, integration_no_cluster_discovery, integration_kubespan]) + integration_trigger(['integration-misc']),
, integration_cilium, integration_bios, integration_disk_image, integration_control_plane_port, integration_no_cluster_discovery, integration_kubespan, integration_default_hostname]) + integration_trigger(['integration-misc']),
Pipeline('integration-qemu-encrypted-vip', default_pipeline_steps + [integration_qemu_encrypted_vip]) + integration_trigger(['integration-qemu-encrypted-vip']),
Pipeline('integration-qemu-race', default_pipeline_steps + [build_race, integration_qemu_race]) + integration_trigger(['integration-qemu-race']),
Pipeline('integration-qemu-csi', default_pipeline_steps + [integration_qemu_csi]) + integration_trigger(['integration-qemu-csi']),
@ -464,7 +472,7 @@ local integration_pipelines = [
Pipeline('cron-integration-provision-1', default_pipeline_steps + [integration_provision_tests_prepare, integration_provision_tests_track_1], [default_cron_pipeline]) + cron_trigger(['thrice-daily', 'nightly']),
Pipeline('cron-integration-provision-2', default_pipeline_steps + [integration_provision_tests_prepare, integration_provision_tests_track_2], [default_cron_pipeline]) + cron_trigger(['thrice-daily', 'nightly']),
Pipeline('cron-integration-misc', default_pipeline_steps + [integration_extensions
, integration_cilium, integration_bios, integration_disk_image, integration_control_plane_port, integration_no_cluster_discovery, integration_kubespan], [default_cron_pipeline]) + cron_trigger(['thrice-daily', 'nightly']),
, integration_cilium, integration_bios, integration_disk_image, integration_control_plane_port, integration_no_cluster_discovery, integration_kubespan, integration_default_hostname], [default_cron_pipeline]) + cron_trigger(['thrice-daily', 'nightly']),
Pipeline('cron-integration-qemu-encrypted-vip', default_pipeline_steps + [integration_qemu_encrypted_vip], [default_cron_pipeline]) + cron_trigger(['thrice-daily', 'nightly']),
Pipeline('cron-integration-qemu-race', default_pipeline_steps + [build_race, integration_qemu_race], [default_cron_pipeline]) + cron_trigger(['nightly']),
Pipeline('cron-integration-qemu-csi', default_pipeline_steps + [integration_qemu_csi], [default_cron_pipeline]) + cron_trigger(['nightly']),

View File

@ -71,11 +71,6 @@ const (
networkMTUFlag = "mtu"
networkCIDRFlag = "cidr"
nameserversFlag = "nameservers"
cniBinPathFlag = "cni-bin-path"
cniConfDirFlag = "cni-conf-dir"
cniCacheDirFlag = "cni-cache-dir"
cniBundleURLFlag = "cni-bundle-url"
dockerDisableIPv6Flag = "docker-disable-ipv6"
clusterDiskSizeFlag = "disk"
clusterDisksFlag = "user-disk"
customCNIUrlFlag = "custom-cni-url"
@ -152,6 +147,7 @@ var (
extraBootKernelArgs string
dockerDisableIPv6 bool
controlPlanePort int
dhcpSkipHostname bool
)
// createCmd represents the cluster up command.
@ -287,6 +283,7 @@ func create(ctx context.Context, flags *pflag.FlagSet) (err error) {
BundleURL: cniBundleURL,
},
DHCPSkipHostname: dhcpSkipHostname,
DockerDisableIPv6: dockerDisableIPv6,
},
@ -895,10 +892,10 @@ func init() {
createCmd.Flags().StringVar(&forceEndpoint, forceEndpointFlag, "", "use endpoint instead of provider defaults")
createCmd.Flags().StringVar(&kubernetesVersion, "kubernetes-version", constants.DefaultKubernetesVersion, "desired kubernetes version to run")
createCmd.Flags().StringVarP(&inputDir, inputDirFlag, "i", "", "location of pre-generated config files")
createCmd.Flags().StringSliceVar(&cniBinPath, cniBinPathFlag, []string{filepath.Join(defaultCNIDir, "bin")}, "search path for CNI binaries (VM only)")
createCmd.Flags().StringVar(&cniConfDir, cniConfDirFlag, filepath.Join(defaultCNIDir, "conf.d"), "CNI config directory path (VM only)")
createCmd.Flags().StringVar(&cniCacheDir, cniCacheDirFlag, filepath.Join(defaultCNIDir, "cache"), "CNI cache directory path (VM only)")
createCmd.Flags().StringVar(&cniBundleURL, cniBundleURLFlag, fmt.Sprintf("https://github.com/%s/talos/releases/download/%s/talosctl-cni-bundle-%s.tar.gz",
createCmd.Flags().StringSliceVar(&cniBinPath, "cni-bin-path", []string{filepath.Join(defaultCNIDir, "bin")}, "search path for CNI binaries (VM only)")
createCmd.Flags().StringVar(&cniConfDir, "cni-conf-dir", filepath.Join(defaultCNIDir, "conf.d"), "CNI config directory path (VM only)")
createCmd.Flags().StringVar(&cniCacheDir, "cni-cache-dir", filepath.Join(defaultCNIDir, "cache"), "CNI cache directory path (VM only)")
createCmd.Flags().StringVar(&cniBundleURL, "cni-bundle-url", fmt.Sprintf("https://github.com/%s/talos/releases/download/%s/talosctl-cni-bundle-%s.tar.gz",
images.Username, trimVersion(version.Tag), constants.ArchVariable), "URL to download CNI bundle from (VM only)")
createCmd.Flags().StringVarP(&ports,
"exposed-ports",
@ -924,8 +921,9 @@ func init() {
createCmd.Flags().StringArrayVar(&configPatchWorker, "config-patch-worker", nil, "patch generated machineconfigs (applied to 'worker' type)")
createCmd.Flags().BoolVar(&badRTC, "bad-rtc", false, "launch VM with bad RTC state (QEMU only)")
createCmd.Flags().StringVar(&extraBootKernelArgs, "extra-boot-kernel-args", "", "add extra kernel args to the initial boot from vmlinuz and initramfs (QEMU only)")
createCmd.Flags().BoolVar(&dockerDisableIPv6, dockerDisableIPv6Flag, false, "skip enabling IPv6 in containers (Docker only)")
createCmd.Flags().BoolVar(&dockerDisableIPv6, "docker-disable-ipv6", false, "skip enabling IPv6 in containers (Docker only)")
createCmd.Flags().IntVar(&controlPlanePort, controlPlanePortFlag, constants.DefaultControlPlanePort, "control plane port (load balancer and local API port)")
createCmd.Flags().BoolVar(&dhcpSkipHostname, "disable-dhcp-hostname", false, "skip announcing hostname via DHCP (QEMU only)")
Cmd.AddCommand(createCmd)
}
@ -944,11 +942,6 @@ func checkForDefinedGenFlag(flags *pflag.FlagSet) string {
networkMTUFlag,
networkCIDRFlag,
nameserversFlag,
cniBinPathFlag,
cniConfDirFlag,
cniCacheDirFlag,
cniBundleURLFlag,
dockerDisableIPv6Flag,
clusterDiskSizeFlag,
clusterDisksFlag,
customCNIUrlFlag,

View File

@ -63,6 +63,23 @@ case "${WITH_CONTROL_PLANE_PORT:-false}" in
;;
esac
case "${VIA_MAINTENANCE_MODE:-false}" in
false)
;;
*)
# apply config via maintenance mode
QEMU_FLAGS="${QEMU_FLAGS} --skip-injecting-config --with-apply-config"
;;
esac
case "${DISABLE_DHCP_HOSTNAME:-false}" in
false)
;;
*)
QEMU_FLAGS="${QEMU_FLAGS} --disable-dhcp-hostname"
;;
esac
case "${USE_DISK_IMAGE:-false}" in
false)
DISK_IMAGE_FLAG=

View File

@ -34,9 +34,9 @@ import (
//
//nolint:gocyclo
func Run(ctx context.Context, logger *log.Logger, r runtime.Runtime) ([]byte, error) {
logger.Println("waiting for network address and hostname to be ready")
logger.Println("waiting for network address to be ready")
if err := network.NewReadyCondition(r.State().V1Alpha2().Resources(), network.AddressReady, network.HostnameReady).Wait(ctx); err != nil {
if err := network.NewReadyCondition(r.State().V1Alpha2().Resources(), network.AddressReady).Wait(ctx); err != nil {
return nil, fmt.Errorf("error waiting for the network to be ready: %w", err)
}
@ -52,12 +52,17 @@ func Run(ctx context.Context, logger *log.Logger, r runtime.Runtime) ([]byte, er
ips := currentAddresses.(*network.NodeAddress).TypedSpec().IPs()
// hostname might not be available yet, so use it only if it is available
hostnameStatus, err := r.State().V1Alpha2().Resources().Get(ctx, resource.NewMetadata(network.NamespaceName, network.HostnameStatusType, network.HostnameID, resource.VersionUndefined))
if err != nil {
if err != nil && !state.IsNotFoundError(err) {
return nil, fmt.Errorf("error getting node hostname: %w", err)
}
dnsNames := hostnameStatus.(*network.HostnameStatus).TypedSpec().DNSNames()
var dnsNames []string
if hostnameStatus != nil {
dnsNames = hostnameStatus.(*network.HostnameStatus).TypedSpec().DNSNames()
}
tlsConfig, provider, err := genTLSConfig(ips, dnsNames)
if err != nil {

View File

@ -111,21 +111,45 @@ func (suite *DiscoverySuite) TestMembers() {
continue
}
memberByID := make(map[string]*cluster.Member)
memberByName := slices.ToMap(members,
func(member *cluster.Member) (string, *cluster.Member) {
return member.Metadata().ID(), member
},
)
memberByIP := make(map[netaddr.IP]*cluster.Member)
for _, member := range members {
memberByID[member.Metadata().ID()] = member
for _, addr := range member.TypedSpec().Addresses {
memberByIP[addr] = member
}
}
nodesInfo := suite.Cluster.Info().Nodes
for _, nodeInfo := range nodesInfo {
matchingMember := memberByID[nodeInfo.Name]
matchingMember := memberByName[nodeInfo.Name]
var matchingMemberByIP *cluster.Member
for _, nodeIPStd := range nodeInfo.IPs {
nodeIP, ok := netaddr.FromStdIP(nodeIPStd)
suite.Assert().True(ok)
matchingMemberByIP = memberByIP[nodeIP]
break
}
// if hostnames are not set via DHCP, use match by IP
if matchingMember == nil {
matchingMember = matchingMemberByIP
}
suite.Require().NotNil(matchingMember)
suite.Assert().Equal(nodeInfo.Type, matchingMember.TypedSpec().MachineType)
suite.Assert().Equal(expectedTalosVersion, matchingMember.TypedSpec().OperatingSystem)
suite.Assert().Equal(nodeInfo.Name, matchingMember.TypedSpec().Hostname)
for _, nodeIPStd := range nodeInfo.IPs {
nodeIP, ok := netaddr.FromStdIP(nodeIPStd)

View File

@ -27,7 +27,7 @@ import (
"github.com/talos-systems/talos/pkg/provision/providers/vm"
)
//nolint:gocyclo
//nolint:gocyclo,cyclop
func (p *provisioner) createNode(state *vm.State, clusterReq provision.ClusterRequest, nodeReq provision.NodeRequest, opts *provision.Options) (provision.NodeInfo, error) {
arch := Arch(opts.TargetArch)
pidPath := state.GetRelativePath(fmt.Sprintf("%s.pid", nodeReq.Name))
@ -131,7 +131,6 @@ func (p *provisioner) createNode(state *vm.State, clusterReq provision.ClusterRe
CNI: clusterReq.Network.CNI,
CIDRs: clusterReq.Network.CIDRs,
IPs: nodeReq.IPs,
Hostname: nodeReq.Name,
GatewayAddrs: clusterReq.Network.GatewayAddrs,
MTU: clusterReq.Network.MTU,
Nameservers: clusterReq.Network.Nameservers,
@ -140,6 +139,10 @@ func (p *provisioner) createNode(state *vm.State, clusterReq provision.ClusterRe
APIPort: apiPort,
}
if !clusterReq.Network.DHCPSkipHostname {
launchConfig.Hostname = nodeReq.Name
}
if !nodeReq.PXEBooted {
launchConfig.KernelImagePath = strings.ReplaceAll(clusterReq.KernelPath, constants.ArchVariable, opts.TargetArch)
launchConfig.InitrdPath = strings.ReplaceAll(clusterReq.InitramfsPath, constants.ArchVariable, opts.TargetArch)

View File

@ -60,14 +60,23 @@ func handlerDHCP4(serverIP net.IP, statePath string) server4.Handler {
return
}
resp, err := dhcpv4.NewReplyFromRequest(m,
modifiers := []dhcpv4.Modifier{
dhcpv4.WithNetmask(match.Netmask),
dhcpv4.WithYourIP(match.IP),
dhcpv4.WithOption(dhcpv4.OptHostName(match.Hostname)),
dhcpv4.WithOption(dhcpv4.OptDNS(match.Nameservers...)),
dhcpv4.WithOption(dhcpv4.OptRouter(match.Gateway)),
dhcpv4.WithOption(dhcpv4.OptIPAddressLeaseTime(5*time.Minute)),
dhcpv4.WithOption(dhcpv4.OptIPAddressLeaseTime(5 * time.Minute)),
dhcpv4.WithOption(dhcpv4.OptServerIdentifier(serverIP)),
}
if match.Hostname != "" {
modifiers = append(modifiers,
dhcpv4.WithOption(dhcpv4.OptHostName(match.Hostname)),
)
}
resp, err := dhcpv4.NewReplyFromRequest(m,
modifiers...,
)
if err != nil {
log.Printf("failure building response: %s", err)
@ -153,7 +162,6 @@ func handlerDHCP6(serverHwAddr net.HardwareAddr, statePath string) server6.Handl
modifiers := []dhcpv6.Modifier{
dhcpv6.WithDNS(match.Nameservers...),
dhcpv6.WithFQDN(0, match.Hostname),
dhcpv6.WithIANA(dhcpv6.OptIAAddress{
IPv6Addr: match.IP,
PreferredLifetime: 5 * time.Minute,
@ -167,6 +175,12 @@ func handlerDHCP6(serverHwAddr net.HardwareAddr, statePath string) server6.Handl
}),
}
if match.Hostname != "" {
modifiers = append(modifiers,
dhcpv6.WithFQDN(0, match.Hostname),
)
}
var resp *dhcpv6.Message
switch msg.MessageType { //nolint:exhaustive

View File

@ -57,6 +57,9 @@ type NetworkRequest struct {
// CNI-specific parameters.
CNI CNIConfig
// DHCP options
DHCPSkipHostname bool
// Docker-specific parameters.
DockerDisableIPv6 bool
}

View File

@ -108,6 +108,7 @@ talosctl cluster create [flags]
--cpus-workers string the share of CPUs as fraction (each worker/VM) (default "2.0")
--crashdump print debug crashdump to stderr when cluster startup fails
--custom-cni-url string install custom CNI from the URL (Talos cluster)
--disable-dhcp-hostname skip announcing hostname via DHCP (QEMU only)
--disk int default limit on disk size in MB (each VM) (default 6144)
--disk-image-path string disk image to use
--dns-domain string the dns domain to use for cluster (default "cluster.local")