fix: don't wait for the hostname in maintenance mode
Fixes #6119. With the new stable default hostname feature, any default hostname is disabled until the machine config is available. Talos enters maintenance mode when the default config source is empty, so it doesn't have any machine config available at the moment the maintenance service is started. The hostname might be set via different sources, e.g. kernel args or DHCP, before the machine config is available, but if none of these sources are available, the hostname won't be set at all. This change stops waiting for the hostname, and skips setting any DNS names in the maintenance-mode certificate SANs if the hostname is not available. It also adds a regression test via the new `--disable-dhcp-hostname` flag to `talosctl cluster create`. Signed-off-by: Andrey Smirnov <andrey.smirnov@talos-systems.com>
This commit is contained in:
parent
b15a639246
commit
2f2d97b6b5
@ -397,6 +397,14 @@ local integration_kubespan = Step("e2e-kubespan", target="e2e-qemu", privileged=
|
||||
"IMAGE_REGISTRY": local_registry,
|
||||
"WITH_CONFIG_PATCH": '[{"op": "replace", "path": "/cluster/discovery/registries/kubernetes/disabled", "value": false}]', # use Kubernetes discovery backend
|
||||
});
|
||||
local integration_default_hostname = Step("e2e-default-hostname", target="e2e-qemu", privileged=true, depends_on=[integration_kubespan], environment={
|
||||
# regression test: make sure Talos works in maintenance mode when no hostname is set
|
||||
"SHORT_INTEGRATION_TEST": "yes",
|
||||
"IMAGE_REGISTRY": local_registry,
|
||||
"VIA_MAINTENANCE_MODE": "true",
|
||||
"DISABLE_DHCP_HOSTNAME": "true",
|
||||
});
|
||||
|
||||
local integration_qemu_encrypted_vip = Step("e2e-encrypted-vip", target="e2e-qemu", privileged=true, depends_on=[load_artifacts], environment={
|
||||
"WITH_DISK_ENCRYPTION": "true",
|
||||
"WITH_VIRTUAL_IP": "true",
|
||||
@ -452,7 +460,7 @@ local integration_pipelines = [
|
||||
Pipeline('integration-provision-1', default_pipeline_steps + [integration_provision_tests_prepare, integration_provision_tests_track_1]) + integration_trigger(['integration-provision', 'integration-provision-1']),
|
||||
Pipeline('integration-provision-2', default_pipeline_steps + [integration_provision_tests_prepare, integration_provision_tests_track_2]) + integration_trigger(['integration-provision', 'integration-provision-2']),
|
||||
Pipeline('integration-misc', default_pipeline_steps + [integration_extensions
|
||||
, integration_cilium, integration_bios, integration_disk_image, integration_control_plane_port, integration_no_cluster_discovery, integration_kubespan]) + integration_trigger(['integration-misc']),
|
||||
, integration_cilium, integration_bios, integration_disk_image, integration_control_plane_port, integration_no_cluster_discovery, integration_kubespan, integration_default_hostname]) + integration_trigger(['integration-misc']),
|
||||
Pipeline('integration-qemu-encrypted-vip', default_pipeline_steps + [integration_qemu_encrypted_vip]) + integration_trigger(['integration-qemu-encrypted-vip']),
|
||||
Pipeline('integration-qemu-race', default_pipeline_steps + [build_race, integration_qemu_race]) + integration_trigger(['integration-qemu-race']),
|
||||
Pipeline('integration-qemu-csi', default_pipeline_steps + [integration_qemu_csi]) + integration_trigger(['integration-qemu-csi']),
|
||||
@ -464,7 +472,7 @@ local integration_pipelines = [
|
||||
Pipeline('cron-integration-provision-1', default_pipeline_steps + [integration_provision_tests_prepare, integration_provision_tests_track_1], [default_cron_pipeline]) + cron_trigger(['thrice-daily', 'nightly']),
|
||||
Pipeline('cron-integration-provision-2', default_pipeline_steps + [integration_provision_tests_prepare, integration_provision_tests_track_2], [default_cron_pipeline]) + cron_trigger(['thrice-daily', 'nightly']),
|
||||
Pipeline('cron-integration-misc', default_pipeline_steps + [integration_extensions
|
||||
, integration_cilium, integration_bios, integration_disk_image, integration_control_plane_port, integration_no_cluster_discovery, integration_kubespan], [default_cron_pipeline]) + cron_trigger(['thrice-daily', 'nightly']),
|
||||
, integration_cilium, integration_bios, integration_disk_image, integration_control_plane_port, integration_no_cluster_discovery, integration_kubespan, integration_default_hostname], [default_cron_pipeline]) + cron_trigger(['thrice-daily', 'nightly']),
|
||||
Pipeline('cron-integration-qemu-encrypted-vip', default_pipeline_steps + [integration_qemu_encrypted_vip], [default_cron_pipeline]) + cron_trigger(['thrice-daily', 'nightly']),
|
||||
Pipeline('cron-integration-qemu-race', default_pipeline_steps + [build_race, integration_qemu_race], [default_cron_pipeline]) + cron_trigger(['nightly']),
|
||||
Pipeline('cron-integration-qemu-csi', default_pipeline_steps + [integration_qemu_csi], [default_cron_pipeline]) + cron_trigger(['nightly']),
|
||||
|
@ -71,11 +71,6 @@ const (
|
||||
networkMTUFlag = "mtu"
|
||||
networkCIDRFlag = "cidr"
|
||||
nameserversFlag = "nameservers"
|
||||
cniBinPathFlag = "cni-bin-path"
|
||||
cniConfDirFlag = "cni-conf-dir"
|
||||
cniCacheDirFlag = "cni-cache-dir"
|
||||
cniBundleURLFlag = "cni-bundle-url"
|
||||
dockerDisableIPv6Flag = "docker-disable-ipv6"
|
||||
clusterDiskSizeFlag = "disk"
|
||||
clusterDisksFlag = "user-disk"
|
||||
customCNIUrlFlag = "custom-cni-url"
|
||||
@ -152,6 +147,7 @@ var (
|
||||
extraBootKernelArgs string
|
||||
dockerDisableIPv6 bool
|
||||
controlPlanePort int
|
||||
dhcpSkipHostname bool
|
||||
)
|
||||
|
||||
// createCmd represents the cluster up command.
|
||||
@ -287,6 +283,7 @@ func create(ctx context.Context, flags *pflag.FlagSet) (err error) {
|
||||
|
||||
BundleURL: cniBundleURL,
|
||||
},
|
||||
DHCPSkipHostname: dhcpSkipHostname,
|
||||
DockerDisableIPv6: dockerDisableIPv6,
|
||||
},
|
||||
|
||||
@ -895,10 +892,10 @@ func init() {
|
||||
createCmd.Flags().StringVar(&forceEndpoint, forceEndpointFlag, "", "use endpoint instead of provider defaults")
|
||||
createCmd.Flags().StringVar(&kubernetesVersion, "kubernetes-version", constants.DefaultKubernetesVersion, "desired kubernetes version to run")
|
||||
createCmd.Flags().StringVarP(&inputDir, inputDirFlag, "i", "", "location of pre-generated config files")
|
||||
createCmd.Flags().StringSliceVar(&cniBinPath, cniBinPathFlag, []string{filepath.Join(defaultCNIDir, "bin")}, "search path for CNI binaries (VM only)")
|
||||
createCmd.Flags().StringVar(&cniConfDir, cniConfDirFlag, filepath.Join(defaultCNIDir, "conf.d"), "CNI config directory path (VM only)")
|
||||
createCmd.Flags().StringVar(&cniCacheDir, cniCacheDirFlag, filepath.Join(defaultCNIDir, "cache"), "CNI cache directory path (VM only)")
|
||||
createCmd.Flags().StringVar(&cniBundleURL, cniBundleURLFlag, fmt.Sprintf("https://github.com/%s/talos/releases/download/%s/talosctl-cni-bundle-%s.tar.gz",
|
||||
createCmd.Flags().StringSliceVar(&cniBinPath, "cni-bin-path", []string{filepath.Join(defaultCNIDir, "bin")}, "search path for CNI binaries (VM only)")
|
||||
createCmd.Flags().StringVar(&cniConfDir, "cni-conf-dir", filepath.Join(defaultCNIDir, "conf.d"), "CNI config directory path (VM only)")
|
||||
createCmd.Flags().StringVar(&cniCacheDir, "cni-cache-dir", filepath.Join(defaultCNIDir, "cache"), "CNI cache directory path (VM only)")
|
||||
createCmd.Flags().StringVar(&cniBundleURL, "cni-bundle-url", fmt.Sprintf("https://github.com/%s/talos/releases/download/%s/talosctl-cni-bundle-%s.tar.gz",
|
||||
images.Username, trimVersion(version.Tag), constants.ArchVariable), "URL to download CNI bundle from (VM only)")
|
||||
createCmd.Flags().StringVarP(&ports,
|
||||
"exposed-ports",
|
||||
@ -924,8 +921,9 @@ func init() {
|
||||
createCmd.Flags().StringArrayVar(&configPatchWorker, "config-patch-worker", nil, "patch generated machineconfigs (applied to 'worker' type)")
|
||||
createCmd.Flags().BoolVar(&badRTC, "bad-rtc", false, "launch VM with bad RTC state (QEMU only)")
|
||||
createCmd.Flags().StringVar(&extraBootKernelArgs, "extra-boot-kernel-args", "", "add extra kernel args to the initial boot from vmlinuz and initramfs (QEMU only)")
|
||||
createCmd.Flags().BoolVar(&dockerDisableIPv6, dockerDisableIPv6Flag, false, "skip enabling IPv6 in containers (Docker only)")
|
||||
createCmd.Flags().BoolVar(&dockerDisableIPv6, "docker-disable-ipv6", false, "skip enabling IPv6 in containers (Docker only)")
|
||||
createCmd.Flags().IntVar(&controlPlanePort, controlPlanePortFlag, constants.DefaultControlPlanePort, "control plane port (load balancer and local API port)")
|
||||
createCmd.Flags().BoolVar(&dhcpSkipHostname, "disable-dhcp-hostname", false, "skip announcing hostname via DHCP (QEMU only)")
|
||||
|
||||
Cmd.AddCommand(createCmd)
|
||||
}
|
||||
@ -944,11 +942,6 @@ func checkForDefinedGenFlag(flags *pflag.FlagSet) string {
|
||||
networkMTUFlag,
|
||||
networkCIDRFlag,
|
||||
nameserversFlag,
|
||||
cniBinPathFlag,
|
||||
cniConfDirFlag,
|
||||
cniCacheDirFlag,
|
||||
cniBundleURLFlag,
|
||||
dockerDisableIPv6Flag,
|
||||
clusterDiskSizeFlag,
|
||||
clusterDisksFlag,
|
||||
customCNIUrlFlag,
|
||||
|
@ -63,6 +63,23 @@ case "${WITH_CONTROL_PLANE_PORT:-false}" in
|
||||
;;
|
||||
esac
|
||||
|
||||
case "${VIA_MAINTENANCE_MODE:-false}" in
|
||||
false)
|
||||
;;
|
||||
*)
|
||||
# apply config via maintenance mode
|
||||
QEMU_FLAGS="${QEMU_FLAGS} --skip-injecting-config --with-apply-config"
|
||||
;;
|
||||
esac
|
||||
|
||||
case "${DISABLE_DHCP_HOSTNAME:-false}" in
|
||||
false)
|
||||
;;
|
||||
*)
|
||||
QEMU_FLAGS="${QEMU_FLAGS} --disable-dhcp-hostname"
|
||||
;;
|
||||
esac
|
||||
|
||||
case "${USE_DISK_IMAGE:-false}" in
|
||||
false)
|
||||
DISK_IMAGE_FLAG=
|
||||
|
@ -34,9 +34,9 @@ import (
|
||||
//
|
||||
//nolint:gocyclo
|
||||
func Run(ctx context.Context, logger *log.Logger, r runtime.Runtime) ([]byte, error) {
|
||||
logger.Println("waiting for network address and hostname to be ready")
|
||||
logger.Println("waiting for network address to be ready")
|
||||
|
||||
if err := network.NewReadyCondition(r.State().V1Alpha2().Resources(), network.AddressReady, network.HostnameReady).Wait(ctx); err != nil {
|
||||
if err := network.NewReadyCondition(r.State().V1Alpha2().Resources(), network.AddressReady).Wait(ctx); err != nil {
|
||||
return nil, fmt.Errorf("error waiting for the network to be ready: %w", err)
|
||||
}
|
||||
|
||||
@ -52,12 +52,17 @@ func Run(ctx context.Context, logger *log.Logger, r runtime.Runtime) ([]byte, er
|
||||
|
||||
ips := currentAddresses.(*network.NodeAddress).TypedSpec().IPs()
|
||||
|
||||
// hostname might not be available yet, so use it only if it is available
|
||||
hostnameStatus, err := r.State().V1Alpha2().Resources().Get(ctx, resource.NewMetadata(network.NamespaceName, network.HostnameStatusType, network.HostnameID, resource.VersionUndefined))
|
||||
if err != nil {
|
||||
if err != nil && !state.IsNotFoundError(err) {
|
||||
return nil, fmt.Errorf("error getting node hostname: %w", err)
|
||||
}
|
||||
|
||||
dnsNames := hostnameStatus.(*network.HostnameStatus).TypedSpec().DNSNames()
|
||||
var dnsNames []string
|
||||
|
||||
if hostnameStatus != nil {
|
||||
dnsNames = hostnameStatus.(*network.HostnameStatus).TypedSpec().DNSNames()
|
||||
}
|
||||
|
||||
tlsConfig, provider, err := genTLSConfig(ips, dnsNames)
|
||||
if err != nil {
|
||||
|
@ -111,21 +111,45 @@ func (suite *DiscoverySuite) TestMembers() {
|
||||
continue
|
||||
}
|
||||
|
||||
memberByID := make(map[string]*cluster.Member)
|
||||
memberByName := slices.ToMap(members,
|
||||
func(member *cluster.Member) (string, *cluster.Member) {
|
||||
return member.Metadata().ID(), member
|
||||
},
|
||||
)
|
||||
|
||||
memberByIP := make(map[netaddr.IP]*cluster.Member)
|
||||
|
||||
for _, member := range members {
|
||||
memberByID[member.Metadata().ID()] = member
|
||||
for _, addr := range member.TypedSpec().Addresses {
|
||||
memberByIP[addr] = member
|
||||
}
|
||||
}
|
||||
|
||||
nodesInfo := suite.Cluster.Info().Nodes
|
||||
|
||||
for _, nodeInfo := range nodesInfo {
|
||||
matchingMember := memberByID[nodeInfo.Name]
|
||||
matchingMember := memberByName[nodeInfo.Name]
|
||||
|
||||
var matchingMemberByIP *cluster.Member
|
||||
|
||||
for _, nodeIPStd := range nodeInfo.IPs {
|
||||
nodeIP, ok := netaddr.FromStdIP(nodeIPStd)
|
||||
suite.Assert().True(ok)
|
||||
|
||||
matchingMemberByIP = memberByIP[nodeIP]
|
||||
|
||||
break
|
||||
}
|
||||
|
||||
// if hostnames are not set via DHCP, use match by IP
|
||||
if matchingMember == nil {
|
||||
matchingMember = matchingMemberByIP
|
||||
}
|
||||
|
||||
suite.Require().NotNil(matchingMember)
|
||||
|
||||
suite.Assert().Equal(nodeInfo.Type, matchingMember.TypedSpec().MachineType)
|
||||
suite.Assert().Equal(expectedTalosVersion, matchingMember.TypedSpec().OperatingSystem)
|
||||
suite.Assert().Equal(nodeInfo.Name, matchingMember.TypedSpec().Hostname)
|
||||
|
||||
for _, nodeIPStd := range nodeInfo.IPs {
|
||||
nodeIP, ok := netaddr.FromStdIP(nodeIPStd)
|
||||
|
@ -27,7 +27,7 @@ import (
|
||||
"github.com/talos-systems/talos/pkg/provision/providers/vm"
|
||||
)
|
||||
|
||||
//nolint:gocyclo
|
||||
//nolint:gocyclo,cyclop
|
||||
func (p *provisioner) createNode(state *vm.State, clusterReq provision.ClusterRequest, nodeReq provision.NodeRequest, opts *provision.Options) (provision.NodeInfo, error) {
|
||||
arch := Arch(opts.TargetArch)
|
||||
pidPath := state.GetRelativePath(fmt.Sprintf("%s.pid", nodeReq.Name))
|
||||
@ -131,7 +131,6 @@ func (p *provisioner) createNode(state *vm.State, clusterReq provision.ClusterRe
|
||||
CNI: clusterReq.Network.CNI,
|
||||
CIDRs: clusterReq.Network.CIDRs,
|
||||
IPs: nodeReq.IPs,
|
||||
Hostname: nodeReq.Name,
|
||||
GatewayAddrs: clusterReq.Network.GatewayAddrs,
|
||||
MTU: clusterReq.Network.MTU,
|
||||
Nameservers: clusterReq.Network.Nameservers,
|
||||
@ -140,6 +139,10 @@ func (p *provisioner) createNode(state *vm.State, clusterReq provision.ClusterRe
|
||||
APIPort: apiPort,
|
||||
}
|
||||
|
||||
if !clusterReq.Network.DHCPSkipHostname {
|
||||
launchConfig.Hostname = nodeReq.Name
|
||||
}
|
||||
|
||||
if !nodeReq.PXEBooted {
|
||||
launchConfig.KernelImagePath = strings.ReplaceAll(clusterReq.KernelPath, constants.ArchVariable, opts.TargetArch)
|
||||
launchConfig.InitrdPath = strings.ReplaceAll(clusterReq.InitramfsPath, constants.ArchVariable, opts.TargetArch)
|
||||
|
@ -60,14 +60,23 @@ func handlerDHCP4(serverIP net.IP, statePath string) server4.Handler {
|
||||
return
|
||||
}
|
||||
|
||||
resp, err := dhcpv4.NewReplyFromRequest(m,
|
||||
modifiers := []dhcpv4.Modifier{
|
||||
dhcpv4.WithNetmask(match.Netmask),
|
||||
dhcpv4.WithYourIP(match.IP),
|
||||
dhcpv4.WithOption(dhcpv4.OptHostName(match.Hostname)),
|
||||
dhcpv4.WithOption(dhcpv4.OptDNS(match.Nameservers...)),
|
||||
dhcpv4.WithOption(dhcpv4.OptRouter(match.Gateway)),
|
||||
dhcpv4.WithOption(dhcpv4.OptIPAddressLeaseTime(5*time.Minute)),
|
||||
dhcpv4.WithOption(dhcpv4.OptIPAddressLeaseTime(5 * time.Minute)),
|
||||
dhcpv4.WithOption(dhcpv4.OptServerIdentifier(serverIP)),
|
||||
}
|
||||
|
||||
if match.Hostname != "" {
|
||||
modifiers = append(modifiers,
|
||||
dhcpv4.WithOption(dhcpv4.OptHostName(match.Hostname)),
|
||||
)
|
||||
}
|
||||
|
||||
resp, err := dhcpv4.NewReplyFromRequest(m,
|
||||
modifiers...,
|
||||
)
|
||||
if err != nil {
|
||||
log.Printf("failure building response: %s", err)
|
||||
@ -153,7 +162,6 @@ func handlerDHCP6(serverHwAddr net.HardwareAddr, statePath string) server6.Handl
|
||||
|
||||
modifiers := []dhcpv6.Modifier{
|
||||
dhcpv6.WithDNS(match.Nameservers...),
|
||||
dhcpv6.WithFQDN(0, match.Hostname),
|
||||
dhcpv6.WithIANA(dhcpv6.OptIAAddress{
|
||||
IPv6Addr: match.IP,
|
||||
PreferredLifetime: 5 * time.Minute,
|
||||
@ -167,6 +175,12 @@ func handlerDHCP6(serverHwAddr net.HardwareAddr, statePath string) server6.Handl
|
||||
}),
|
||||
}
|
||||
|
||||
if match.Hostname != "" {
|
||||
modifiers = append(modifiers,
|
||||
dhcpv6.WithFQDN(0, match.Hostname),
|
||||
)
|
||||
}
|
||||
|
||||
var resp *dhcpv6.Message
|
||||
|
||||
switch msg.MessageType { //nolint:exhaustive
|
||||
|
@ -57,6 +57,9 @@ type NetworkRequest struct {
|
||||
// CNI-specific parameters.
|
||||
CNI CNIConfig
|
||||
|
||||
// DHCP options
|
||||
DHCPSkipHostname bool
|
||||
|
||||
// Docker-specific parameters.
|
||||
DockerDisableIPv6 bool
|
||||
}
|
||||
|
@ -108,6 +108,7 @@ talosctl cluster create [flags]
|
||||
--cpus-workers string the share of CPUs as fraction (each worker/VM) (default "2.0")
|
||||
--crashdump print debug crashdump to stderr when cluster startup fails
|
||||
--custom-cni-url string install custom CNI from the URL (Talos cluster)
|
||||
--disable-dhcp-hostname skip announcing hostname via DHCP (QEMU only)
|
||||
--disk int default limit on disk size in MB (each VM) (default 6144)
|
||||
--disk-image-path string disk image to use
|
||||
--dns-domain string the dns domain to use for cluster (default "cluster.local")
|
||||
|
Loading…
x
Reference in New Issue
Block a user