feat: add an option to skip downed peers in KubeSpan

Fixes #4248

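This makes the trade-off between security and connectivity configurable: traffic to a peer is either always forced through KubeSpan (even while the Wireguard peer connection is down), or it is allowed to go directly to the peer when the Wireguard connection can't be established.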

Signed-off-by: Andrey Smirnov <andrey.smirnov@talos-systems.com>
This commit is contained in:
Andrey Smirnov 2021-09-29 19:47:39 +03:00
parent cbbd7c6821
commit 9a51aa8358
No known key found for this signature in database
GPG Key ID: 7B26396447AB6DFD
14 changed files with 107 additions and 8 deletions

View File

@ -45,6 +45,7 @@ var genConfigCmdFlags struct {
persistConfig bool
withExamples bool
withDocs bool
withKubeSpan bool
}
// genConfigCmd represents the `gen config` command.
@ -208,6 +209,15 @@ func writeV1Alpha1Config(args []string) error {
genOptions = append(genOptions, generate.WithVersionContract(versionContract))
}
if genConfigCmdFlags.withKubeSpan {
genOptions = append(genOptions,
generate.WithNetworkOptions(
v1alpha1.WithKubeSpan(),
),
generate.WithClusterDiscovery(),
)
}
genOptions = append(genOptions,
generate.WithInstallDisk(genConfigCmdFlags.installDisk),
generate.WithInstallImage(genConfigCmdFlags.installImage),
@ -273,6 +283,7 @@ func init() {
genConfigCmd.Flags().BoolVarP(&genConfigCmdFlags.persistConfig, "persist", "p", true, "the desired persist value for configs")
genConfigCmd.Flags().BoolVarP(&genConfigCmdFlags.withExamples, "with-examples", "", true, "renders all machine configs with the commented examples")
genConfigCmd.Flags().BoolVarP(&genConfigCmdFlags.withDocs, "with-docs", "", true, "renders all machine configs adding the documentation for each field")
genConfigCmd.Flags().BoolVarP(&genConfigCmdFlags.withKubeSpan, "with-kubespan", "", false, "enable KubeSpan feature")
gen.Cmd.AddCommand(genConfigCmd)
}

2
go.mod
View File

@ -73,7 +73,7 @@ require (
github.com/spf13/cobra v1.2.1
github.com/stretchr/testify v1.7.0
github.com/talos-systems/crypto v0.3.2
github.com/talos-systems/discovery-service v0.0.3-0.20210928170742-e9d5dfa15e92
github.com/talos-systems/discovery-service v0.0.3-0.20210929192103-b2e2079088a5
github.com/talos-systems/go-blockdevice v0.2.4-0.20210925062844-70d28650b398
github.com/talos-systems/go-cmd v0.1.0
github.com/talos-systems/go-debug v0.2.1

4
go.sum
View File

@ -1042,8 +1042,8 @@ github.com/syndtr/gocapability v0.0.0-20180916011248-d98352740cb2/go.mod h1:hkRG
github.com/syndtr/gocapability v0.0.0-20200815063812-42c35b437635/go.mod h1:hkRG7XYTFWNJGYcbNJQlaLq0fg1yr4J4t/NcTQtrfww=
github.com/talos-systems/crypto v0.3.2 h1:I+MC9ql6K29EMlbPzdSeHZInSRWdze1FX1qGGrlom8Q=
github.com/talos-systems/crypto v0.3.2/go.mod h1:xaNCB2/Bxaj+qrkdeodhRv5eKQVvKOGBBMj58MrIPY8=
github.com/talos-systems/discovery-service v0.0.3-0.20210928170742-e9d5dfa15e92 h1:Z381kVGNLIZyvmCN6yhJSA0k5ArrTApVXwxB6u6gZXM=
github.com/talos-systems/discovery-service v0.0.3-0.20210928170742-e9d5dfa15e92/go.mod h1:+9VWFbTcUChtlE0qc2fQ3Lyj1kj2AakFQ/ITnaB8Pd0=
github.com/talos-systems/discovery-service v0.0.3-0.20210929192103-b2e2079088a5 h1:wwR30BY2PgCCCGL/dXezlJATbfcGF2yon0CnywAWc1s=
github.com/talos-systems/discovery-service v0.0.3-0.20210929192103-b2e2079088a5/go.mod h1:+9VWFbTcUChtlE0qc2fQ3Lyj1kj2AakFQ/ITnaB8Pd0=
github.com/talos-systems/go-blockdevice v0.2.3/go.mod h1:qnn/zDc09I1DA2BUDDCOSA2D0P8pIDjN8pGiRoRaQig=
github.com/talos-systems/go-blockdevice v0.2.4-0.20210925062844-70d28650b398 h1:4NH2IPnswmMfhU0Jb39vtik8xa7J3eObB1rbxhKzpO4=
github.com/talos-systems/go-blockdevice v0.2.4-0.20210925062844-70d28650b398/go.mod h1:qnn/zDc09I1DA2BUDDCOSA2D0P8pIDjN8pGiRoRaQig=

View File

@ -73,6 +73,7 @@ func (ctrl *ConfigController) Run(ctx context.Context, r controller.Runtime, log
res.(*kubespan.Config).TypedSpec().Enabled = c.Machine().Network().KubeSpan().Enabled()
res.(*kubespan.Config).TypedSpec().ClusterID = c.Cluster().ID()
res.(*kubespan.Config).TypedSpec().SharedSecret = c.Cluster().Secret()
res.(*kubespan.Config).TypedSpec().ForceRouting = c.Machine().Network().KubeSpan().ForceRouting()
return nil
}); err != nil {

View File

@ -54,6 +54,7 @@ func (suite *ConfigSuite) TestReconcileConfig() {
suite.Assert().True(spec.Enabled)
suite.Assert().Equal("8XuV9TZHW08DOk3bVxQjH9ih_TBKjnh-j44tsCLSBzo=", spec.ClusterID)
suite.Assert().Equal("I+1In7fLnpcRIjUmEoeugZnSyFoTF6MztLxICL5Yu0s=", spec.SharedSecret)
suite.Assert().True(spec.ForceRouting)
return nil
},

View File

@ -339,9 +339,16 @@ func (ctrl *ManagerController) Run(ctx context.Context, r controller.Runtime, lo
// build full allowedIPs set
var allowedIPsBuilder netaddr.IPSetBuilder
for _, peerSpec := range peerSpecs {
for _, prefix := range peerSpec.AllowedIPs {
allowedIPsBuilder.AddPrefix(prefix)
for pubKey, peerSpec := range peerSpecs {
// list of statuses and specs should be in sync at this point
peerStatus := peerStatuses[pubKey]
// add allowedIPs to the nftables set if either routing is forced (for any peer state)
// or if the peer connection state is up.
if cfgSpec.ForceRouting || peerStatus.State == kubespan.PeerStateUp {
for _, prefix := range peerSpec.AllowedIPs {
allowedIPsBuilder.AddPrefix(prefix)
}
}
}

View File

@ -108,6 +108,7 @@ func (mock *mockNftablesManager) IPSet() *netaddr.IPSet {
return mock.ipSet
}
//nolint:gocyclo
func (suite *ManagerSuite) TestReconcile() {
mockWireguard := &mockWireguardClient{}
mockNfTables := &mockNftablesManager{}
@ -130,6 +131,7 @@ func (suite *ManagerSuite) TestReconcile() {
cfg := kubespan.NewConfig(config.NamespaceName, kubespan.ConfigID)
cfg.TypedSpec().Enabled = true
cfg.TypedSpec().SharedSecret = "TPbGXrYlvuXgAl8dERpwjlA5tnEMoihPDPxlovcLtVg="
cfg.TypedSpec().ForceRouting = true
suite.Require().NoError(suite.state.Create(suite.ctx, cfg))
mac, err := net.ParseMAC("ea:71:1b:b2:cc:ee")
@ -310,6 +312,28 @@ func (suite *ManagerSuite) TestReconcile() {
},
))
// update config and disable force routing, nothing should be routed
oldVersion := cfg.Metadata().Version()
cfg.TypedSpec().ForceRouting = false
cfg.Metadata().BumpVersion()
suite.Require().NoError(suite.state.Update(suite.ctx, oldVersion, cfg))
suite.Assert().NoError(retry.Constant(3*time.Second, retry.WithUnits(100*time.Millisecond)).Retry(
func() error {
ipSet := mockNfTables.IPSet()
if ipSet == nil {
return retry.ExpectedErrorf("ipset is nil")
}
if len(ipSet.Prefixes()) != 0 {
return retry.ExpectedErrorf("expected empty ipset: %v", ipSet.Ranges())
}
return nil
},
))
// report up status via wireguard mock
mockWireguard.update(&wgtypes.Device{
Peers: []wgtypes.Peer{
@ -345,8 +369,28 @@ func (suite *ManagerSuite) TestReconcile() {
))
}
// as the peers are now up, all traffic should be routed via KubeSpan
suite.Assert().NoError(retry.Constant(3*time.Second, retry.WithUnits(100*time.Millisecond)).Retry(
func() error {
ipSet := mockNfTables.IPSet()
if ipSet == nil {
return retry.ExpectedErrorf("ipset is nil")
}
ranges := fmt.Sprintf("%v", ipSet.Ranges())
expected := "[10.244.1.0-10.244.2.255]"
if ranges != expected {
return retry.ExpectedErrorf("ranges %s != expected %s", ranges, expected)
}
return nil
},
))
// update config and disable wireguard, everything should be cleaned up
oldVersion := cfg.Metadata().Version()
oldVersion = cfg.Metadata().Version()
cfg.TypedSpec().Enabled = false
cfg.Metadata().BumpVersion()
suite.Require().NoError(suite.state.Update(suite.ctx, oldVersion, cfg))

View File

@ -217,6 +217,7 @@ type Route interface {
// KubeSpan configures KubeSpan feature.
type KubeSpan interface {
// Enabled reports whether the KubeSpan feature is enabled.
Enabled() bool
// ForceRouting reports whether traffic should be routed via KubeSpan
// even if the peer connection is not up.
ForceRouting() bool
}
// Time defines the requirements for a config that pertains to time related

View File

@ -904,6 +904,11 @@ func (k NetworkKubeSpan) Enabled() bool {
return k.KubeSpanEnabled
}
// ForceRouting implements KubeSpan interface.
//
// The result is the inverse of the allowDownPeerBypass setting: routing is
// forced unless bypassing KubeSpan has been explicitly allowed.
func (k NetworkKubeSpan) ForceRouting() bool {
	bypassAllowed := k.KubeSpanAllowDownPeerBypass

	return !bypassAllowed
}
// Disabled implements the config.Provider interface.
func (t *TimeConfig) Disabled() bool {
return t.TimeDisabled

View File

@ -1933,6 +1933,12 @@ type NetworkKubeSpan struct {
// Enable the KubeSpan feature.
// Cluster discovery should be enabled with .cluster.discovery.enabled for KubeSpan to be enabled.
KubeSpanEnabled bool `yaml:"enabled"`
// description: |
// Skip sending traffic via KubeSpan if the peer connection state is not up.
// This provides configurable choice between connectivity and security: either traffic is always
// forced to go via KubeSpan (even if Wireguard peer connection is not up), or traffic can go directly
// to the peer if Wireguard connection can't be established.
KubeSpanAllowDownPeerBypass bool `yaml:"allowDownPeerBypass,omitempty"`
}
// ClusterDiscoveryConfig struct configures cluster membership discovery.

View File

@ -2010,12 +2010,17 @@ func init() {
FieldName: "kubespan",
},
}
NetworkKubeSpanDoc.Fields = make([]encoder.Doc, 1)
NetworkKubeSpanDoc.Fields = make([]encoder.Doc, 2)
NetworkKubeSpanDoc.Fields[0].Name = "enabled"
NetworkKubeSpanDoc.Fields[0].Type = "bool"
NetworkKubeSpanDoc.Fields[0].Note = ""
NetworkKubeSpanDoc.Fields[0].Description = "Enable the KubeSpan feature.\nCluster discovery should be enabled with .cluster.discovery.enabled for KubeSpan to be enabled."
NetworkKubeSpanDoc.Fields[0].Comments[encoder.LineComment] = "Enable the KubeSpan feature."
NetworkKubeSpanDoc.Fields[1].Name = "allowDownPeerBypass"
NetworkKubeSpanDoc.Fields[1].Type = "bool"
NetworkKubeSpanDoc.Fields[1].Note = ""
NetworkKubeSpanDoc.Fields[1].Description = "Skip sending traffic via KubeSpan if the peer connection state is not up.\nThis provides configurable choice between connectivity and security: either traffic is always\nforced to go via KubeSpan (even if Wireguard peer connection is not up), or traffic can go directly\nto the peer if Wireguard connection can't be established."
NetworkKubeSpanDoc.Fields[1].Comments[encoder.LineComment] = "Skip sending traffic via KubeSpan if the peer connection state is not up."
ClusterDiscoveryConfigDoc.Type = "ClusterDiscoveryConfig"
ClusterDiscoveryConfigDoc.Comments[encoder.LineComment] = "ClusterDiscoveryConfig struct configures cluster membership discovery."

View File

@ -30,6 +30,8 @@ type ConfigSpec struct {
Enabled bool `yaml:"enabled"`
ClusterID string `yaml:"clusterId"`
SharedSecret string `yaml:"sharedSecret"`
// Force routing via KubeSpan even if the peer connection is not up.
ForceRouting bool `yaml:"forceRouting"`
}
// NewConfig initializes a Config resource.

View File

@ -1122,6 +1122,7 @@ talosctl gen config <cluster name> <cluster endpoint> [flags]
--version string the desired machine config version to generate (default "v1alpha1")
--with-docs renders all machine configs adding the documentation for each field (default true)
--with-examples renders all machine configs with the commented examples (default true)
--with-kubespan enable KubeSpan feature
```
### Options inherited from parent commands

View File

@ -5262,6 +5262,21 @@ Cluster discovery should be enabled with .cluster.discovery.enabled for KubeSpan
</div>
<hr />
<div class="dd">
<code>allowDownPeerBypass</code> <i>bool</i>
</div>
<div class="dt">
Skip sending traffic via KubeSpan if the peer connection state is not up.
This provides configurable choice between connectivity and security: either traffic is always
forced to go via KubeSpan (even if Wireguard peer connection is not up), or traffic can go directly
to the peer if Wireguard connection can't be established.
</div>
<hr />