feat: reset with system disk wipe spec

The idea is to add an option to perform a "selective" reset: the default reset
operation is to wipe all partitions (triggering a reinstall), while the spec
allows wiping only some of the partitions.

Other operations are performed exactly in the same way for any reset
flow.

Possible use case: reset only `EPHEMERAL` partition.

Signed-off-by: Andrey Smirnov <smirnov.andrey@gmail.com>
This commit is contained in:
Andrey Smirnov 2020-12-09 15:49:46 +03:00 committed by talos-bot
parent c5ffe9f4f7
commit 54ed80e244
17 changed files with 2665 additions and 2118 deletions

View File

@ -141,9 +141,20 @@ message Event {
}
// rpc reset
// ResetPartitionSpec selects one system disk partition by its label and
// states whether that partition should be wiped.
message ResetPartitionSpec {
// Label of the partition this entry refers to.
string label = 1;
// Wipe marks the partition to be wiped as part of the reset.
bool wipe = 2;
}
// ResetRequest carries the parameters of the reset rpc.
message ResetRequest {
// Graceful indicates whether node should leave etcd before the reset, it also
// enforces etcd checks before leaving.
bool graceful = 1;
// Reboot indicates whether node should reboot or halt after resetting.
bool reboot = 2;
// System_partitions_to_wipe lists specific system disk partitions to be reset (wiped).
// If system_partitions_to_wipe is empty, all the partitions are erased.
repeated ResetPartitionSpec system_partitions_to_wipe = 3;
}
// The reset message containing the restart status.

View File

@ -0,0 +1,38 @@
// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this
// file, You can obtain one at http://mozilla.org/MPL/2.0/.
package install
// PartitionType is the type identifier of a partition in the partition table.
//
// It is declared as an alias of string, so it is interchangeable with plain
// string values.
type PartitionType = string
// GPT partition types, written as upper-case GUID strings.
//
// TODO: should be moved into the blockdevice library.
const (
// EFISystemPartition is the partition type of the EFI system partition.
EFISystemPartition PartitionType = "C12A7328-F81F-11D2-BA4B-00A0C93EC93B"
// BIOSBootPartition is the partition type of the BIOS boot partition.
BIOSBootPartition PartitionType = "21686148-6449-6E6F-744E-656564454649"
// LinuxFilesystemData is the partition type of Linux filesystem data partitions.
LinuxFilesystemData PartitionType = "0FC63DAF-8483-4772-8E79-3D69D8477DE4"
)
// FileSystemType is used to format partitions.
//
// It is declared as an alias of string, so it is interchangeable with plain
// string values.
type FileSystemType = string
// Filesystem types.
const (
// FilesystemTypeNone marks a partition that carries no filesystem.
FilesystemTypeNone FileSystemType = "none"
// FilesystemTypeXFS selects the XFS filesystem.
FilesystemTypeXFS FileSystemType = "xfs"
// FilesystemTypeVFAT selects the VFAT filesystem.
FilesystemTypeVFAT FileSystemType = "vfat"
)
// Partition default sizes.
//
// Every size is expressed as a multiple of MiB.
const (
// MiB is the multiplier 1024 * 1024 (one mebibyte when counting bytes).
MiB = 1024 * 1024
// EFISize is the default size of the EFI partition.
EFISize = 100 * MiB
// BIOSGrubSize is the default size of the BIOS GRUB partition.
BIOSGrubSize = 1 * MiB
// BootSize is the default size of the boot partition.
BootSize = 300 * MiB
// MetaSize is the default size of the meta partition.
MetaSize = 1 * MiB
// StateSize is the default size of the state partition.
StateSize = 100 * MiB
)

View File

@ -6,12 +6,8 @@ package install
import (
"bufio"
"bytes"
"compress/gzip"
"context"
"errors"
"fmt"
"io"
"log"
"os"
"path/filepath"
@ -22,14 +18,11 @@ import (
"github.com/talos-systems/go-blockdevice/blockdevice/partition/gpt"
"github.com/talos-systems/go-blockdevice/blockdevice/util"
"github.com/talos-systems/go-retry/retry"
"golang.org/x/sys/unix"
"github.com/talos-systems/talos/internal/app/machined/pkg/runtime"
"github.com/talos-systems/talos/internal/app/machined/pkg/runtime/v1alpha1/board"
"github.com/talos-systems/talos/internal/pkg/mount"
"github.com/talos-systems/talos/pkg/archiver"
"github.com/talos-systems/talos/pkg/machinery/constants"
"github.com/talos-systems/talos/pkg/makefs"
)
// Manifest represents the instructions for preparing all block devices
@ -48,85 +41,6 @@ type Device struct {
Zero bool
}
// Target represents an installation partition.
//
//nolint: golint, maligned
type Target struct {
Device string
Label string
PartitionType PartitionType
FileSystemType FileSystemType
LegacyBIOSBootable bool
Size uint64
Force bool
Assets []*Asset
// Preserve contents of the partition with the same label (if it exists).
PreserveContents bool
// Extra preserved locations (for upgrading from older versions of Talos).
//
// Used only if PreserveContents is true.
ExtraPreserveSources []PreserveSource
// Skip makes manifest skip any actions with the partition (creating, formatting).
//
// Skipped partitions should exist on the disk by the time manifest execution starts.
Skip bool
// set during execution
PartitionName string
Contents *bytes.Buffer
}
// Asset represents a file required by a target.
type Asset struct {
Source string
Destination string
}
// PreserveSource instructs Talos where to look for source files to preserve.
type PreserveSource struct {
Label string
FnmatchFilters []string
FileSystemType FileSystemType
}
// PartitionType in partition table.
type PartitionType = string
// GPT partition types.
//
// TODO: should be moved into the blockdevice library.
const (
EFISystemPartition PartitionType = "C12A7328-F81F-11D2-BA4B-00A0C93EC93B"
BIOSBootPartition PartitionType = "21686148-6449-6E6F-744E-656564454649"
LinuxFilesystemData PartitionType = "0FC63DAF-8483-4772-8E79-3D69D8477DE4"
)
// FileSystemType is used to format partitions.
type FileSystemType = string
// Filesystem types.
const (
FilesystemTypeNone FileSystemType = "none"
FilesystemTypeXFS FileSystemType = "xfs"
FilesystemTypeVFAT FileSystemType = "vfat"
)
// Partition default sizes.
const (
MiB = 1024 * 1024
EFISize = 100 * MiB
BIOSGrubSize = 1 * MiB
BootSize = 300 * MiB
MetaSize = 1 * MiB
StateSize = 100 * MiB
)
// NewManifest initializes and returns a Manifest.
//
//nolint: gocyclo
@ -190,35 +104,13 @@ func NewManifest(label string, sequence runtime.Sequence, bootPartitionFound boo
manifest.Targets[opts.Disk] = []*Target{}
}
efiTarget := &Target{
Device: opts.Disk,
Label: constants.EFIPartitionLabel,
PartitionType: EFISystemPartition,
FileSystemType: FilesystemTypeVFAT,
Size: EFISize,
Force: true,
}
biosTarget := &Target{
Device: opts.Disk,
Label: constants.BIOSGrubPartitionLabel,
PartitionType: BIOSBootPartition,
FileSystemType: FilesystemTypeNone,
LegacyBIOSBootable: true,
Size: BIOSGrubSize,
Force: true,
}
efiTarget := EFITarget(opts.Disk, nil)
biosTarget := BIOSTarget(opts.Disk, nil)
var bootTarget *Target
if opts.Bootloader {
bootTarget = &Target{
Device: opts.Disk,
Label: constants.BootPartitionLabel,
PartitionType: LinuxFilesystemData,
FileSystemType: FilesystemTypeXFS,
Size: BootSize,
Force: true,
bootTarget = BootTarget(opts.Disk, &Target{
PreserveContents: bootPartitionFound,
Assets: []*Asset{
{
@ -230,26 +122,14 @@ func NewManifest(label string, sequence runtime.Sequence, bootPartitionFound boo
Destination: filepath.Join(constants.BootMountPoint, label, constants.InitramfsAsset),
},
},
}
})
}
metaTarget := &Target{
Device: opts.Disk,
Label: constants.MetaPartitionLabel,
PartitionType: LinuxFilesystemData,
FileSystemType: FilesystemTypeNone,
Size: MetaSize,
Force: true,
metaTarget := MetaTarget(opts.Disk, &Target{
PreserveContents: bootPartitionFound,
}
})
stateTarget := &Target{
Device: opts.Disk,
Label: constants.StatePartitionLabel,
PartitionType: LinuxFilesystemData,
FileSystemType: FilesystemTypeXFS,
Size: StateSize,
Force: true,
stateTarget := StateTarget(opts.Disk, &Target{
PreserveContents: bootPartitionFound,
ExtraPreserveSources: []PreserveSource{
{
@ -258,19 +138,14 @@ func NewManifest(label string, sequence runtime.Sequence, bootPartitionFound boo
FnmatchFilters: []string{"config.yaml"},
},
},
}
})
ephemeralTarget := &Target{
Device: opts.Disk,
Label: constants.EphemeralPartitionLabel,
PartitionType: LinuxFilesystemData,
FileSystemType: FilesystemTypeXFS,
Size: 0,
}
ephemeralTarget := EphemeralTarget(opts.Disk, nil)
if opts.Force {
ephemeralTarget.Force = true
} else {
ephemeralTarget.Force = false
ephemeralTarget.Skip = true
stateTarget.Size = 0 // expand previous partition to cover whatever space is available
}
@ -640,17 +515,15 @@ func (m *Manifest) zeroDevice(device Device) (err error) {
// nolint: dupl, gocyclo
func (t *Target) Partition(pt *gpt.GPT, pos int, bd *blockdevice.BlockDevice) (err error) {
if t.Skip {
for _, part := range pt.Partitions().Items() {
if part.Name == t.Label {
t.PartitionName, err = util.PartPath(t.Device, int(part.Number))
if err != nil {
return err
}
var part *gpt.Partition
log.Printf("skipped %s (%s) size %d blocks", t.PartitionName, t.Label, part.Length())
part, err = t.Locate(pt)
if err != nil {
return err
}
break
}
if part != nil {
log.Printf("skipped %s (%s) size %d blocks", t.PartitionName, t.Label, part.Length())
}
return nil
@ -685,237 +558,3 @@ func (t *Target) Partition(pt *gpt.GPT, pos int, bd *blockdevice.BlockDevice) (e
return nil
}
// Format creates a filesystem on the device/partition.
//
//nolint: gocyclo
func (t *Target) Format() error {
if t.Skip {
return nil
}
if t.FileSystemType == FilesystemTypeNone {
return t.zeroPartition()
}
log.Printf("formatting partition %q as %q with label %q\n", t.PartitionName, t.FileSystemType, t.Label)
opts := []makefs.Option{makefs.WithForce(t.Force), makefs.WithLabel(t.Label)}
switch t.FileSystemType {
case FilesystemTypeVFAT:
return makefs.VFAT(t.PartitionName, opts...)
case FilesystemTypeXFS:
return makefs.XFS(t.PartitionName, opts...)
default:
return fmt.Errorf("unsupported filesystem type: %q", t.FileSystemType)
}
}
// Save copies the assets to the bootloader partition.
func (t *Target) Save() (err error) {
for _, asset := range t.Assets {
asset := asset
err = func() error {
var (
sourceFile *os.File
destFile *os.File
)
if sourceFile, err = os.Open(asset.Source); err != nil {
return err
}
// nolint: errcheck
defer sourceFile.Close()
if err = os.MkdirAll(filepath.Dir(asset.Destination), os.ModeDir); err != nil {
return err
}
if destFile, err = os.Create(asset.Destination); err != nil {
return err
}
// nolint: errcheck
defer destFile.Close()
log.Printf("copying %s to %s\n", sourceFile.Name(), destFile.Name())
if _, err = io.Copy(destFile, sourceFile); err != nil {
log.Printf("failed to copy %s to %s\n", sourceFile.Name(), destFile.Name())
return err
}
if err = destFile.Close(); err != nil {
log.Printf("failed to close %s", destFile.Name())
return err
}
if err = sourceFile.Close(); err != nil {
log.Printf("failed to close %s", sourceFile.Name())
return err
}
return nil
}()
if err != nil {
return err
}
}
return nil
}
func withTemporaryMounted(partPath string, flags uintptr, fileSystemType FileSystemType, label string, f func(mountPath string) error) error {
mountPath := filepath.Join(constants.SystemPath, "mnt")
mountpoints := mount.NewMountPoints()
mountpoint := mount.NewMountPoint(partPath, mountPath, fileSystemType, unix.MS_NOATIME|flags, "")
mountpoints.Set(label, mountpoint)
if err := mount.Mount(mountpoints); err != nil {
return fmt.Errorf("failed to mount %q: %w", partPath, err)
}
defer func() {
if err := mount.Unmount(mountpoints); err != nil {
log.Printf("failed to unmount: %s", err)
}
}()
return f(mountPath)
}
// SaveContents saves contents of partition to the target (in-memory).
func (t *Target) SaveContents(device Device, source *gpt.Partition, fileSystemType FileSystemType, fnmatchFilters []string) error {
partPath, err := util.PartPath(device.Device, int(source.Number))
if err != nil {
return err
}
if fileSystemType == FilesystemTypeNone {
err = t.saveRawContents(partPath)
} else {
err = t.saveFilesystemContents(partPath, fileSystemType, fnmatchFilters)
}
if err != nil {
t.Contents = nil
return err
}
log.Printf("preserved contents of %q: %d bytes", t.Label, t.Contents.Len())
return nil
}
func (t *Target) saveRawContents(partPath string) error {
src, err := os.Open(partPath)
if err != nil {
return fmt.Errorf("error opening source partition: %q", err)
}
defer src.Close() //nolint: errcheck
t.Contents = bytes.NewBuffer(nil)
zw := gzip.NewWriter(t.Contents)
defer zw.Close() //nolint: errcheck
_, err = io.Copy(zw, src)
if err != nil {
return fmt.Errorf("error copying partition %q contents: %w", partPath, err)
}
return src.Close()
}
func (t *Target) saveFilesystemContents(partPath string, fileSystemType FileSystemType, fnmatchFilters []string) error {
t.Contents = bytes.NewBuffer(nil)
return withTemporaryMounted(partPath, unix.MS_RDONLY, fileSystemType, t.Label, func(mountPath string) error {
return archiver.TarGz(context.TODO(), mountPath, t.Contents, archiver.WithFnmatchPatterns(fnmatchFilters...))
})
}
// RestoreContents restores previously saved contents to the disk.
func (t *Target) RestoreContents() error {
if t.Contents == nil {
return nil
}
var err error
if t.FileSystemType == FilesystemTypeNone {
err = t.restoreRawContents()
} else {
err = t.restoreFilesystemContents()
}
t.Contents = nil
if err != nil {
return err
}
log.Printf("restored contents of %q", t.Label)
return nil
}
func (t *Target) restoreRawContents() error {
dst, err := os.OpenFile(t.PartitionName, os.O_WRONLY, 0)
if err != nil {
return fmt.Errorf("error opening source partition: %q", err)
}
defer dst.Close() //nolint: errcheck
zr, err := gzip.NewReader(t.Contents)
if err != nil {
return err
}
_, err = io.Copy(dst, zr)
if err != nil {
return fmt.Errorf("error restoring partition %q contents: %w", t.PartitionName, err)
}
return dst.Close()
}
func (t *Target) restoreFilesystemContents() error {
return withTemporaryMounted(t.PartitionName, 0, t.FileSystemType, t.Label, func(mountPath string) error {
return archiver.UntarGz(context.TODO(), t.Contents, mountPath)
})
}
// zeroPartition fills the partition with zeroes.
func (t *Target) zeroPartition() (err error) {
log.Printf("zeroing out %q", t.PartitionName)
zeroes, err := os.Open("/dev/zero")
if err != nil {
return err
}
defer zeroes.Close() //nolint: errcheck
part, err := os.OpenFile(t.PartitionName, os.O_WRONLY, 0)
if err != nil {
return err
}
defer part.Close() //nolint: errcheck
_, err = io.CopyN(part, zeroes, int64(t.Size))
return err
}

View File

@ -109,7 +109,7 @@ func (suite *manifestSuite) verifyBlockdevice(manifest *install.Manifest, curren
// verify partition table
suite.Assert().Len(table.Partitions(), 6)
suite.Assert().Len(table.Partitions().Items(), 6)
part := table.Partitions().Items()[0]
suite.Assert().Equal(install.EFISystemPartition, strings.ToUpper(part.Type.String()))
@ -244,6 +244,7 @@ func (suite *manifestSuite) TestExecuteManifestClean() {
Disk: suite.loopbackDevice.Name(),
Bootloader: true,
Force: true,
Board: constants.BoardNone,
})
suite.Require().NoError(err)
@ -259,6 +260,7 @@ func (suite *manifestSuite) TestExecuteManifestForce() {
Disk: suite.loopbackDevice.Name(),
Bootloader: true,
Force: true,
Board: constants.BoardNone,
})
suite.Require().NoError(err)
@ -273,6 +275,7 @@ func (suite *manifestSuite) TestExecuteManifestForce() {
Bootloader: true,
Force: true,
Zero: true,
Board: constants.BoardNone,
})
suite.Require().NoError(err)
@ -288,6 +291,7 @@ func (suite *manifestSuite) TestExecuteManifestPreserve() {
Disk: suite.loopbackDevice.Name(),
Bootloader: true,
Force: true,
Board: constants.BoardNone,
})
suite.Require().NoError(err)
@ -301,6 +305,7 @@ func (suite *manifestSuite) TestExecuteManifestPreserve() {
Disk: suite.loopbackDevice.Name(),
Bootloader: true,
Force: false,
Board: constants.BoardNone,
})
suite.Require().NoError(err)
@ -320,6 +325,7 @@ func (suite *manifestSuite) TestExecuteManifestLegacyForce() {
Disk: suite.loopbackDevice.Name(),
Bootloader: true,
Force: true,
Board: constants.BoardNone,
})
suite.Require().NoError(err)
@ -339,6 +345,7 @@ func (suite *manifestSuite) TestExecuteManifestLegacyPreserve() {
Disk: suite.loopbackDevice.Name(),
Bootloader: true,
Force: false,
Board: constants.BoardNone,
})
suite.Require().NoError(err)
@ -386,15 +393,10 @@ func (suite *manifestSuite) createTalosLegacyLayout() {
bd, err := blockdevice.Open(suite.loopbackDevice.Name())
suite.Require().NoError(err)
_, err = gpt.New(bd.Device())
suite.Require().NoError(err)
suite.Require().NoError(err)
defer bd.Close() //nolint: errcheck
// create Talos 0.6 partitions
table, err := bd.PartitionTable()
table, err := gpt.New(bd.Device())
suite.Require().NoError(err)
partBoot, err := table.Add(512*install.MiB,

View File

@ -0,0 +1,425 @@
// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this
// file, You can obtain one at http://mozilla.org/MPL/2.0/.
package install
import (
"bytes"
"compress/gzip"
"context"
"fmt"
"io"
"log"
"os"
"path/filepath"
"github.com/talos-systems/go-blockdevice/blockdevice/partition/gpt"
"github.com/talos-systems/go-blockdevice/blockdevice/util"
"golang.org/x/sys/unix"
"github.com/talos-systems/talos/internal/pkg/mount"
"github.com/talos-systems/talos/pkg/archiver"
"github.com/talos-systems/talos/pkg/machinery/constants"
"github.com/talos-systems/talos/pkg/makefs"
)
// Target represents an installation partition.
//
// The descriptive fields are set by whoever builds the target; PartitionName
// and Contents are filled in while the target is being processed.
//
//nolint: golint, maligned
type Target struct {
// Device is the disk device the partition lives on; the partition path is derived from it.
Device string
// Label is the partition name looked up on disk and the filesystem label applied when formatting.
Label string
// PartitionType is the type recorded for the partition in the partition table.
PartitionType PartitionType
// FileSystemType selects how the partition is formatted; FilesystemTypeNone means it is zeroed instead.
FileSystemType FileSystemType
// NOTE(review): presumably sets the legacy BIOS bootable attribute on the partition; the consumer is not in this file.
LegacyBIOSBootable bool
// Size is the number of bytes written when the partition is zeroed.
// NOTE(review): 0 appears to mean "use whatever space is available" — confirm against the partitioning code.
Size uint64
// Force is handed to the filesystem creation helpers as their force option.
Force bool
// Assets are the files copied by Save.
Assets []*Asset
// Preserve contents of the partition with the same label (if it exists).
PreserveContents bool
// Extra preserved locations (for upgrading from older versions of Talos).
//
// Used only if PreserveContents is true.
ExtraPreserveSources []PreserveSource
// Skip makes manifest skip any actions with the partition (creating, formatting).
//
// Skipped partitions should exist on the disk by the time manifest execution starts.
Skip bool
// set during execution
// PartitionName is the path of the partition device, filled in by Locate.
PartitionName string
// Contents holds saved partition contents between SaveContents and RestoreContents.
Contents *bytes.Buffer
}
// Asset represents a file required by a target.
type Asset struct {
// Source is the path of the file that is opened for reading.
Source string
// Destination is the path the file is copied to; its parent directory is created if needed.
Destination string
}
// PreserveSource instructs Talos where to look for source files to preserve.
type PreserveSource struct {
// Label is the label of the partition to read the files from.
Label string
// FnmatchFilters are fnmatch-style patterns selecting which files to keep.
FnmatchFilters []string
// FileSystemType is the filesystem type of the source partition.
FileSystemType FileSystemType
}
// EFITarget returns the stock description of the EFI partition on device:
// VFAT-formatted, EFISize in size, with forced formatting.
//
// When extra is non-nil, its Assets, PreserveContents, ExtraPreserveSources
// and Skip values are copied onto the result; all other fields of extra are
// ignored.
func EFITarget(device string, extra *Target) *Target {
	return (&Target{
		Device:         device,
		Label:          constants.EFIPartitionLabel,
		PartitionType:  EFISystemPartition,
		FileSystemType: FilesystemTypeVFAT,
		Size:           EFISize,
		Force:          true,
	}).enhance(extra)
}
// BIOSTarget returns the stock description of the BIOS GRUB partition on
// device: no filesystem, BIOSGrubSize in size, flagged as legacy BIOS
// bootable, with forced formatting.
//
// When extra is non-nil, its Assets, PreserveContents, ExtraPreserveSources
// and Skip values are copied onto the result; all other fields of extra are
// ignored.
func BIOSTarget(device string, extra *Target) *Target {
	return (&Target{
		Device:             device,
		Label:              constants.BIOSGrubPartitionLabel,
		PartitionType:      BIOSBootPartition,
		FileSystemType:     FilesystemTypeNone,
		LegacyBIOSBootable: true,
		Size:               BIOSGrubSize,
		Force:              true,
	}).enhance(extra)
}
// BootTarget returns the stock description of the boot partition on device:
// XFS-formatted, BootSize in size, with forced formatting.
//
// When extra is non-nil, its Assets, PreserveContents, ExtraPreserveSources
// and Skip values are copied onto the result; all other fields of extra are
// ignored.
func BootTarget(device string, extra *Target) *Target {
	return (&Target{
		Device:         device,
		Label:          constants.BootPartitionLabel,
		PartitionType:  LinuxFilesystemData,
		FileSystemType: FilesystemTypeXFS,
		Size:           BootSize,
		Force:          true,
	}).enhance(extra)
}
// MetaTarget returns the stock description of the meta partition on device:
// no filesystem, MetaSize in size, with forced formatting.
//
// When extra is non-nil, its Assets, PreserveContents, ExtraPreserveSources
// and Skip values are copied onto the result; all other fields of extra are
// ignored.
func MetaTarget(device string, extra *Target) *Target {
	return (&Target{
		Device:         device,
		Label:          constants.MetaPartitionLabel,
		PartitionType:  LinuxFilesystemData,
		FileSystemType: FilesystemTypeNone,
		Size:           MetaSize,
		Force:          true,
	}).enhance(extra)
}
// StateTarget returns the stock description of the state partition on device:
// XFS-formatted, StateSize in size, with forced formatting.
//
// When extra is non-nil, its Assets, PreserveContents, ExtraPreserveSources
// and Skip values are copied onto the result; all other fields of extra are
// ignored.
func StateTarget(device string, extra *Target) *Target {
	return (&Target{
		Device:         device,
		Label:          constants.StatePartitionLabel,
		PartitionType:  LinuxFilesystemData,
		FileSystemType: FilesystemTypeXFS,
		Size:           StateSize,
		Force:          true,
	}).enhance(extra)
}
// EphemeralTarget returns the stock description of the ephemeral partition on
// device: XFS-formatted, Size left at 0, with forced formatting.
//
// When extra is non-nil, its Assets, PreserveContents, ExtraPreserveSources
// and Skip values are copied onto the result; all other fields of extra are
// ignored.
func EphemeralTarget(device string, extra *Target) *Target {
	return (&Target{
		Device:         device,
		Label:          constants.EphemeralPartitionLabel,
		PartitionType:  LinuxFilesystemData,
		FileSystemType: FilesystemTypeXFS,
		Size:           0,
		Force:          true,
	}).enhance(extra)
}
// enhance copies the caller-tunable settings (Assets, PreserveContents,
// ExtraPreserveSources, Skip) from extra onto t and returns t.
//
// A nil extra leaves t untouched. The four fields are overwritten
// unconditionally, including with zero values.
func (t *Target) enhance(extra *Target) *Target {
	if extra != nil {
		t.Assets, t.PreserveContents = extra.Assets, extra.PreserveContents
		t.ExtraPreserveSources, t.Skip = extra.ExtraPreserveSources, extra.Skip
	}

	return t
}
// String renders the target as its partition path followed by the quoted
// label in parentheses.
func (t *Target) String() string {
	path, label := t.PartitionName, t.Label

	return fmt.Sprintf("%s (%q)", path, label)
}
// Locate finds the partition in pt whose name equals the target's Label.
//
// On a match, PartitionName is set from the device and partition number, and
// the partition is returned together with any error from building that path.
// When no partition carries the label, both results are nil.
func (t *Target) Locate(pt *gpt.GPT) (*gpt.Partition, error) {
	for _, candidate := range pt.Partitions().Items() {
		if candidate.Name != t.Label {
			continue
		}

		partPath, err := util.PartPath(t.Device, int(candidate.Number))
		t.PartitionName = partPath

		return candidate, err
	}

	return nil, nil
}
// Format prepares the partition named by PartitionName.
//
// Skipped targets are left alone. Targets without a filesystem are zeroed.
// VFAT and XFS targets get a fresh filesystem carrying the target's label;
// any other filesystem type is rejected with an error.
//
//nolint: gocyclo
func (t *Target) Format() error {
	switch {
	case t.Skip:
		return nil
	case t.FileSystemType == FilesystemTypeNone:
		return t.zeroPartition()
	}

	log.Printf("formatting partition %q as %q with label %q\n", t.PartitionName, t.FileSystemType, t.Label)

	makefsOpts := []makefs.Option{
		makefs.WithForce(t.Force),
		makefs.WithLabel(t.Label),
	}

	if t.FileSystemType == FilesystemTypeVFAT {
		return makefs.VFAT(t.PartitionName, makefsOpts...)
	}

	if t.FileSystemType == FilesystemTypeXFS {
		return makefs.XFS(t.PartitionName, makefsOpts...)
	}

	return fmt.Errorf("unsupported filesystem type: %q", t.FileSystemType)
}
// Save copies every asset of the target from its Source to its Destination,
// creating the destination's parent directory first.
//
// Assets are processed in order and the first failure aborts the run.
func (t *Target) Save() (err error) {
	copyAsset := func(asset *Asset) error {
		src, openErr := os.Open(asset.Source)
		if openErr != nil {
			return openErr
		}

		// nolint: errcheck
		defer src.Close()

		// NOTE(review): os.ModeDir carries no permission bits, so a newly
		// created directory gets mode 0 — confirm this is intended.
		if mkdirErr := os.MkdirAll(filepath.Dir(asset.Destination), os.ModeDir); mkdirErr != nil {
			return mkdirErr
		}

		dst, createErr := os.Create(asset.Destination)
		if createErr != nil {
			return createErr
		}

		// nolint: errcheck
		defer dst.Close()

		log.Printf("copying %s to %s\n", src.Name(), dst.Name())

		if _, copyErr := io.Copy(dst, src); copyErr != nil {
			log.Printf("failed to copy %s to %s\n", src.Name(), dst.Name())

			return copyErr
		}

		// Close explicitly so that close errors are reported; the deferred
		// calls above only cover the early-return paths.
		if closeErr := dst.Close(); closeErr != nil {
			log.Printf("failed to close %s", dst.Name())

			return closeErr
		}

		if closeErr := src.Close(); closeErr != nil {
			log.Printf("failed to close %s", src.Name())

			return closeErr
		}

		return nil
	}

	for _, asset := range t.Assets {
		if err = copyAsset(asset); err != nil {
			return err
		}
	}

	return nil
}
// withTemporaryMounted mounts partPath at the "mnt" directory under
// constants.SystemPath, runs f with that mount path, and unmounts afterwards.
//
// flags are OR-ed with MS_NOATIME. A mount failure is returned without
// calling f; an unmount failure is only logged, so the result is whatever f
// returned.
func withTemporaryMounted(partPath string, flags uintptr, fileSystemType FileSystemType, label string, f func(mountPath string) error) error {
	target := filepath.Join(constants.SystemPath, "mnt")

	points := mount.NewMountPoints()
	points.Set(label, mount.NewMountPoint(partPath, target, fileSystemType, unix.MS_NOATIME|flags, ""))

	if mountErr := mount.Mount(points); mountErr != nil {
		return fmt.Errorf("failed to mount %q: %w", partPath, mountErr)
	}

	defer func() {
		if unmountErr := mount.Unmount(points); unmountErr != nil {
			log.Printf("failed to unmount: %s", unmountErr)
		}
	}()

	return f(target)
}
// SaveContents captures the contents of the source partition on device into
// the target's in-memory Contents buffer.
//
// Partitions without a filesystem are captured raw; otherwise the filesystem
// is read, limited to files matching fnmatchFilters. On failure Contents is
// reset to nil and the error is returned.
func (t *Target) SaveContents(device Device, source *gpt.Partition, fileSystemType FileSystemType, fnmatchFilters []string) error {
	partPath, err := util.PartPath(device.Device, int(source.Number))
	if err != nil {
		return err
	}

	switch fileSystemType {
	case FilesystemTypeNone:
		err = t.saveRawContents(partPath)
	default:
		err = t.saveFilesystemContents(partPath, fileSystemType, fnmatchFilters)
	}

	if err != nil {
		t.Contents = nil

		return err
	}

	log.Printf("preserved contents of %q: %d bytes", t.Label, t.Contents.Len())

	return nil
}
// saveRawContents reads the whole partition at partPath and stores it,
// gzip-compressed, in a fresh Contents buffer.
//
// Errors from opening or reading the partition are wrapped with %w, so
// callers can inspect them with errors.Is / errors.As. The gzip stream is
// finalized explicitly so that a failure to flush it is reported rather than
// silently leaving a truncated archive.
func (t *Target) saveRawContents(partPath string) error {
	src, err := os.Open(partPath)
	if err != nil {
		return fmt.Errorf("error opening source partition %q: %w", partPath, err)
	}

	defer src.Close() //nolint: errcheck

	t.Contents = bytes.NewBuffer(nil)

	zw := gzip.NewWriter(t.Contents)

	if _, err = io.Copy(zw, src); err != nil {
		// The copy error is the one worth reporting; still release the writer.
		zw.Close() //nolint: errcheck

		return fmt.Errorf("error copying partition %q contents: %w", partPath, err)
	}

	// Close writes the gzip footer; without it the archive is incomplete.
	if err = zw.Close(); err != nil {
		return fmt.Errorf("error finalizing compressed contents of partition %q: %w", partPath, err)
	}

	return src.Close()
}
// saveFilesystemContents mounts partPath read-only and archives the files
// matching fnmatchFilters as a gzipped tarball into a fresh Contents buffer.
func (t *Target) saveFilesystemContents(partPath string, fileSystemType FileSystemType, fnmatchFilters []string) error {
	t.Contents = bytes.NewBuffer(nil)

	archive := func(mountPath string) error {
		return archiver.TarGz(context.TODO(), mountPath, t.Contents, archiver.WithFnmatchPatterns(fnmatchFilters...))
	}

	return withTemporaryMounted(partPath, unix.MS_RDONLY, fileSystemType, t.Label, archive)
}
// RestoreContents writes the previously saved Contents back to the partition.
//
// It is a no-op when nothing was saved. Targets without a filesystem are
// restored raw, all others through the filesystem. Contents is always cleared
// once a restore has been attempted, whether or not it succeeded.
func (t *Target) RestoreContents() error {
	if t.Contents == nil {
		return nil
	}

	restore := t.restoreFilesystemContents
	if t.FileSystemType == FilesystemTypeNone {
		restore = t.restoreRawContents
	}

	err := restore()

	t.Contents = nil

	if err != nil {
		return err
	}

	log.Printf("restored contents of %q", t.Label)

	return nil
}
// restoreRawContents decompresses the gzip stream held in Contents and writes
// it to the partition at PartitionName.
//
// The partition must already exist: it is opened write-only without being
// created. Errors are wrapped with %w so callers can inspect them with
// errors.Is / errors.As.
func (t *Target) restoreRawContents() error {
	dst, err := os.OpenFile(t.PartitionName, os.O_WRONLY, 0)
	if err != nil {
		return fmt.Errorf("error opening destination partition %q: %w", t.PartitionName, err)
	}

	defer dst.Close() //nolint: errcheck

	zr, err := gzip.NewReader(t.Contents)
	if err != nil {
		return fmt.Errorf("error reading saved contents for partition %q: %w", t.PartitionName, err)
	}

	defer zr.Close() //nolint: errcheck

	if _, err = io.Copy(dst, zr); err != nil {
		return fmt.Errorf("error restoring partition %q contents: %w", t.PartitionName, err)
	}

	// Close explicitly so that a failed close on the written partition is reported.
	return dst.Close()
}
// restoreFilesystemContents mounts the target partition and unpacks the
// gzipped tarball held in Contents into it.
func (t *Target) restoreFilesystemContents() error {
	unpack := func(mountPath string) error {
		return archiver.UntarGz(context.TODO(), t.Contents, mountPath)
	}

	return withTemporaryMounted(t.PartitionName, 0, t.FileSystemType, t.Label, unpack)
}
// zeroPartition overwrites the first Size bytes of the partition at
// PartitionName with zeroes read from /dev/zero.
//
// The partition is opened write-only and is neither created nor truncated.
func (t *Target) zeroPartition() (err error) {
	log.Printf("zeroing out %q", t.PartitionName)

	var src, dst *os.File

	if src, err = os.Open("/dev/zero"); err != nil {
		return err
	}

	defer src.Close() //nolint: errcheck

	if dst, err = os.OpenFile(t.PartitionName, os.O_WRONLY, 0); err != nil {
		return err
	}

	defer dst.Close() //nolint: errcheck

	_, err = io.CopyN(dst, src, int64(t.Size))

	return err
}

View File

@ -10,13 +10,15 @@ import (
"github.com/spf13/cobra"
"github.com/talos-systems/talos/pkg/machinery/api/machine"
"github.com/talos-systems/talos/pkg/machinery/client"
)
var (
graceful bool
reboot bool
)
var resetCmdFlags struct {
graceful bool
reboot bool
systemLabelsToWipe []string
}
// resetCmd represents the reset command.
var resetCmd = &cobra.Command{
@ -26,7 +28,20 @@ var resetCmd = &cobra.Command{
Args: cobra.NoArgs,
RunE: func(cmd *cobra.Command, args []string) error {
return WithClient(func(ctx context.Context, c *client.Client) error {
if err := c.Reset(ctx, graceful, reboot); err != nil {
var systemPartitionsToWipe []*machine.ResetPartitionSpec
for _, label := range resetCmdFlags.systemLabelsToWipe {
systemPartitionsToWipe = append(systemPartitionsToWipe, &machine.ResetPartitionSpec{
Label: label,
Wipe: true,
})
}
if err := c.ResetGeneric(ctx, &machine.ResetRequest{
Graceful: resetCmdFlags.graceful,
Reboot: resetCmdFlags.reboot,
SystemPartitionsToWipe: systemPartitionsToWipe,
}); err != nil {
return fmt.Errorf("error executing reset: %s", err)
}
@ -36,7 +51,8 @@ var resetCmd = &cobra.Command{
}
func init() {
resetCmd.Flags().BoolVar(&graceful, "graceful", true, "if true, attempt to cordon/drain node and leave etcd (if applicable)")
resetCmd.Flags().BoolVar(&reboot, "reboot", false, "if true, reboot the node after resetting instead of shutting down")
resetCmd.Flags().BoolVar(&resetCmdFlags.graceful, "graceful", true, "if true, attempt to cordon/drain node and leave etcd (if applicable)")
resetCmd.Flags().BoolVar(&resetCmdFlags.reboot, "reboot", false, "if true, reboot the node after resetting instead of shutting down")
resetCmd.Flags().StringSliceVar(&resetCmdFlags.systemLabelsToWipe, "system-labels-to-wipe", nil, "if set, just wipe selected system disk partitions by label but keep other partitions intact")
addCommand(resetCmd)
}

View File

@ -31,10 +31,12 @@ import (
multierror "github.com/hashicorp/go-multierror"
"github.com/prometheus/procfs"
"github.com/rs/xid"
"github.com/talos-systems/go-blockdevice/blockdevice/partition/gpt"
"go.etcd.io/etcd/clientv3/concurrency"
"golang.org/x/sys/unix"
"google.golang.org/grpc"
installer "github.com/talos-systems/talos/cmd/installer/pkg/install"
"github.com/talos-systems/talos/internal/app/machined/internal/install"
"github.com/talos-systems/talos/internal/app/machined/pkg/runtime"
"github.com/talos-systems/talos/internal/app/machined/pkg/runtime/v1alpha1/bootloader"
@ -422,14 +424,81 @@ func (s *Server) Upgrade(ctx context.Context, in *machine.UpgradeRequest) (reply
return reply, nil
}
// ResetOptions implements runtime.ResetOptions interface.
//
// It embeds the incoming reset request, which supplies the request's getters,
// and adds the list of system disk targets selected for wiping.
type ResetOptions struct {
*machine.ResetRequest
// systemDiskTargets holds the targets to be wiped; nil when none were selected.
systemDiskTargets []*installer.Target
}
// GetSystemDiskTargets implements runtime.ResetOptions interface.
//
// It returns the stored targets as a newly allocated slice of
// runtime.PartitionTarget values, or nil when no targets were stored.
func (opt *ResetOptions) GetSystemDiskTargets() []runtime.PartitionTarget {
	if opt.systemDiskTargets == nil {
		return nil
	}

	targets := make([]runtime.PartitionTarget, 0, len(opt.systemDiskTargets))

	for _, target := range opt.systemDiskTargets {
		targets = append(targets, target)
	}

	return targets
}
// Reset resets the node.
//
// nolint: dupl
// nolint: dupl, gocyclo
func (s *Server) Reset(ctx context.Context, in *machine.ResetRequest) (reply *machine.ResetResponse, err error) {
log.Printf("reset request received")
opts := ResetOptions{
ResetRequest: in,
}
if len(in.GetSystemPartitionsToWipe()) > 0 {
bd := s.Controller.Runtime().State().Machine().Disk().BlockDevice
var pt *gpt.GPT
pt, err = bd.PartitionTable()
if err != nil {
return nil, fmt.Errorf("error reading partition table: %w", err)
}
for _, spec := range in.GetSystemPartitionsToWipe() {
var target *installer.Target
switch spec.Label {
case constants.EFIPartitionLabel:
target = installer.EFITarget(bd.Device().Name(), nil)
case constants.BIOSGrubPartitionLabel:
target = installer.BIOSTarget(bd.Device().Name(), nil)
case constants.BootPartitionLabel:
target = installer.BootTarget(bd.Device().Name(), nil)
case constants.MetaPartitionLabel:
target = installer.MetaTarget(bd.Device().Name(), nil)
case constants.StatePartitionLabel:
target = installer.StateTarget(bd.Device().Name(), nil)
case constants.EphemeralPartitionLabel:
target = installer.EphemeralTarget(bd.Device().Name(), nil)
default:
return nil, fmt.Errorf("label %q is not supported", spec.Label)
}
_, err = target.Locate(pt)
if err != nil {
return nil, fmt.Errorf("failed location partition with label %q: %w", spec.Label, err)
}
if spec.Wipe {
opts.systemDiskTargets = append(opts.systemDiskTargets, target)
}
}
}
go func() {
if err := s.Controller.Run(runtime.SequenceReset, in); err != nil {
if err := s.Controller.Run(runtime.SequenceReset, &opts); err != nil {
log.Println("reset failed:", err)
if err != runtime.ErrLocked {

View File

@ -91,6 +91,19 @@ func ParseSequence(s string) (seq Sequence, err error) {
return seq, nil
}
// ResetOptions are parameters to Reset sequence.
type ResetOptions interface {
// GetGraceful reports whether a graceful reset was requested.
GetGraceful() bool
// GetReboot reports whether the node should reboot (rather than shut down) after the reset.
GetReboot() bool
// GetSystemDiskTargets returns the system disk partitions to wipe; an empty
// result selects the full system disk reset.
GetSystemDiskTargets() []PartitionTarget
}
// PartitionTarget provides interface to the disk partition.
type PartitionTarget interface {
// String describes the partition, e.g. for use in error messages.
fmt.Stringer
// Format wipes the partition and reports any failure.
Format() error
}
// Sequencer describes the set of sequences required for the lifecycle
// management of the operating system.
type Sequencer interface {
@ -101,7 +114,7 @@ type Sequencer interface {
Install(Runtime) []Phase
Reboot(Runtime) []Phase
Recover(Runtime, *machine.RecoverRequest) []Phase
Reset(Runtime, *machine.ResetRequest) []Phase
Reset(Runtime, ResetOptions) []Phase
Shutdown(Runtime) []Phase
Upgrade(Runtime, *machine.UpgradeRequest) []Phase
}

View File

@ -385,11 +385,11 @@ func (c *Controller) phases(seq runtime.Sequence, data interface{}) ([]runtime.P
phases = c.s.Upgrade(c.r, in)
case runtime.SequenceReset:
var (
in *machine.ResetRequest
in runtime.ResetOptions
ok bool
)
if in, ok = data.(*machine.ResetRequest); !ok {
if in, ok = data.(runtime.ResetOptions); !ok {
return nil, runtime.ErrInvalidSequenceData
}

View File

@ -301,7 +301,7 @@ func (*Sequencer) Recover(r runtime.Runtime, in *machineapi.RecoverRequest) []ru
}
// Reset is the reset sequence.
func (*Sequencer) Reset(r runtime.Runtime, in *machineapi.ResetRequest) []runtime.Phase {
func (*Sequencer) Reset(r runtime.Runtime, in runtime.ResetOptions) []runtime.Phase {
phases := PhaseList{}
switch r.State().Platform().Mode() { //nolint: exhaustive
@ -324,13 +324,28 @@ func (*Sequencer) Reset(r runtime.Runtime, in *machineapi.ResetRequest) []runtim
in.GetGraceful(),
"cleanup",
RemoveAllPods,
).AppendList(stopAllPhaselist(r)).
Append(
"reset",
ResetSystemDisk,
).Append(
).AppendWhen(
!in.GetGraceful(),
"cleanup",
StopAllPods,
).AppendList(
stopAllPhaselist(r),
).AppendWhen(
len(in.GetSystemDiskTargets()) == 0,
"reset",
ResetSystemDisk,
).AppendWhen(
len(in.GetSystemDiskTargets()) > 0,
"resetSpec",
ResetSystemDiskSpec,
).AppendWhen(
in.GetReboot(),
"reboot",
Reboot,
).AppendWhen(
!in.GetReboot(),
"shutdown",
Shutdown,
)
}

View File

@ -1296,6 +1296,26 @@ func ResetSystemDisk(seq runtime.Sequence, data interface{}) (runtime.TaskExecut
}, "resetSystemDisk"
}
// ResetSystemDiskSpec represents the task to reset the system disk by spec.
//
// The returned task expects data to be a runtime.ResetOptions and formats
// each of its system disk targets in order, stopping at the first failure.
func ResetSystemDiskSpec(seq runtime.Sequence, data interface{}) (runtime.TaskExecutionFunc, string) {
	task := func(ctx context.Context, logger *log.Logger, r runtime.Runtime) (err error) {
		opts, isResetOptions := data.(runtime.ResetOptions)
		if !isResetOptions {
			return fmt.Errorf("unexpected runtime data")
		}

		targets := opts.GetSystemDiskTargets()

		for i := range targets {
			if err = targets[i].Format(); err != nil {
				return fmt.Errorf("failed wiping partition %s: %w", targets[i], err)
			}
		}

		logger.Printf("successfully reset system disk by the spec")

		return nil
	}

	return task, "resetSystemDiskSpec"
}
// VerifyDiskAvailability represents the task for verifying that the system
// disk is not in use.
func VerifyDiskAvailability(seq runtime.Sequence, data interface{}) (runtime.TaskExecutionFunc, string) {

View File

@ -211,7 +211,7 @@ func TestSequencer_Reboot(t *testing.T) {
func TestSequencer_Reset(t *testing.T) {
type args struct {
r runtime.Runtime
in *machine.ResetRequest
in runtime.ResetOptions
}
tests := []struct {

View File

@ -8,6 +8,9 @@ package api
import (
"context"
"crypto/sha256"
"encoding/hex"
"io"
"sort"
"testing"
"time"
@ -16,7 +19,10 @@ import (
"google.golang.org/grpc/status"
"github.com/talos-systems/talos/internal/integration/base"
machineapi "github.com/talos-systems/talos/pkg/machinery/api/machine"
"github.com/talos-systems/talos/pkg/machinery/client"
"github.com/talos-systems/talos/pkg/machinery/config/types/v1alpha1/machine"
"github.com/talos-systems/talos/pkg/machinery/constants"
)
// ResetSuite ...
@ -49,6 +55,35 @@ func (suite *ResetSuite) TearDownTest() {
}
}
// hashKubeletCert reads the kubelet client certificate from the given node
// through suite.Client.Read and returns the hex-encoded SHA-256 digest of its
// contents.
//
// The whole request is bounded by a 10 second timeout derived from ctx.
func (suite *ResetSuite) hashKubeletCert(ctx context.Context, node string) (string, error) {
	timeoutCtx, cancel := context.WithTimeout(ctx, 10*time.Second)
	defer cancel()

	nodeCtx := client.WithNodes(timeoutCtx, node)

	certReader, errCh, err := suite.Client.Read(nodeCtx, "/var/lib/kubelet/pki/kubelet-client-current.pem")
	if err != nil {
		return "", err
	}

	defer certReader.Close() //nolint: errcheck

	digest := sha256.New()

	if _, err = io.Copy(digest, certReader); err != nil {
		return "", err
	}

	// Drain the error channel; any error reported there fails the hashing.
	for streamErr := range errCh {
		if streamErr != nil {
			return "", streamErr
		}
	}

	return hex.EncodeToString(digest.Sum(nil)), certReader.Close()
}
// TestResetNodeByNode Resets cluster node by node, waiting for health between Resets.
func (suite *ResetSuite) TestResetNodeByNode() {
if !suite.Capabilities().SupportsReboot {
@ -85,12 +120,12 @@ func (suite *ResetSuite) TestResetNodeByNode() {
suite.T().Log("Resetting node", node)
// TODO: there is no good way to assert that node was reset and disk contents were really wiped
preReset, err := suite.hashKubeletCert(suite.ctx, node)
suite.Require().NoError(err)
// uptime should go down after Reset, as it reboots the node
suite.AssertRebooted(suite.ctx, node, func(nodeCtx context.Context) error {
// force reboot after reset, as this is the only mode we can test
err := suite.Client.Reset(nodeCtx, true, true)
err = suite.Client.Reset(nodeCtx, true, true)
if err != nil {
if s, ok := status.FromError(err); ok && s.Code() == codes.Unavailable {
// ignore errors if reboot happens before response is fully received
@ -101,9 +136,151 @@ func (suite *ResetSuite) TestResetNodeByNode() {
return err
}, 10*time.Minute)
postReset, err := suite.hashKubeletCert(suite.ctx, node)
suite.Require().NoError(err)
suite.Assert().NotEqual(preReset, postReset, "reset should lead to new kubelet cert being generated")
}
}
// TestResetNoGraceful resets a worker in !graceful mode to test the flow.
//
// A control plane node can't be reset in !graceful mode, as it won't be able to join the cluster back.
func (suite *ResetSuite) TestResetNoGraceful() {
	if !suite.Capabilities().SupportsReboot {
		suite.T().Skip("cluster doesn't support reboot (and reset)")
	}

	if suite.Cluster == nil {
		suite.T().Skip("without full cluster state reset test is not reliable (can't wait for cluster readiness in between resets)")
	}

	node := suite.RandomDiscoveredNode(machine.TypeJoin)

	suite.T().Log("Resetting node !graceful", node)

	certBefore, err := suite.hashKubeletCert(suite.ctx, node)
	suite.Require().NoError(err)

	resetNode := func(nodeCtx context.Context) error {
		// force reboot after reset, as this is the only mode we can test
		resetErr := suite.Client.Reset(nodeCtx, false, true)
		if resetErr == nil {
			return nil
		}

		if s, ok := status.FromError(resetErr); ok && s.Code() == codes.Unavailable {
			// ignore errors if reboot happens before response is fully received
			return nil
		}

		return resetErr
	}

	suite.AssertRebooted(suite.ctx, node, resetNode, 5*time.Minute)

	certAfter, err := suite.hashKubeletCert(suite.ctx, node)
	suite.Require().NoError(err)

	suite.Assert().NotEqual(certBefore, certAfter, "reset should lead to new kubelet cert being generated")
}
// TestResetWithSpecEphemeral resets only the ephemeral partition on the node.
//
// The kubelet client certificate hash is compared before and after the reset
// and is expected to differ.
//
//nolint: dupl
func (suite *ResetSuite) TestResetWithSpecEphemeral() {
	if !suite.Capabilities().SupportsReboot {
		suite.T().Skip("cluster doesn't support reboot (and reset)")
	}

	if suite.Cluster == nil {
		suite.T().Skip("without full cluster state reset test is not reliable (can't wait for cluster readiness in between resets)")
	}

	node := suite.RandomDiscoveredNode()

	suite.T().Log("Resetting node with spec=[EPHEMERAL]", node)

	certBefore, err := suite.hashKubeletCert(suite.ctx, node)
	suite.Require().NoError(err)

	wipeEphemeral := func(nodeCtx context.Context) error {
		// force reboot after reset, as this is the only mode we can test
		req := &machineapi.ResetRequest{
			Reboot:   true,
			Graceful: true,
			SystemPartitionsToWipe: []*machineapi.ResetPartitionSpec{
				{
					Label: constants.EphemeralPartitionLabel,
					Wipe:  true,
				},
			},
		}

		resetErr := suite.Client.ResetGeneric(nodeCtx, req)
		if resetErr == nil {
			return nil
		}

		if s, ok := status.FromError(resetErr); ok && s.Code() == codes.Unavailable {
			// ignore errors if reboot happens before response is fully received
			return nil
		}

		return resetErr
	}

	suite.AssertRebooted(suite.ctx, node, wipeEphemeral, 5*time.Minute)

	certAfter, err := suite.hashKubeletCert(suite.ctx, node)
	suite.Require().NoError(err)

	suite.Assert().NotEqual(certBefore, certAfter, "reset should lead to new kubelet cert being generated")
}
// TestResetWithSpecState resets only the state partition on the node.
//
// As the ephemeral partition is not reset, the kubelet cert shouldn't change.
//
//nolint: dupl
func (suite *ResetSuite) TestResetWithSpecState() {
	if !suite.Capabilities().SupportsReboot {
		suite.T().Skip("cluster doesn't support reboot (and reset)")
	}

	if suite.Cluster == nil {
		suite.T().Skip("without full cluster state reset test is not reliable (can't wait for cluster readiness in between resets)")
	}

	node := suite.RandomDiscoveredNode()

	suite.T().Log("Resetting node with spec=[STATE]", node)

	certBefore, err := suite.hashKubeletCert(suite.ctx, node)
	suite.Require().NoError(err)

	wipeState := func(nodeCtx context.Context) error {
		// force reboot after reset, as this is the only mode we can test
		req := &machineapi.ResetRequest{
			Reboot:   true,
			Graceful: true,
			SystemPartitionsToWipe: []*machineapi.ResetPartitionSpec{
				{
					Label: constants.StatePartitionLabel,
					Wipe:  true,
				},
			},
		}

		resetErr := suite.Client.ResetGeneric(nodeCtx, req)
		if resetErr == nil {
			return nil
		}

		if s, ok := status.FromError(resetErr); ok && s.Code() == codes.Unavailable {
			// ignore errors if reboot happens before response is fully received
			return nil
		}

		return resetErr
	}

	suite.AssertRebooted(suite.ctx, node, wipeState, 5*time.Minute)

	certAfter, err := suite.hashKubeletCert(suite.ctx, node)
	suite.Require().NoError(err)

	suite.Assert().Equal(certBefore, certAfter, "ephemeral partition was not reset")
}
// init appends a ResetSuite instance to allSuites.
func init() {
	allSuites = append(allSuites, &ResetSuite{})
}

File diff suppressed because it is too large Load Diff

View File

@ -445,6 +445,17 @@ func (c *Client) Reset(ctx context.Context, graceful, reboot bool) (err error) {
return
}
// ResetGeneric sends the given reset request to the machine service as is,
// so the caller controls every field of the request (including the list of
// system partitions to wipe).
//
// The response is passed through FilterMessages and only the resulting error
// is returned.
func (c *Client) ResetGeneric(ctx context.Context, req *machineapi.ResetRequest) (err error) {
	resp, err := c.MachineClient.Reset(ctx, req)
	if err != nil {
		return err
	}

	// err is nil at this point; FilterMessages may still report an error from the response.
	_, err = FilterMessages(resp, err)

	return err
}
// Reboot implements the proto.MachineServiceClient interface.
func (c *Client) Reboot(ctx context.Context) (err error) {
resp, err := c.MachineClient.Reboot(ctx, &empty.Empty{})

View File

@ -89,6 +89,7 @@ title: API
- [RecoverRequest](#machine.RecoverRequest)
- [RecoverResponse](#machine.RecoverResponse)
- [Reset](#machine.Reset)
- [ResetPartitionSpec](#machine.ResetPartitionSpec)
- [ResetRequest](#machine.ResetRequest)
- [ResetResponse](#machine.ResetResponse)
- [Restart](#machine.Restart)
@ -1582,16 +1583,33 @@ The reset message containing the restart status.
<a name="machine.ResetRequest"></a>
<a name="machine.ResetPartitionSpec"></a>
### ResetRequest
### ResetPartitionSpec
rpc reset
| Field | Type | Label | Description |
| ----- | ---- | ----- | ----------- |
| graceful | [bool](#bool) | | |
| reboot | [bool](#bool) | | |
| label | [string](#string) | | |
| wipe | [bool](#bool) | | |
<a name="machine.ResetRequest"></a>
### ResetRequest
| Field | Type | Label | Description |
| ----- | ---- | ----- | ----------- |
| graceful | [bool](#bool) | | Graceful indicates whether node should leave etcd before the reset, it also enforces etcd checks before leaving. |
| reboot | [bool](#bool) | | Reboot indicates whether node should reboot or halt after resetting. |
| system_partitions_to_wipe | [ResetPartitionSpec](#machine.ResetPartitionSpec) | repeated | System_partitions_to_wipe lists specific system disk partitions to be reset (wiped). If system_partitions_to_wipe is empty, all the partitions are erased. |

View File

@ -1267,9 +1267,10 @@ talosctl reset [flags]
### Options
```
--graceful if true, attempt to cordon/drain node and leave etcd (if applicable) (default true)
-h, --help help for reset
--reboot if true, reboot the node after resetting instead of shutting down
--graceful if true, attempt to cordon/drain node and leave etcd (if applicable) (default true)
-h, --help help for reset
--reboot if true, reboot the node after resetting instead of shutting down
--system-labels-to-wipe strings if set, just wipe selected system disk partitions by label but keep other partitions intact
```
### Options inherited from parent commands