fix: sync RTC in timed, sync time before fetching packet metadata

1. When the time is set by timed, also sync the RTC with the system time;
otherwise the time might be off after a reboot.

2. Before fetching Packet (Equinix Metal) metadata over `https://`, do a
one-time time sync in the background: if the clock skew is big enough,
the TLS certificate will be considered invalid and the boot fails.

Signed-off-by: Andrey Smirnov <smirnov.andrey@gmail.com>
This commit is contained in:
Andrey Smirnov 2020-12-18 17:19:14 +03:00 committed by Andrey Smirnov
parent f90aa613ac
commit c4624078ce
7 changed files with 71 additions and 19 deletions

1
go.mod
View File

@ -67,6 +67,7 @@ require (
github.com/talos-systems/grpc-proxy v0.2.0
github.com/talos-systems/net v0.2.0
github.com/talos-systems/talos/pkg/machinery v0.0.0-20200818212414-6a7cc0264819
github.com/u-root/u-root v7.0.0+incompatible
github.com/vishvananda/netns v0.0.0-20200728191858-db3c7e526aae // indirect
github.com/vmware-tanzu/sonobuoy v0.19.0
github.com/vmware/vmw-guestinfo v0.0.0-20200218095840-687661b8bd8e

View File

@ -112,8 +112,6 @@ func (*Sequencer) Initialize(r runtime.Runtime) []runtime.Phase {
).Append(
"discoverNetwork",
SetupDiscoveryNetwork,
// We MUST mount the boot partition so that this task can attempt to read
// the config on disk.
).AppendWhen(
r.State().Machine().Installed(),
"mountSystem",
@ -121,9 +119,6 @@ func (*Sequencer) Initialize(r runtime.Runtime) []runtime.Phase {
).Append(
"config",
LoadConfig,
// We unmount the boot partition here to simplify subsequent sequences.
// If we leave it mounted, it becomes tricky trying to figure out if we
// need to mount the boot partition.
).AppendWhen(
r.State().Machine().Installed(),
"unmountSystem",

View File

@ -47,6 +47,7 @@ import (
"github.com/talos-systems/talos/internal/app/machined/pkg/system/services"
"github.com/talos-systems/talos/internal/app/maintenance"
"github.com/talos-systems/talos/internal/app/networkd/pkg/networkd"
"github.com/talos-systems/talos/internal/app/timed/pkg/ntp"
"github.com/talos-systems/talos/internal/pkg/containers/cri/containerd"
"github.com/talos-systems/talos/internal/pkg/cri"
"github.com/talos-systems/talos/internal/pkg/etcd"
@ -516,7 +517,31 @@ func SaveConfig(seq runtime.Sequence, data interface{}) (runtime.TaskExecutionFu
}, "saveConfig"
}
// singleTimeSync starts a background goroutine which performs a single NTP
// query and sets the system time from the response.
//
// The sync is best-effort: failures are logged and never reported to the
// caller. The returned cancel function cancels the context used by the sync;
// it does not wait for the goroutine to exit.
func singleTimeSync(ctx context.Context) (cancel context.CancelFunc) {
	ctx, cancel = context.WithCancel(ctx)

	go func() {
		ntpClient, err := ntp.NewNTPClient()
		if err != nil {
			log.Printf("failed to build one-time ntp client: %s", err)

			return
		}

		err = ntpClient.QueryAndSetTime(ctx)
		if err == nil {
			return
		}

		// Cancellation is the expected outcome when the caller finishes before
		// the sync does, so it is not worth logging. The error returned by
		// QueryAndSetTime is built with %s formatting and may not keep
		// context.Canceled in its chain, so the context itself is consulted too.
		if errors.Is(err, context.Canceled) || errors.Is(ctx.Err(), context.Canceled) {
			return
		}

		log.Printf("failed to do one-time time sync: %s", err)
	}()

	return cancel
}
func fetchConfig(ctx context.Context, r runtime.Runtime) (out []byte, err error) {
cancel := singleTimeSync(ctx)
defer cancel()
var b []byte
if b, err = r.State().Platform().Configuration(ctx); err != nil {

View File

@ -82,6 +82,7 @@ func (n *Timed) Runner(r runtime.Runtime) (runner.Runner, error) {
}
mounts := []specs.Mount{
{Type: "bind", Destination: "/dev", Source: "/dev", Options: []string{"rbind", "rshared", "rw"}},
{Type: "bind", Destination: filepath.Dir(constants.TimeSocketPath), Source: filepath.Dir(constants.TimeSocketPath), Options: []string{"rbind", "rw"}},
}
@ -112,6 +113,7 @@ func (n *Timed) Runner(r runtime.Runtime) (runner.Runner, error) {
}),
oci.WithHostNamespace(specs.NetworkNamespace),
oci.WithMounts(mounts),
oci.WithAllDevicesAllowed,
),
),
restart.WithType(restart.Forever),

View File

@ -6,6 +6,7 @@ package ntp
import (
"bytes"
"context"
"fmt"
"log"
"math/rand"
@ -16,6 +17,7 @@ import (
"github.com/beevik/ntp"
"github.com/hashicorp/go-multierror"
"github.com/talos-systems/go-retry/retry"
"github.com/u-root/u-root/pkg/rtc"
"github.com/talos-systems/talos/internal/app/timed/pkg/timex"
)
@ -26,7 +28,8 @@ type NTP struct {
MinPoll time.Duration
MaxPoll time.Duration
ready uint32
ready uint32
rtcClock *rtc.RTC
}
// NewNTPClient instantiates a new ntp client for the
@ -39,6 +42,13 @@ func NewNTPClient(opts ...Option) (*NTP, error) {
result = multierror.Append(setter(ntp))
}
var err error
ntp.rtcClock, err = rtc.OpenRTC()
if err != nil {
log.Printf("failure opening RTC, ignored: %s", err)
}
return ntp, result.ErrorOrNil()
}
@ -51,7 +61,7 @@ func (n *NTP) Ready() bool {
// We don't ever want the daemon to stop, so we only log
// errors.
func (n *NTP) Daemon() (err error) {
if err = n.QueryAndSetTime(); err != nil {
if err = n.QueryAndSetTime(context.Background()); err != nil {
log.Println(err)
// if initial time sync fails, restart the service for more aggressive retry
@ -65,15 +75,21 @@ func (n *NTP) Daemon() (err error) {
randSleep := time.Duration(rand.Intn(int(n.MaxPoll.Seconds()))) * time.Second
time.Sleep(randSleep + n.MinPoll)
if err = n.QueryAndSetTime(); err != nil {
if err = n.QueryAndSetTime(context.Background()); err != nil {
log.Println(err)
}
}
}
// Query polls the ntp server and verifies a successful response.
func (n *NTP) Query() (resp *ntp.Response, err error) {
func (n *NTP) Query(ctx context.Context) (resp *ntp.Response, err error) {
err = retry.Constant(n.MaxPoll, retry.WithUnits(n.MinPoll), retry.WithJitter(250*time.Millisecond)).Retry(func() error {
select {
case <-ctx.Done():
return retry.UnexpectedError(ctx.Err())
default:
}
resp, err = ntp.Query(n.Server)
if err != nil {
log.Printf("query error: %v", err)
@ -101,14 +117,14 @@ func (n *NTP) GetTime() time.Time {
}
// QueryAndSetTime queries the NTP server and sets the time.
func (n *NTP) QueryAndSetTime() (err error) {
func (n *NTP) QueryAndSetTime(ctx context.Context) (err error) {
var resp *ntp.Response
if resp, err = n.Query(); err != nil {
if resp, err = n.Query(ctx); err != nil {
return fmt.Errorf("error querying %s for time, %s", n.Server, err)
}
if err = adjustTime(resp.ClockOffset); err != nil {
if err = n.adjustTime(resp.ClockOffset); err != nil {
return fmt.Errorf("failed to set time, %s", err)
}
@ -118,18 +134,30 @@ func (n *NTP) QueryAndSetTime() (err error) {
}
// setTime sets the system time to adjustedTime and, if an RTC was opened, syncs the RTC to the same time.
func setTime(adjustedTime time.Time) error {
func (n *NTP) setTime(adjustedTime time.Time) error {
log.Printf("setting time to %s", adjustedTime)
timeval := syscall.NsecToTimeval(adjustedTime.UnixNano())
return syscall.Settimeofday(&timeval)
if err := syscall.Settimeofday(&timeval); err != nil {
return err
}
if n.rtcClock != nil {
if err := n.rtcClock.Set(adjustedTime); err != nil {
log.Printf("error syncing RTC: %s", err)
} else {
log.Printf("synchronized RTC with system clock")
}
}
return nil
}
// adjustTime adds an offset to the current time.
func adjustTime(offset time.Duration) error {
func (n *NTP) adjustTime(offset time.Duration) error {
if offset < -AdjustTimeLimit || offset > AdjustTimeLimit {
return setTime(time.Now().Add(offset))
return n.setTime(time.Now().Add(offset))
}
var buf bytes.Buffer

View File

@ -5,6 +5,7 @@
package ntp_test
import (
"context"
"testing"
"github.com/stretchr/testify/suite"
@ -28,7 +29,7 @@ func (suite *NtpSuite) TestQuery() {
n, err := ntp.NewNTPClient(ntp.WithServer(testServer))
suite.Assert().NoError(err)
_, err = n.Query()
_, err = n.Query(context.Background())
suite.Assert().NoError(err)
}

View File

@ -41,7 +41,7 @@ func (r *Registrator) Register(s *grpc.Server) {
func (r *Registrator) Time(ctx context.Context, in *empty.Empty) (reply *timeapi.TimeResponse, err error) {
reply = &timeapi.TimeResponse{}
rt, err := r.Timed.Query()
rt, err := r.Timed.Query(ctx)
if err != nil {
return reply, err
}
@ -58,7 +58,7 @@ func (r *Registrator) TimeCheck(ctx context.Context, in *timeapi.TimeRequest) (r
return reply, err
}
rt, err := tc.Query()
rt, err := tc.Query(ctx)
if err != nil {
return reply, err
}