fix: take into account the moment seen when cleaning up CRI images

Fixes #8069

The image age from the CRI is the moment the image was pulled, so if it
was pulled long time ago, the previous version would nuke the image as
soon as it is unreferenced. The new version would allow the image to
stay for the full grace period in case the rollback is requested.

Signed-off-by: Andrey Smirnov <andrey.smirnov@siderolabs.com>
This commit is contained in:
Andrey Smirnov 2024-01-31 20:58:07 +04:00
parent aa03204b86
commit 17567f19be
No known key found for this signature in database
GPG Key ID: FE042E3D4085A811
2 changed files with 26 additions and 5 deletions

View File

@ -39,6 +39,8 @@ const ImageGCGracePeriod = 4 * ImageCleanupInterval
type CRIImageGCController struct {
ImageServiceProvider func() (ImageServiceProvider, error)
Clock clock.Clock
imageFirstSeenUnreferenced map[string]time.Time
}
// ImageServiceProvider wraps the containerd image service.
@ -116,6 +118,10 @@ func (ctrl *CRIImageGCController) Run(ctx context.Context, r controller.Runtime,
ctrl.Clock = clock.New()
}
if ctrl.imageFirstSeenUnreferenced == nil {
ctrl.imageFirstSeenUnreferenced = map[string]time.Time{}
}
var (
criIsUp bool
expectedImages []string
@ -273,10 +279,23 @@ func (ctrl *CRIImageGCController) cleanup(ctx context.Context, logger *zap.Logge
if shouldKeep {
logger.Debug("image is referenced, skipping garbage collection", zap.String("image", image.Name))
delete(ctrl.imageFirstSeenUnreferenced, image.Name)
continue
}
imageAge := ctrl.Clock.Since(image.CreatedAt)
if _, ok := ctrl.imageFirstSeenUnreferenced[image.Name]; !ok {
ctrl.imageFirstSeenUnreferenced[image.Name] = ctrl.Clock.Now()
}
// calculate image age two ways, and pick the minimum:
// * as CRI reports it, which is the time image got pulled
// * as we see it, this means the image won't be deleted until it reaches the age of ImageGCGracePeriod from the moment it became unreferenced
imageAgeCRI := ctrl.Clock.Since(image.CreatedAt)
imageAgeInternal := ctrl.Clock.Since(ctrl.imageFirstSeenUnreferenced[image.Name])
imageAge := min(imageAgeCRI, imageAgeInternal)
if imageAge < ImageGCGracePeriod {
logger.Debug("skipping image cleanup, as it's below minimum age", zap.String("image", image.Name), zap.Duration("age", imageAge))
@ -287,6 +306,7 @@ func (ctrl *CRIImageGCController) cleanup(ctx context.Context, logger *zap.Logge
return fmt.Errorf("failed to delete an image %s: %w", image.Name, err)
}
delete(ctrl.imageFirstSeenUnreferenced, image.Name)
logger.Info("deleted an image", zap.String("image", image.Name))
}

View File

@ -111,8 +111,9 @@ func (suite *CRIImageGCSuite) TestReconcile() {
},
}, // ok to be gc'd
{
Name: "sha256:6b094bd0b063a1172eec7da249eccbb48cc48333800569363d67c747960cfa0a",
CreatedAt: suite.fakeClock.Now().Add(-2 * runtimectrl.ImageGCGracePeriod),
Name: "sha256:6b094bd0b063a1172eec7da249eccbb48cc48333800569363d67c747960cfa0a",
// the image age is more than the grace period, but the controller won't remove due to the check on the last seen unreferenced timestamp
CreatedAt: suite.fakeClock.Now().Add(-4 * runtimectrl.ImageGCGracePeriod),
Target: v1.Descriptor{
Digest: must(digest.Parse("sha256:6b094bd0b063a1172eec7da249eccbb48cc48333800569363d67c747960cfa0a")),
},
@ -123,7 +124,7 @@ func (suite *CRIImageGCSuite) TestReconcile() {
Target: v1.Descriptor{
Digest: must(digest.Parse("sha256:7051a34bcd2522e58a2291d1aa065667f225fd07e4445590b091e86c6799b135")),
},
}, // current image
}, // current image``
{
Name: "registry.io/org/image1@sha256:7051a34bcd2522e58a2291d1aa065667f225fd07e4445590b091e86c6799b135",
CreatedAt: suite.fakeClock.Now().Add(-2 * runtimectrl.ImageGCGracePeriod),
@ -140,7 +141,7 @@ func (suite *CRIImageGCSuite) TestReconcile() {
}, // current image, digest ref
{
Name: "registry.io/org/image1:v1.3.8",
CreatedAt: suite.fakeClock.Now(),
CreatedAt: suite.fakeClock.Now().Add(runtimectrl.ImageGCGracePeriod),
Target: v1.Descriptor{
Digest: must(digest.Parse("sha256:fd03335dd2e7163e5e36e933a0c735d7fec6f42b33ddafad0bc54f333e4a23c0")),
},