mirror of
https://github.com/ostreedev/ostree.git
synced 2025-01-18 10:04:17 +03:00
Add an ostree-boot-complete.service
to propagate staging failures
Quite a while ago we added staged deployments, which solved a bunch of issues around the `/etc` merge. However...a persistent problem since then is that any failures in that process that happened in the *previous* boot are not very visible. We ship custom code in `rpm-ostree status` to query the previous journal. But that has a few problems - one is that on systems that have been up a while, that failure message may even get rotated out. And second, some systems may not even have a persistent journal at all. A general thing we do in e.g. Fedora CoreOS testing is to check for systemd unit failures. We do that both in our automated tests, and we even ship code that displays them on ssh logins. And beyond that obviously a lot of other projects do the same; it's easy via `systemctl --failed`. So to make failures more visible, change our `ostree-finalize-staged.service` to have an internal wrapper around the process that "catches" any errors, and copies the error message into a file in `/boot/ostree`. Then, a new `ostree-boot-complete.service` looks for this file on startup and re-emits the error message, and fails. It also deletes the file. The rationale is to avoid *continually* warning. For example we need to handle the case when an upgrade process creates a new staged deployment. Now, we could change the ostree core code to delete the warning file when that happens instead, but this is trying to be a conservative change. This should make failures here much more visible as is.
This commit is contained in:
parent
98587a72db
commit
d3d3e4ea13
@ -38,6 +38,7 @@ endif
|
||||
if BUILDOPT_SYSTEMD
|
||||
systemdsystemunit_DATA = src/boot/ostree-prepare-root.service \
|
||||
src/boot/ostree-remount.service \
|
||||
src/boot/ostree-boot-complete.service \
|
||||
src/boot/ostree-finalize-staged.service \
|
||||
src/boot/ostree-finalize-staged.path \
|
||||
$(NULL)
|
||||
@ -64,6 +65,7 @@ endif
|
||||
EXTRA_DIST += src/boot/dracut/module-setup.sh \
|
||||
src/boot/dracut/ostree.conf \
|
||||
src/boot/mkinitcpio \
|
||||
src/boot/ostree-boot-complete.service \
|
||||
src/boot/ostree-prepare-root.service \
|
||||
src/boot/ostree-finalize-staged.path \
|
||||
src/boot/ostree-remount.service \
|
||||
|
@ -70,6 +70,7 @@ ostree_SOURCES += \
|
||||
src/ostree/ot-admin-builtin-diff.c \
|
||||
src/ostree/ot-admin-builtin-deploy.c \
|
||||
src/ostree/ot-admin-builtin-finalize-staged.c \
|
||||
src/ostree/ot-admin-builtin-boot-complete.c \
|
||||
src/ostree/ot-admin-builtin-undeploy.c \
|
||||
src/ostree/ot-admin-builtin-instutil.c \
|
||||
src/ostree/ot-admin-builtin-cleanup.c \
|
||||
|
33
src/boot/ostree-boot-complete.service
Normal file
33
src/boot/ostree-boot-complete.service
Normal file
@ -0,0 +1,33 @@
|
||||
# Copyright (C) 2022 Red Hat, Inc.
|
||||
#
|
||||
# This library is free software; you can redistribute it and/or
|
||||
# modify it under the terms of the GNU Lesser General Public
|
||||
# License as published by the Free Software Foundation; either
|
||||
# version 2 of the License, or (at your option) any later version.
|
||||
#
|
||||
# This library is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
# Lesser General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Lesser General Public
|
||||
# License along with this library. If not, see <https://www.gnu.org/licenses/>.
|
||||
|
||||
[Unit]
|
||||
Description=OSTree Complete Boot
|
||||
Documentation=man:ostree(1)
|
||||
# For now, this is the only condition on which we start, but it's
|
||||
# marked as a triggering condition in case in the future we want
|
||||
# to do something else.
|
||||
ConditionPathExists=|/boot/ostree/finalize-failure.stamp
|
||||
RequiresMountsFor=/boot
|
||||
# Ensure that we propagate the failure into the current boot before
|
||||
# any further finalization attempts.
|
||||
Before=ostree-finalize-staged.service
|
||||
|
||||
[Service]
|
||||
Type=oneshot
|
||||
# To write to /boot while keeping it read-only
|
||||
MountFlags=slave
|
||||
RemainAfterExit=yes
|
||||
ExecStart=/usr/bin/ostree admin boot-complete
|
@ -51,6 +51,7 @@ ostree_cmd__private__ (void)
|
||||
_ostree_repo_static_delta_delete,
|
||||
_ostree_repo_verify_bindings,
|
||||
_ostree_sysroot_finalize_staged,
|
||||
_ostree_sysroot_boot_complete,
|
||||
};
|
||||
|
||||
return &table;
|
||||
|
@ -33,6 +33,7 @@ typedef struct {
|
||||
gboolean (* ostree_static_delta_delete) (OstreeRepo *repo, const char *delta_id, GCancellable *cancellable, GError **error);
|
||||
gboolean (* ostree_repo_verify_bindings) (const char *collection_id, const char *ref_name, GVariant *commit, GError **error);
|
||||
gboolean (* ostree_finalize_staged) (OstreeSysroot *sysroot, GCancellable *cancellable, GError **error);
|
||||
gboolean (* ostree_boot_complete) (OstreeSysroot *sysroot, GCancellable *cancellable, GError **error);
|
||||
} OstreeCmdPrivateVTable;
|
||||
|
||||
/* Note this not really "public", we just export the symbol, but not the header */
|
||||
|
@ -134,6 +134,8 @@ require_internal_units (const char *normal_dir,
|
||||
return FALSE;
|
||||
if (symlinkat (SYSTEM_DATA_UNIT_PATH "/ostree-finalize-staged.path", normal_dir_dfd, "multi-user.target.wants/ostree-finalize-staged.path") < 0)
|
||||
return glnx_throw_errno_prefix (error, "symlinkat");
|
||||
if (symlinkat (SYSTEM_DATA_UNIT_PATH "/ostree-boot-complete.service", normal_dir_dfd, "multi-user.target.wants/ostree-boot-complete.service") < 0)
|
||||
return glnx_throw_errno_prefix (error, "symlinkat");
|
||||
|
||||
return TRUE;
|
||||
#else
|
||||
|
@ -3375,10 +3375,10 @@ ostree_sysroot_stage_tree_with_options (OstreeSysroot *self,
|
||||
}
|
||||
|
||||
/* Invoked at shutdown time by ostree-finalize-staged.service */
|
||||
gboolean
|
||||
_ostree_sysroot_finalize_staged (OstreeSysroot *self,
|
||||
GCancellable *cancellable,
|
||||
GError **error)
|
||||
static gboolean
|
||||
_ostree_sysroot_finalize_staged_inner (OstreeSysroot *self,
|
||||
GCancellable *cancellable,
|
||||
GError **error)
|
||||
{
|
||||
/* It's totally fine if there's no staged deployment; perhaps down the line
|
||||
* though we could teach the ostree cmdline to tell systemd to activate the
|
||||
@ -3475,9 +3475,63 @@ _ostree_sysroot_finalize_staged (OstreeSysroot *self,
|
||||
if (!ostree_sysroot_prepare_cleanup (self, cancellable, error))
|
||||
return FALSE;
|
||||
|
||||
// Cleanup will have closed some FDs, re-ensure writability
|
||||
if (!_ostree_sysroot_ensure_writable (self, error))
|
||||
return FALSE;
|
||||
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
/* Invoked at shutdown time by ostree-finalize-staged.service */
|
||||
gboolean
|
||||
_ostree_sysroot_finalize_staged (OstreeSysroot *self,
|
||||
GCancellable *cancellable,
|
||||
GError **error)
|
||||
{
|
||||
g_autoptr(GError) finalization_error = NULL;
|
||||
if (!_ostree_sysroot_ensure_boot_fd (self, error))
|
||||
return FALSE;
|
||||
if (!_ostree_sysroot_finalize_staged_inner (self, cancellable, &finalization_error))
|
||||
{
|
||||
g_autoptr(GError) writing_error = NULL;
|
||||
g_assert_cmpint (self->boot_fd, !=, -1);
|
||||
if (!glnx_file_replace_contents_at (self->boot_fd, _OSTREE_FINALIZE_STAGED_FAILURE_PATH,
|
||||
(guint8*)finalization_error->message, -1,
|
||||
0, cancellable, &writing_error))
|
||||
{
|
||||
// We somehow failed to write the failure message...that's not great. Maybe ENOSPC on /boot.
|
||||
g_printerr ("Failed to write %s: %s\n", _OSTREE_FINALIZE_STAGED_FAILURE_PATH, writing_error->message);
|
||||
}
|
||||
g_propagate_error (error, g_steal_pointer (&finalization_error));
|
||||
return FALSE;
|
||||
}
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
/* Invoked at bootup time by ostree-boot-complete.service */
|
||||
gboolean
|
||||
_ostree_sysroot_boot_complete (OstreeSysroot *self,
|
||||
GCancellable *cancellable,
|
||||
GError **error)
|
||||
{
|
||||
if (!_ostree_sysroot_ensure_boot_fd (self, error))
|
||||
return FALSE;
|
||||
|
||||
glnx_autofd int failure_fd = -1;
|
||||
if (!ot_openat_ignore_enoent (self->boot_fd, _OSTREE_FINALIZE_STAGED_FAILURE_PATH, &failure_fd, error))
|
||||
return FALSE;
|
||||
// If we didn't find a failure log, then there's nothing to do right now.
|
||||
// (Actually this unit shouldn't even be invoked, but we may do more in the future)
|
||||
if (failure_fd == -1)
|
||||
return TRUE;
|
||||
g_autofree char *failure_data = glnx_fd_readall_utf8 (failure_fd, NULL, cancellable, error);
|
||||
if (failure_data == NULL)
|
||||
return glnx_prefix_error (error, "Reading from %s", _OSTREE_FINALIZE_STAGED_FAILURE_PATH);
|
||||
// Remove the file; we don't want to continually error out.
|
||||
(void) unlinkat (self->boot_fd, _OSTREE_FINALIZE_STAGED_FAILURE_PATH, 0);
|
||||
return glnx_throw (error, "ostree-finalize-staged.service failed on previous boot: %s", failure_data);
|
||||
}
|
||||
|
||||
/**
|
||||
* ostree_sysroot_deployment_set_kargs:
|
||||
* @self: Sysroot
|
||||
|
@ -96,6 +96,9 @@ struct OstreeSysroot {
|
||||
#define _OSTREE_SYSROOT_BOOT_INITRAMFS_OVERLAYS "ostree/initramfs-overlays"
|
||||
#define _OSTREE_SYSROOT_INITRAMFS_OVERLAYS "boot/" _OSTREE_SYSROOT_BOOT_INITRAMFS_OVERLAYS
|
||||
|
||||
// Relative to /boot, consumed by ostree-boot-complete.service
|
||||
#define _OSTREE_FINALIZE_STAGED_FAILURE_PATH "ostree/finalize-failure.stamp"
|
||||
|
||||
gboolean
|
||||
_ostree_sysroot_ensure_writable (OstreeSysroot *self,
|
||||
GError **error);
|
||||
@ -142,6 +145,10 @@ gboolean
|
||||
_ostree_sysroot_finalize_staged (OstreeSysroot *self,
|
||||
GCancellable *cancellable,
|
||||
GError **error);
|
||||
gboolean
|
||||
_ostree_sysroot_boot_complete (OstreeSysroot *self,
|
||||
GCancellable *cancellable,
|
||||
GError **error);
|
||||
|
||||
OstreeDeployment *
|
||||
_ostree_sysroot_deserialize_deployment_from_variant (GVariant *v,
|
||||
|
@ -357,6 +357,8 @@ _ostree_sysroot_ensure_writable (OstreeSysroot *self,
|
||||
ostree_sysroot_unload (self);
|
||||
if (!ensure_sysroot_fd (self, error))
|
||||
return FALSE;
|
||||
if (!_ostree_sysroot_ensure_boot_fd (self, error))
|
||||
return FALSE;
|
||||
|
||||
return TRUE;
|
||||
}
|
||||
|
58
src/ostree/ot-admin-builtin-boot-complete.c
Normal file
58
src/ostree/ot-admin-builtin-boot-complete.c
Normal file
@ -0,0 +1,58 @@
|
||||
/*
|
||||
* Copyright (C) 2022 Red Hat, Inc.
|
||||
*
|
||||
* SPDX-License-Identifier: LGPL-2.0+
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2 of the License, or (at your option) any later version.
|
||||
*
|
||||
* This library is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with this library. If not, see <https://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include "config.h"
|
||||
|
||||
#include <stdlib.h>
|
||||
|
||||
#include "ot-main.h"
|
||||
#include "ot-admin-builtins.h"
|
||||
#include "ot-admin-functions.h"
|
||||
#include "ostree.h"
|
||||
#include "otutil.h"
|
||||
|
||||
#include "ostree-cmdprivate.h"
|
||||
|
||||
static GOptionEntry options[] = {
|
||||
{ NULL }
|
||||
};
|
||||
|
||||
gboolean
|
||||
ot_admin_builtin_boot_complete (int argc, char **argv, OstreeCommandInvocation *invocation, GCancellable *cancellable, GError **error)
|
||||
{
|
||||
/* Just a sanity check; we shouldn't be called outside of the service though.
|
||||
*/
|
||||
struct stat stbuf;
|
||||
if (fstatat (AT_FDCWD, OSTREE_PATH_BOOTED, &stbuf, 0) < 0)
|
||||
return TRUE;
|
||||
// We must have been invoked via systemd which should have set up a mount namespace.
|
||||
g_assert (getenv ("INVOCATION_ID"));
|
||||
|
||||
g_autoptr(GOptionContext) context = g_option_context_new ("");
|
||||
g_autoptr(OstreeSysroot) sysroot = NULL;
|
||||
if (!ostree_admin_option_context_parse (context, options, &argc, &argv,
|
||||
OSTREE_ADMIN_BUILTIN_FLAG_SUPERUSER,
|
||||
invocation, &sysroot, cancellable, error))
|
||||
return FALSE;
|
||||
|
||||
if (!ostree_cmd__private__()->ostree_boot_complete (sysroot, cancellable, error))
|
||||
return FALSE;
|
||||
|
||||
return TRUE;
|
||||
}
|
@ -39,6 +39,7 @@ BUILTINPROTO(deploy);
|
||||
BUILTINPROTO(cleanup);
|
||||
BUILTINPROTO(pin);
|
||||
BUILTINPROTO(finalize_staged);
|
||||
BUILTINPROTO(boot_complete);
|
||||
BUILTINPROTO(unlock);
|
||||
BUILTINPROTO(status);
|
||||
BUILTINPROTO(set_origin);
|
||||
|
@ -43,6 +43,9 @@ static OstreeCommand admin_subcommands[] = {
|
||||
{ "finalize-staged", OSTREE_BUILTIN_FLAG_NO_REPO | OSTREE_BUILTIN_FLAG_HIDDEN,
|
||||
ot_admin_builtin_finalize_staged,
|
||||
"Internal command to run at shutdown time" },
|
||||
{ "boot-complete", OSTREE_BUILTIN_FLAG_NO_REPO | OSTREE_BUILTIN_FLAG_HIDDEN,
|
||||
ot_admin_builtin_boot_complete,
|
||||
"Internal command to run at boot after an update was applied" },
|
||||
{ "init-fs", OSTREE_BUILTIN_FLAG_NO_REPO,
|
||||
ot_admin_builtin_init_fs,
|
||||
"Initialize a root filesystem" },
|
||||
|
@ -146,6 +146,18 @@ EOF
|
||||
# Cleanup refs
|
||||
ostree refs --delete staged-deploy nonstaged-deploy
|
||||
echo "ok cleanup refs"
|
||||
|
||||
# Now finally, try breaking staged updates and verify that ostree-boot-complete fails on the next boot
|
||||
unshare -m /bin/sh -c 'mount -o remount,rw /boot; chattr +i /boot'
|
||||
rpm-ostree kargs --append=foo=bar
|
||||
/tmp/autopkgtest-reboot "3"
|
||||
;;
|
||||
"3")
|
||||
(systemctl status ostree-boot-complete.service || true) | tee out.txt
|
||||
assert_file_has_content out.txt 'error: ostree-finalize-staged.service failed on previous boot.*Operation not permitted'
|
||||
systemctl show -p Result ostree-boot-complete.service > out.txt
|
||||
assert_file_has_content out.txt='Result=exit-code'
|
||||
echo "ok boot-complete.service"
|
||||
;;
|
||||
*) fatal "Unexpected AUTOPKGTEST_REBOOT_MARK=${AUTOPKGTEST_REBOOT_MARK}" ;;
|
||||
esac
|
||||
|
Loading…
x
Reference in New Issue
Block a user