mirror of
https://github.com/ostreedev/ostree.git
synced 2024-12-22 17:35:55 +03:00
lib/deploy: Use a FIFREEZE/FITHAW cycle for /boot
See: http://marc.info/?l=linux-fsdevel&m=149520244919284&w=2 XFS doesn't flush the journal on `syncfs()`. GRUB doesn't know how to follow the XFS journal, so if the filesystem is in a dirty state (possible with xfs `/boot`, extremely likely with `/`, if the journaled data includes content for `/boot`, the system may be unbootable if a system crash occurs. Fix this by doing a `FIFREEZE`+`FITHAW` cycle. Now, most people probably would have replaced the `syncfs()` invocation with those two ioctls. But this would have become (I believe) the *only* place in libostree where we weren't safe against interruption. The failure mode would be ugly; nothing else would be able to write to the filesystem until manual intervention. The real fix here I think is to land an atomic `FIFREEZETHAW` ioctl in the kernel. I might try a patch. In the meantime though, let's jump through some hoops and set up a "watchdog" child process that acts as a fallback unfreezer. Closes: https://github.com/ostreedev/ostree/issues/876 Closes: #1049 Approved by: jlebon
This commit is contained in:
parent
9f8f351cd4
commit
8642ef5ab3
@ -22,8 +22,14 @@
|
||||
|
||||
#include <gio/gunixinputstream.h>
|
||||
#include <gio/gunixoutputstream.h>
|
||||
#include <glib-unix.h>
|
||||
#include <sys/mount.h>
|
||||
#include <sys/statvfs.h>
|
||||
#include <sys/socket.h>
|
||||
#include <sys/ioctl.h>
|
||||
#include <sys/poll.h>
|
||||
#include <linux/fs.h>
|
||||
#include <err.h>
|
||||
|
||||
#ifdef HAVE_LIBMOUNT
|
||||
#include <libmount.h>
|
||||
@ -973,18 +979,126 @@ checksum_from_kernel_src (const char *name,
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
/* We used to syncfs(), but that doesn't flush the journal on XFS,
|
||||
* and since GRUB2 can't read the XFS journal, the system
|
||||
* could fail to boot.
|
||||
*
|
||||
* http://marc.info/?l=linux-fsdevel&m=149520244919284&w=2
|
||||
* https://github.com/ostreedev/ostree/pull/1049
|
||||
*/
|
||||
static gboolean
|
||||
syncfs_dir_at (int dfd,
|
||||
const char *path,
|
||||
GCancellable *cancellable,
|
||||
GError **error)
|
||||
fsfreeze_thaw_cycle (OstreeSysroot *self,
|
||||
int rootfs_dfd,
|
||||
GCancellable *cancellable,
|
||||
GError **error)
|
||||
{
|
||||
glnx_fd_close int child_dfd = -1;
|
||||
if (!glnx_opendirat (dfd, path, TRUE, &child_dfd, error))
|
||||
return FALSE;
|
||||
if (syncfs (child_dfd) != 0)
|
||||
return glnx_throw_errno_prefix (error, "syncfs(%s)", path);
|
||||
GLNX_AUTO_PREFIX_ERROR ("During fsfreeze-thaw", error);
|
||||
|
||||
int sockpair[2];
|
||||
if (socketpair (AF_UNIX, SOCK_SEQPACKET | SOCK_CLOEXEC, 0, sockpair) < 0)
|
||||
return glnx_throw_errno_prefix (error, "socketpair");
|
||||
glnx_fd_close int sock_parent = sockpair[0];
|
||||
glnx_fd_close int sock_watchdog = sockpair[1];
|
||||
|
||||
pid_t pid = fork ();
|
||||
if (pid < 0)
|
||||
return glnx_throw_errno_prefix (error, "fork");
|
||||
|
||||
const gboolean debug_fifreeze = (self->debug_flags & OSTREE_SYSROOT_DEBUG_TEST_FIFREEZE)>0;
|
||||
char c = '!';
|
||||
if (pid == 0) /* Child watchdog/unfreezer process. */
|
||||
{
|
||||
(void) close (glnx_steal_fd (&sock_parent));
|
||||
/* Daemonize, and mask SIGINT/SIGTERM, so we're likely to survive e.g.
|
||||
* someone doing a `systemctl restart rpm-ostreed` or a Ctrl-C of
|
||||
* `ostree admin upgrade`.
|
||||
*/
|
||||
if (daemon (0, debug_fifreeze ? 1 : 0) < 0)
|
||||
err (1, "daemon");
|
||||
int sigs[] = { SIGINT, SIGTERM };
|
||||
for (guint i = 0; i < G_N_ELEMENTS (sigs); i++)
|
||||
{
|
||||
if (signal (sigs[i], SIG_IGN) == SIG_ERR)
|
||||
err (1, "signal");
|
||||
}
|
||||
/* Tell the parent we're ready */
|
||||
if (write (sock_watchdog, &c, sizeof (c)) != 1)
|
||||
err (1, "write");
|
||||
/* Wait for the parent to say it's going to freeze. */
|
||||
ssize_t bytes_read = TEMP_FAILURE_RETRY (read (sock_watchdog, &c, sizeof (c)));
|
||||
if (bytes_read < 0)
|
||||
err (1, "read");
|
||||
if (bytes_read != 1)
|
||||
errx (1, "failed to read from parent");
|
||||
/* Now we wait for the second message from the parent saying the freeze is
|
||||
* complete. We have a 30 second timeout; if somehow the parent hasn't
|
||||
* signaled completion, go ahead and unfreeze. But for debugging, just 1
|
||||
* second to avoid exessively lengthining the test suite.
|
||||
*/
|
||||
const int timeout_ms = debug_fifreeze ? 1000 : 30000;
|
||||
struct pollfd pfds[1];
|
||||
pfds[0].fd = sock_watchdog;
|
||||
pfds[0].events = POLLIN | POLLHUP;
|
||||
int r = TEMP_FAILURE_RETRY (poll (pfds, 1, timeout_ms));
|
||||
/* Do a thaw if we hit an error, or if the poll timed out */
|
||||
if (r <= 0)
|
||||
{
|
||||
if (TEMP_FAILURE_RETRY (ioctl (rootfs_dfd, FITHAW, 0)) != 0)
|
||||
{
|
||||
if (errno == EPERM)
|
||||
; /* Ignore this for the test suite */
|
||||
else
|
||||
err (1, "FITHAW");
|
||||
}
|
||||
/* But if we got an error from poll, let's log it */
|
||||
if (r < 0)
|
||||
err (1, "poll");
|
||||
}
|
||||
if (debug_fifreeze)
|
||||
g_printerr ("fifreeze watchdog was run\n");
|
||||
exit (EXIT_SUCCESS);
|
||||
}
|
||||
else /* Parent process. */
|
||||
{
|
||||
(void) close (glnx_steal_fd (&sock_watchdog));
|
||||
/* Wait for the watchdog to say it's set up; mainly that it's
|
||||
* masked SIGTERM successfully.
|
||||
*/
|
||||
ssize_t bytes_read = TEMP_FAILURE_RETRY (read (sock_parent, &c, sizeof (c)));
|
||||
if (bytes_read < 0)
|
||||
return glnx_throw_errno_prefix (error, "read(watchdog init)");
|
||||
if (bytes_read != 1)
|
||||
return glnx_throw (error, "read(watchdog init)");
|
||||
/* And tell the watchdog that we're ready to start */
|
||||
if (write (sock_parent, &c, sizeof (c)) != sizeof (c))
|
||||
return glnx_throw_errno_prefix (error, "write(watchdog start)");
|
||||
/* Testing infrastructure */
|
||||
if (debug_fifreeze)
|
||||
return glnx_throw (error, "aborting due to test-fifreeze");
|
||||
/* Do a freeze/thaw cycle; TODO add a FIFREEZETHAW ioctl */
|
||||
if (ioctl (rootfs_dfd, FIFREEZE, 0) != 0)
|
||||
{
|
||||
/* Not supported, or we're running in the unit tests (as non-root)?
|
||||
* OK, let's just do a syncfs.
|
||||
*/
|
||||
if (G_IN_SET (errno, EOPNOTSUPP, EPERM))
|
||||
{
|
||||
if (TEMP_FAILURE_RETRY (syncfs (rootfs_dfd)) != 0)
|
||||
return glnx_throw_errno_prefix (error, "syncfs");
|
||||
/* Write the completion, and return */
|
||||
if (write (sock_parent, &c, sizeof (c)) != sizeof (c))
|
||||
return glnx_throw_errno_prefix (error, "write(watchdog syncfs complete)");
|
||||
return TRUE;
|
||||
}
|
||||
else
|
||||
return glnx_throw_errno_prefix (error, "ioctl(FIFREEZE)");
|
||||
}
|
||||
/* And finally thaw, then signal our completion to the watchdog */
|
||||
if (TEMP_FAILURE_RETRY (ioctl (rootfs_dfd, FITHAW, 0)) != 0)
|
||||
return glnx_throw_errno_prefix (error, "ioctl(FITHAW)");
|
||||
if (write (sock_parent, &c, sizeof (c)) != sizeof (c))
|
||||
return glnx_throw_errno_prefix (error, "write(watchdog FITHAW complete)");
|
||||
}
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
@ -1012,7 +1126,10 @@ full_system_sync (OstreeSysroot *self,
|
||||
out_stats->root_syncfs_msec = (end_msec - start_msec);
|
||||
|
||||
start_msec = g_get_monotonic_time () / 1000;
|
||||
if (!syncfs_dir_at (self->sysroot_fd, "boot", cancellable, error))
|
||||
glnx_fd_close int boot_dfd = -1;
|
||||
if (!glnx_opendirat (self->sysroot_fd, "boot", TRUE, &boot_dfd, error))
|
||||
return FALSE;
|
||||
if (!fsfreeze_thaw_cycle (self, boot_dfd, cancellable, error))
|
||||
return FALSE;
|
||||
end_msec = g_get_monotonic_time () / 1000;
|
||||
out_stats->boot_syncfs_msec = (end_msec - start_msec);
|
||||
|
@ -33,7 +33,8 @@ typedef enum {
|
||||
OSTREE_SYSROOT_DEBUG_MUTABLE_DEPLOYMENTS = 1 << 0,
|
||||
/* See https://github.com/ostreedev/ostree/pull/759 */
|
||||
OSTREE_SYSROOT_DEBUG_NO_XATTRS = 1 << 1,
|
||||
|
||||
/* https://github.com/ostreedev/ostree/pull/1049 */
|
||||
OSTREE_SYSROOT_DEBUG_TEST_FIFREEZE = 1 << 2,
|
||||
} OstreeSysrootDebugFlags;
|
||||
|
||||
/**
|
||||
|
@ -166,6 +166,7 @@ ostree_sysroot_init (OstreeSysroot *self)
|
||||
{
|
||||
const GDebugKey keys[] = {
|
||||
{ "mutable-deployments", OSTREE_SYSROOT_DEBUG_MUTABLE_DEPLOYMENTS },
|
||||
{ "test-fifreeze", OSTREE_SYSROOT_DEBUG_TEST_FIFREEZE },
|
||||
{ "no-xattrs", OSTREE_SYSROOT_DEBUG_NO_XATTRS },
|
||||
};
|
||||
|
||||
|
@ -249,3 +249,11 @@ ${CMD_PREFIX} ostree --sysroot=${deployment} remote add --set=gpg-verify=false r
|
||||
assert_not_file_has_content sysroot/ostree/repo/config remote-test-nonphysical
|
||||
assert_file_has_content ${deployment}/etc/ostree/remotes.d/remote-test-nonphysical.conf testos-repo
|
||||
echo "ok remote add nonphysical sysroot"
|
||||
|
||||
if env OSTREE_SYSROOT_DEBUG="${OSTREE_SYSROOT_DEBUG},test-fifreeze" \
|
||||
${CMD_PREFIX} ostree admin deploy --os=testos testos:testos/buildmaster/x86_64-runtime 2>err.txt; then
|
||||
fatal "fifreeze-test exited successfully?"
|
||||
fi
|
||||
assert_file_has_content err.txt "fifreeze watchdog was run"
|
||||
assert_file_has_content err.txt "During fsfreeze-thaw: aborting due to test-fifreeze"
|
||||
echo "ok fifreeze test"
|
||||
|
@ -19,7 +19,7 @@
|
||||
|
||||
set -euo pipefail
|
||||
|
||||
echo "1..18"
|
||||
echo "1..19"
|
||||
|
||||
. $(dirname $0)/libtest.sh
|
||||
|
||||
|
@ -19,7 +19,7 @@
|
||||
|
||||
set -euo pipefail
|
||||
|
||||
echo "1..18"
|
||||
echo "1..19"
|
||||
|
||||
. $(dirname $0)/libtest.sh
|
||||
|
||||
|
@ -20,7 +20,7 @@
|
||||
|
||||
set -euo pipefail
|
||||
|
||||
echo "1..19"
|
||||
echo "1..20"
|
||||
|
||||
. $(dirname $0)/libtest.sh
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user