status: Detect if staging failed in previous boot

Sample output:

```
$ rpm-ostree status
State: idle
Warning: failed to finalize previous deployment
         check `journalctl -b -1 -u ostree-finalize-staged.service`
AutomaticUpdates: disabled
...
```

(Though open to tweaking it).

I also played with directly invoking `journalctl` for the user, but that
can get really spammy with e.g. `os-prober` output and such.

I wrote this in Rust using journal API wrappers because I also plan to
implement the `history` command in Rust and will also enhance that new
`journal` module there for that.

Requires: https://github.com/ostreedev/ostree/pull/1750
Requires: https://github.com/jmesmon/rust-systemd/pull/54
(Though I've pointed the manifest at my branch for now for CI).

Closes: #1567

Closes: #1601
Approved by: cgwalters
This commit is contained in:
Jonathan Lebon 2018-10-05 12:57:00 -04:00 committed by Atomic Bot
parent dbdbaf87ee
commit f6c3616148
5 changed files with 135 additions and 2 deletions

View File

@ -1,7 +1,7 @@
[package]
name = "rpmostree-rust"
version = "0.1.0"
authors = ["Colin Walters <walters@verbum.org>"]
authors = ["Colin Walters <walters@verbum.org>", "Jonathan Lebon <jonathan@jlebon.com>"]
[dependencies]
serde = "1.0.78"
@ -16,6 +16,11 @@ tempfile = "3.0.3"
openat = "0.1.15"
curl = "0.4.14"
c_utf8 = "0.1.0"
systemd = "0.3.0"
# Until https://github.com/jmesmon/rust-systemd/pull/54 gets merged
[patch.crates-io]
systemd = { git = "https://github.com/jlebon/rust-systemd", branch = "pr/add-monotonic" }
[lib]
name = "rpmostree_rust"

67
rust/src/journal.rs Normal file
View File

@ -0,0 +1,67 @@
/*
* Copyright (C) 2018 Red Hat, Inc.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
extern crate systemd;
use self::systemd::id128::Id128;
use self::systemd::journal;
use std::io;
static OSTREE_FINALIZE_STAGED_SERVICE: &'static str = "ostree-finalize-staged.service";
static OSTREE_DEPLOYMENT_FINALIZING_MSG_ID: &'static str = "e8646cd63dff4625b77909a8e7a40994";
static OSTREE_DEPLOYMENT_COMPLETE_MSG_ID: &'static str = "dd440e3e549083b63d0efc7dc15255f1";
/// Look for a failure from ostree-finalize-staged.service in the journal of the previous boot.
///
/// Returns `Ok(true)` when the previous boot's journal contains an entry from
/// `OSTREE_FINALIZE_STAGED_SERVICE` tagged with `OSTREE_DEPLOYMENT_FINALIZING_MSG_ID` that is
/// not followed by an entry from the same unit and boot tagged with
/// `OSTREE_DEPLOYMENT_COMPLETE_MSG_ID`.
///
/// Returns `Ok(false)` when the journal has no entry from a boot before the current one, when
/// no finalizing entry is found in the previous boot, or when a completion entry is found.
///
/// # Errors
///
/// Propagates any error returned while opening, filtering or iterating the journal, or while
/// querying the current boot ID.
pub fn journal_find_staging_failure() -> io::Result<bool> {
    let mut j = journal::Journal::open(journal::JournalFiles::System, false, true)?;

    // first, go to the first entry of the current boot
    let boot_id = Id128::from_boot()?;
    j.match_add("_BOOT_ID", boot_id.to_string().as_str())?;
    j.seek(journal::JournalSeek::Head)?;
    // drop the boot filter again so that we can walk backwards across the boot boundary
    j.match_flush()?;

    // Now, go backwards until we hit the first entry from the previous boot. In theory that should
    // just be a single `sd_journal_previous()` call, but we need a loop here, see:
    // https://github.com/systemd/systemd/commit/dc00966228ff90c554fd034e588ea55eb605ec52
    let mut previous_boot_id: Id128 = boot_id.clone();
    while previous_boot_id == boot_id {
        match j.previous_record()? {
            // the second element of the monotonic timestamp is the boot ID of the entry
            Some(_) => previous_boot_id = j.monotonic_timestamp()?.1,
            None => return Ok(false), // no previous boot!
        }
    }
    // we just need it as a string from now on
    let previous_boot_id = previous_boot_id.to_string();

    // look for OSTree's finalization msg, searching backwards from where the loop above left us
    j.match_add("MESSAGE_ID", OSTREE_DEPLOYMENT_FINALIZING_MSG_ID)?;
    j.match_add("_SYSTEMD_UNIT", OSTREE_FINALIZE_STAGED_SERVICE)?;
    j.match_add("_BOOT_ID", previous_boot_id.as_str())?;
    if j.previous_record()?.is_none() {
        return Ok(false); // didn't run (or staged deployment was cleaned up)
    }

    // and now check if it actually completed the transaction: search forwards from the
    // finalization msg for the completion msg
    j.match_flush()?;
    j.match_add("MESSAGE_ID", OSTREE_DEPLOYMENT_COMPLETE_MSG_ID)?;
    j.match_add("_SYSTEMD_UNIT", OSTREE_FINALIZE_STAGED_SERVICE)?;
    j.match_add("_BOOT_ID", previous_boot_id.as_str())?;

    // no completion msg means finalization started but never finished
    Ok(j.next_record()?.is_none())
}

View File

@ -41,6 +41,8 @@ mod glibutils;
use glibutils::*;
mod treefile;
use treefile::*;
mod journal;
use journal::*;
mod utils;
/* Wrapper functions for translating from C to Rust */
@ -193,3 +195,21 @@ pub extern "C" fn ror_download_to_fd(
}
}
}
/// C-callable wrapper around `journal_find_staging_failure()`.
///
/// On success, writes 1 (failure detected) or 0 (no failure detected) to `*did_fail` and
/// returns 1. On error, hands the error and `gerror` to `error_to_glib()`, leaves `*did_fail`
/// untouched and returns 0.
///
/// # Panics
///
/// Panics if `did_fail` is NULL.
#[no_mangle]
pub extern "C" fn ror_journal_find_staging_failure(
    did_fail: *mut libc::c_int,
    gerror: *mut *mut glib_sys::GError,
) -> libc::c_int {
    assert!(!did_fail.is_null());

    let failed = match journal_find_staging_failure() {
        Ok(failed) => failed,
        Err(e) => {
            error_to_glib(&e, gerror);
            return 0;
        }
    };

    // SAFETY: `did_fail` was asserted to be non-NULL above.
    // NOTE(review): this still relies on the C caller passing a pointer valid for writes.
    unsafe { *did_fail = failed as libc::c_int };
    1
}

View File

@ -34,6 +34,7 @@
#include "rpmostree-util.h"
#include "rpmostree-core.h"
#include "rpmostree-rpm-util.h"
#include "rpmostree-rust.h"
#include "libsd-locale-util.h"
#include "libsd-time-util.h"
@ -48,6 +49,11 @@
#define RPMOSTREE_AUTOMATIC_SERVICE_OBJPATH \
"/org/freedesktop/systemd1/unit/rpm_2dostreed_2dautomatic_2eservice"
#define OSTREE_FINALIZE_STAGED_SERVICE_UNIT \
"ostree-finalize-staged.service"
#define SD_MESSAGE_UNIT_STOPPED_STR SD_ID128_MAKE_STR(9d,1a,aa,27,d6,01,40,bd,96,36,54,38,aa,d2,02,86)
static gboolean opt_pretty;
static gboolean opt_verbose;
static gboolean opt_verbose_advisories;
@ -290,6 +296,20 @@ print_daemon_state (RPMOSTreeSysroot *sysroot_proxy,
const char *policy = rpmostree_sysroot_get_automatic_update_policy (sysroot_proxy);
g_print ("State: %s\n", txn_proxy ? "busy" : "idle");
gboolean staging_failure;
if (!ror_journal_find_staging_failure (&staging_failure, error))
return glnx_prefix_error (error, "While querying journal");
if (staging_failure)
{
g_print ("%s%sWarning: failed to finalize previous deployment\n"
" check `journalctl -b -1 -u %s`%s%s\n",
get_red_start (), get_bold_start (),
OSTREE_FINALIZE_STAGED_SERVICE_UNIT,
get_bold_end (), get_red_end ());
}
g_print ("AutomaticUpdates: ");
if (g_str_equal (policy, "none"))
g_print ("disabled\n");

View File

@ -47,7 +47,7 @@ vm_rpmostree cleanup -p
# Add metadata string containing EndOfLife attribute
META_ENDOFLIFE_MESSAGE="this is a test for metadata message"
commit=$(vm_cmd ostree commit -b vmcheck \
--tree=ref=vmcheck --add-metadata-string=ostree.endoflife=\"${META_ENDOFLIFE_MESSAGE}\")
--tree=ref=vmcheck --add-metadata-string=ostree.endoflife="'${META_ENDOFLIFE_MESSAGE}'")
vm_rpmostree upgrade
vm_assert_status_jq ".deployments[0][\"endoflife\"] == \"${META_ENDOFLIFE_MESSAGE}\""
echo "ok endoflife metadata gets parsed correctly"
@ -216,3 +216,24 @@ if ! vm_rpmostree install refresh-md-new-pkg --dry-run; then
fi
vm_stop_httpd vmcheck
echo "ok refresh-md"
# Check that a failed staging shows up in `rpm-ostree status`.
# First, create a staged deployment by layering a freshly built dummy package.
vm_build_rpm test-stage-fail
vm_rpmostree install test-stage-fail
vm_pending_is_staged
# OK, now make sure we'll fail. One nuclear way to do this is to just delete the
# deployment root it expects to exist. I played with overriding the service file
# so we just do e.g. /usr/bin/false, but the issue is we still want the "start"
# journal msg to be emitted.
vm_cmd rm -rf $(vm_get_deployment_root 0)
# and now check that we notice there was a failure in `status`
vm_reboot
# Sanity check: the previous boot's journal for the finalize unit should contain
# the error itself.
vm_cmd journalctl -b -1 -u ostree-finalize-staged.service > svc.txt
assert_file_has_content svc.txt "error: opendir"
# The actual check: `status` must print the warning about the failed finalization.
vm_rpmostree status > status.txt
assert_file_has_content status.txt "failed to finalize previous deployment"
echo "ok previous staged failure in status"