compose: Hash all treefile externals and flattened manifest

Move hashing to the Rust side so that we can easily hash over the final
set of inputs after parsing. This means that we now hash over all the
externals, like `add-files` references, any `postprocess-script` script,
and `passwd` and `group` files.

The original motivation for this was that hashing over a reserialized
version of the treefile was not deterministic now that treefiles include
hash tables (i.e. `add-commit-metadata`). So I initially included each
individual treefile as part of the hash.

I realized afterwards that just switching to `BTreeMap` fixes this, so
we can keep hashing only the final flattened reserialized treefile so we
ignore comments and whitespace too. But since I already wrote the patch,
and it fixes a real issue today... here we are.

One notable change though is that we now hash the treefile in non-pretty
mode to increase the chances that the serialized form remains stable.
Ironically, this change itself is likely to cause a one-time no-op commit once it reaches pipelines that iterate quickly. All for the greater good, though.

Closes: #1865
Approved by: cgwalters
This commit is contained in:
Jonathan Lebon 2019-07-08 21:10:17 -04:00 committed by Atomic Bot
parent 3326510719
commit b381e0294f
2 changed files with 65 additions and 35 deletions

View File

@ -39,7 +39,7 @@ const INCLUDE_MAXDEPTH: u32 = 50;
/// a TreeComposeConfig.
struct TreefileExternals {
postprocess_script: Option<fs::File>,
add_files: collections::HashMap<String, fs::File>,
add_files: collections::BTreeMap<String, fs::File>,
passwd: Option<fs::File>,
group: Option<fs::File>,
}
@ -56,6 +56,8 @@ pub struct Treefile {
rojig_spec: Option<CUtf8Buf>,
serialized: CUtf8Buf,
externals: TreefileExternals,
// This is a checksum over *all* the treefile inputs (recursed treefiles + externals).
checksum: CUtf8Buf,
}
// We only use this while parsing
@ -239,7 +241,7 @@ fn treefile_parse<P: AsRef<Path>>(
} else {
None
};
let mut add_files: collections::HashMap<String, fs::File> = collections::HashMap::new();
let mut add_files: BTreeMap<String, fs::File> = BTreeMap::new();
if let Some(ref add_file_names) = tf.add_files.as_ref() {
for (name, _) in add_file_names.iter() {
add_files.insert(name.clone(), utils::open_file(filename.with_file_name(name))?);
@ -361,9 +363,7 @@ fn treefile_merge_externals(dest: &mut TreefileExternals, src: &mut TreefileExte
}
// add-files is an array and hence has append semantics.
for (k, v) in src.add_files.drain() {
dest.add_files.insert(k, v);
}
dest.add_files.append(&mut src.add_files);
// passwd/group are basic values
if dest.passwd.is_none() {
@ -432,6 +432,13 @@ impl Treefile {
(None, None)
};
let serialized = Treefile::serialize_json_string(&parsed.config)?;
// Notice we hash the *reserialization* of the final flattened treefile only so that e.g.
// comments/whitespace/hash table key reorderings don't trigger a respin. We could take
// this further by using a custom `serialize_with` for Vecs where ordering doesn't matter
// (or just sort the Vecs).
let mut hasher = glib::Checksum::new(glib::ChecksumType::Sha256);
parsed.config.hasher_update(&mut hasher)?;
parsed.externals.hasher_update(&mut hasher)?;
Ok(Box::new(Treefile {
primary_dfd: dfd,
parsed: parsed.config,
@ -440,6 +447,7 @@ impl Treefile {
rojig_spec: rojig_spec,
serialized: serialized,
externals: parsed.externals,
checksum: CUtf8Buf::from_string(hasher.get_string().unwrap()),
}))
}
@ -516,6 +524,42 @@ for x in *; do mv ${{x}} %{{buildroot}}%{{_prefix}}/lib/ostree-jigdo/%{{name}};
}
}
/// Stream the entire contents of `f` into `hasher` in 128 KiB chunks.
///
/// The file offset is rewound to the start both before and after hashing:
/// before, so we deterministically hash the whole file even if the fd was
/// already read from; after, so callers can reuse the open file.
fn hash_file(hasher: &mut glib::Checksum, mut f: &fs::File) -> Fallible<()> {
    // Rewind first — the original code only rewound afterwards, which made
    // the checksum depend on the fd's current offset.
    f.seek(io::SeekFrom::Start(0))?;
    let mut reader = io::BufReader::with_capacity(128 * 1024, f);
    loop {
        // have to scope fill_buf() so we can consume() below
        let n = {
            let buf = reader.fill_buf()?;
            hasher.update(buf);
            buf.len()
        };
        if n == 0 {
            break;
        }
        reader.consume(n);
    }
    // Leave the offset at the start for subsequent readers of this fd.
    f.seek(io::SeekFrom::Start(0))?;
    Ok(())
}
impl TreefileExternals {
    /// Feed the contents of every external treefile input — the
    /// postprocess script, passwd, group, and all add-files — into
    /// `hasher`, in that fixed order.
    fn hasher_update(&self, hasher: &mut glib::Checksum) -> Fallible<()> {
        let singles = [&self.postprocess_script, &self.passwd, &self.group];
        for f in singles.iter().filter_map(|o| o.as_ref()) {
            hash_file(hasher, f)?;
        }
        // add_files is a BTreeMap, so values() iterates in key order and
        // the resulting checksum is deterministic.
        for f in self.add_files.values() {
            hash_file(hasher, f)?;
        }
        Ok(())
    }
}
/// For increased readability in YAML/JSON, we support whitespace in individual
/// array elements.
fn whitespace_split_packages(pkgs: &[String]) -> Vec<String> {
@ -767,6 +811,13 @@ impl TreeComposeConfig {
Ok(self)
}
/// Hash the JSON reserialization of this (flattened) config into `hasher`.
fn hasher_update(&self, hasher: &mut glib::Checksum) -> Fallible<()> {
    // Serialize compactly (non-pretty) to increase the chances that the
    // serialized form remains stable.
    // https://github.com/projectatomic/rpm-ostree/pull/1865
    let serialized = serde_json::to_vec(self)?;
    hasher.update(&serialized);
    Ok(())
}
}
#[cfg(test)]
@ -1228,6 +1279,12 @@ mod ffi {
.unwrap_or(ptr::null_mut())
}
#[no_mangle]
/// C entry point: return the treefile input checksum as a NUL-terminated
/// string. The pointer stays valid for the lifetime of the Treefile.
pub extern "C" fn ror_treefile_get_checksum(tf: *mut Treefile) -> *const libc::c_char {
    ref_from_raw_ptr(tf).checksum.as_ptr()
}
#[no_mangle]
pub extern "C" fn ror_treefile_free(tf: *mut Treefile) {
if tf.is_null() {

View File

@ -58,36 +58,9 @@ rpmostree_composeutil_checksum (HyGoal goal,
{
g_autoptr(GChecksum) checksum = g_checksum_new (G_CHECKSUM_SHA256);
/* Hash in the raw treefile; this means reordering the input packages
* or adding a comment will cause a recompose, but let's be conservative
* here.
*/
g_checksum_update (checksum, (guint8*)ror_treefile_get_json_string (tf), -1);
if (json_object_has_member (treefile, "add-files"))
{
JsonArray *add_files = json_object_get_array_member (treefile, "add-files");
guint i, len = json_array_get_length (add_files);
for (i = 0; i < len; i++)
{
JsonArray *add_el = json_array_get_array_element (add_files, i);
if (!add_el)
return glnx_throw (error, "Element in add-files is not an array");
const char *src = _rpmostree_jsonutil_array_require_string_element (add_el, 0, error);
if (!src)
return FALSE;
int src_fd = ror_treefile_get_add_file_fd (tf, src);
g_assert_cmpint (src_fd, !=, -1);
g_autoptr(GBytes) bytes = glnx_fd_readall_bytes (src_fd, NULL, FALSE);
gsize len;
const guint8* buf = g_bytes_get_data (bytes, &len);
g_checksum_update (checksum, (const guint8 *) buf, len);
}
}
/* FIXME; we should also hash the post script */
/* Hash in the treefile inputs (this includes all externals like postprocess, add-files,
* etc... and the final flattened treefile -- see treefile.rs for more details). */
g_checksum_update (checksum, (guint8*)ror_treefile_get_checksum (tf), -1);
/* Hash in each package */
if (!rpmostree_dnf_add_checksum_goal (checksum, goal, NULL, error))