jigdo V5: Use number of objects as cache invalidation trigger

Changes in a server-side tree can cause the need for clients to import different
objects from packages. For example, turning on documentation. Another more
subtle case is where an object might "move" from package A to B by being deleted
from A - then the jigdo build process will pick the B version.

We need a "cache validation key"; a way for the server to tell the client that
the objects it should import from the package have changed. Initially I was
thinking of using the libostree "content hash" but that would be awkward as we'd
have to do an import on the server side too.

After more consideration I realized a simple *count* of the number of objects
actually works, because (as I note in a comment) changing a file in the tree
will result in it ending up in the jigdoRPM (and count as a deletion).  And
obviously adding or removing objects changes the count too.

In fact we could have done this *without* breaking the format by just having the
client start recording the number of xattr entries, but this adds greater
flexibility down the line since we can in theory change how we do cache
invalidation if we *really* need to (but at the cost of triggering clients to
redownload packages).

Note the client logic got moved around as now we need to parse all the xattrs
before we decide what packages to download.

My test case here is turning on docs - I noticed this actually affects *every*
package which was surprising to me; I expected at least some packages wouldn't
have docs. I'll double check this.

It'd be good to have a "moving object" case too which I may look at.

Closes: https://github.com/projectatomic/rpm-ostree/issues/1197

Closes: #1256
Approved by: jlebon
This commit is contained in:
Colin Walters 2018-02-15 15:07:39 -05:00 committed by Atomic Bot
parent a4d8980d59
commit 5879b96a64
8 changed files with 136 additions and 44 deletions

View File

@ -1821,7 +1821,7 @@ setup_jigdo_state (RpmOstreeContext *self,
DnfReldep *provide = dnf_reldep_list_index (provides, i);
const char *provide_str = dnf_reldep_to_string (provide);
if (g_str_equal (provide_str, RPMOSTREE_JIGDO_PROVIDE_V4))
if (g_str_equal (provide_str, RPMOSTREE_JIGDO_PROVIDE_V5))
{
found_vprovide = TRUE;
}
@ -1843,7 +1843,7 @@ setup_jigdo_state (RpmOstreeContext *self,
if (!found_vprovide)
return glnx_throw (error, "Package '%s' does not have Provides: %s",
dnf_package_get_nevra (self->jigdo_pkg), RPMOSTREE_JIGDO_PROVIDE_V4);
dnf_package_get_nevra (self->jigdo_pkg), RPMOSTREE_JIGDO_PROVIDE_V5);
if (!self->jigdo_checksum)
return glnx_throw (error, "Package '%s' does not have Provides: %s",
dnf_package_get_nevra (self->jigdo_pkg), RPMOSTREE_JIGDO_PROVIDE_COMMIT);

View File

@ -73,6 +73,7 @@ struct RpmOstreeImporter
char *ostree_branch;
gboolean jigdo_mode;
char *jigdo_cacheid;
GVariant *jigdo_xattr_table;
GVariant *jigdo_xattrs;
GVariant *jigdo_next_xattrs; /* passed from filter to xattr cb */
@ -101,6 +102,7 @@ rpmostree_importer_finalize (GObject *object)
g_free (self->hdr_sha256);
g_free (self->jigdo_cacheid);
g_clear_pointer (&self->jigdo_xattr_table, (GDestroyNotify)g_variant_unref);
g_clear_pointer (&self->jigdo_xattrs, (GDestroyNotify)g_variant_unref);
g_clear_pointer (&self->jigdo_next_xattrs, (GDestroyNotify)g_variant_unref);
@ -296,7 +298,9 @@ rpmostree_importer_set_jigdo_mode (RpmOstreeImporter *self,
{
self->jigdo_mode = TRUE;
self->jigdo_xattr_table = g_variant_ref (xattr_table);
self->jigdo_xattrs = g_variant_ref (xattrs);
g_variant_get (xattrs, "(s@a(su))",
&self->jigdo_cacheid,
&self->jigdo_xattrs);
}
static void
@ -483,6 +487,9 @@ build_metadata_variant (RpmOstreeImporter *self,
g_variant_builder_add (&metadata_builder, "{sv}",
"rpmostree.jigdo",
g_variant_new_boolean (TRUE));
g_variant_builder_add (&metadata_builder, "{sv}",
"rpmostree.jigdo_cacheid",
g_variant_new_string (self->jigdo_cacheid));
g_variant_builder_add (&metadata_builder, "{sv}",
"rpmostree.jigdo_n_skipped",
g_variant_new_uint32 (self->n_jigdo_skipped));

View File

@ -549,22 +549,16 @@ rpmostree_jigdo_assembler_get_xattr_table (RpmOstreeJigdoAssembler *self)
/* Loop over each package, returning its xattr set (as indexes into the xattr table) */
gboolean
rpmostree_jigdo_assembler_next_xattrs (RpmOstreeJigdoAssembler *self,
GVariant **out_objid_to_xattrs,
GCancellable *cancellable,
GError **error)
GVariant **out_objid_to_xattrs,
GCancellable *cancellable,
GError **error)
{
/* Init output variable for EOF state now */
*out_objid_to_xattrs = NULL;
/* If we haven't loaded the xattr string table, do so */
if (self->state < STATE_XATTRS_TABLE)
{
gboolean eof;
struct archive_entry *entry;
if (!jigdo_next_entry (self, &eof, &entry, cancellable, error))
struct archive_entry *entry = jigdo_require_next_entry (self, cancellable, error);
if (!entry)
return FALSE;
if (eof)
return TRUE; /* 🔚 Early return */
const char *pathname = peel_entry_pathname (entry, error);
if (!pathname)
@ -581,6 +575,9 @@ rpmostree_jigdo_assembler_next_xattrs (RpmOstreeJigdoAssembler *self,
self->state = STATE_XATTRS_TABLE;
}
/* Init output variable for EOF state now */
*out_objid_to_xattrs = NULL;
/* Look for an xattr entry */
gboolean eof;
struct archive_entry *entry;

View File

@ -618,7 +618,7 @@ generate_spec (RpmOstreeCommit2JigdoContext *self,
g_autoptr(GString) replacement = g_string_new ("");
g_string_append_len (replacement, spec_contents, meta - spec_contents);
g_string_append (replacement, "# Generated by rpm-ostree\n");
g_string_append (replacement, "Provides: " RPMOSTREE_JIGDO_PROVIDE_V4 "\n");
g_string_append (replacement, "Provides: " RPMOSTREE_JIGDO_PROVIDE_V5 "\n");
/* Add provides for the commit hash */
g_string_append (replacement, "Provides: " RPMOSTREE_JIGDO_PROVIDE_COMMIT);
g_string_append_printf (replacement, "(%s)\n", ostree_commit_sha256);
@ -934,13 +934,29 @@ write_commit2jigdo (RpmOstreeCommit2JigdoContext *self,
/* Ensure the objid array is sorted so we can bsearch it */
g_ptr_array_sort (objidxattrs, cmp_objidxattrs);
/* I am fairly sure that a simple count of the number of objects is
* sufficient as a cache invalidation mechanism. Scenarios:
*
* - We change the content of a file: Since we do object based imports,
* it will be a new object; it'd end up in jigdoRPM.
* - We start wanting an existing pkg object (e.g. docs): Counting works
* - An object migrates (again add/remove): Counting works
*
* And I can't think of a scenario that isn't one of "add,remove,change".
*/
g_autofree char *cacheid = g_strdup_printf ("%u", objidxattrs->len);
/* Build up the variant from sorted data */
g_autoptr(GVariantBuilder) objid_xattr_builder = g_variant_builder_new ((GVariantType*)"a(su)");
g_autoptr(GVariantBuilder) objid_xattr_builder = g_variant_builder_new (RPMOSTREE_JIGDO_XATTRS_PKG_VARIANT_FORMAT);
g_variant_builder_add (objid_xattr_builder, "s", cacheid);
g_variant_builder_open (objid_xattr_builder, G_VARIANT_TYPE ("a(su)"));
for (guint i = 0; i < objidxattrs->len; i++)
{
GVariant *objidxattr = objidxattrs->pdata[i];
g_variant_builder_add (objid_xattr_builder, "@(su)", objidxattr);
}
g_variant_builder_close (objid_xattr_builder);
g_autoptr(GVariant) objid_xattrs_final = g_variant_ref_sink (g_variant_builder_end (objid_xattr_builder));
g_autofree char *path = g_strconcat (RPMOSTREE_JIGDO_XATTRS_PKG_DIR, "/", nevra, NULL);

View File

@ -75,6 +75,50 @@ compare_pkgs (gconstpointer ap,
return dnf_package_cmp (*a, *b);
}
/* Walk over the list of cacheids for each package; if we have
* a cached jigdo pkg with a different cacheid, invalidate it.
*/
static gboolean
invalidate_changed_cacheids (RpmOstreeContext *self,
DnfPackage *pkg,
GVariant *pkg_objid_to_xattrs,
guint *out_n_invalidated,
GCancellable *cancellable,
GError **error)
{
GLNX_AUTO_PREFIX_ERROR ("During jigdo pkgcache invalidation", error);
OstreeRepo *pkgcache_repo = self->pkgcache_repo ?: self->ostreerepo;
const char *cacheid;
g_variant_get (pkg_objid_to_xattrs, "(&s@a(su))", &cacheid, NULL);
/* See if we have it cached */
g_autofree char *jigdo_branch = rpmostree_get_jigdo_branch_pkg (pkg);
g_autofree char *cached_rev = NULL;
if (!ostree_repo_resolve_rev (pkgcache_repo, jigdo_branch, TRUE,
&cached_rev, error))
return FALSE;
/* Not cached? That's fine, on to the next */
if (!cached_rev)
return TRUE; /* Early return */
g_autoptr(GVariant) commit = NULL;
if (!ostree_repo_load_commit (pkgcache_repo, cached_rev, &commit, NULL, error))
return FALSE;
g_autoptr(GVariant) metadata = g_variant_get_child_value (commit, 0);
g_autoptr(GVariantDict) metadata_dict = g_variant_dict_new (metadata);
const char *current_cacheid = NULL;
g_variant_dict_lookup (metadata_dict, "rpmostree.jigdo_cacheid", "&s", &current_cacheid);
if (g_strcmp0 (current_cacheid, cacheid))
{
if (!ostree_repo_set_ref_immediate (pkgcache_repo, NULL, jigdo_branch, NULL,
cancellable, error))
return FALSE;
(*out_n_invalidated)++;
}
return TRUE;
}
/* Core logic for performing a jigdo assembly client side. The high level flow is:
*
* - Download rpm-md
@ -210,6 +254,28 @@ rpmostree_context_execute_jigdo (RpmOstreeContext *self,
return FALSE;
txn.initialized = FALSE;
/* Process the xattrs, including the cacheids before we compute what we need
* to download.
*/
g_autoptr(GHashTable) pkg_to_xattrs = g_hash_table_new_full (NULL, NULL,
(GDestroyNotify)g_object_unref,
(GDestroyNotify)g_variant_unref);
guint n_invalidated = 0;
for (guint i = 0; i < pkgs_required->len; i++)
{
DnfPackage *pkg = pkgs_required->pdata[i];
g_autoptr(GVariant) objid_to_xattrs = NULL;
if (!rpmostree_jigdo_assembler_next_xattrs (jigdo, &objid_to_xattrs, cancellable, error))
return FALSE;
if (!objid_to_xattrs)
return glnx_throw (error, "missing xattr entry: %s", dnf_package_get_name (pkg));
if (!invalidate_changed_cacheids (self, pkg, objid_to_xattrs,
&n_invalidated, cancellable, error))
return FALSE;
g_hash_table_insert (pkg_to_xattrs, g_object_ref (pkg), g_steal_pointer (&objid_to_xattrs));
}
/* And now, process the jigdo set */
if (!rpmostree_context_set_packages (self, pkgs_required, cancellable, error))
return FALSE;
@ -227,28 +293,14 @@ rpmostree_context_execute_jigdo (RpmOstreeContext *self,
g_hash_table_add (pkgset_to_import, pkg);
}
g_autofree char *dlsize_fmt = g_format_size (dlsize);
rpmostree_output_message ("%u packages to import, download size: %s", pkgs_to_import->len, dlsize_fmt);
if (n_invalidated > 0)
rpmostree_output_message ("%u/%u packages to import (%u changed), download size: %s",
pkgs_to_import->len, n_requires, n_invalidated, dlsize_fmt);
else
rpmostree_output_message ("%u/%u packages to import, download size: %s",
pkgs_to_import->len, n_requires, dlsize_fmt);
}
/* Parse the xattr data in the jigdoRPM */
g_autoptr(GHashTable) pkg_to_xattrs = g_hash_table_new_full (NULL, NULL,
(GDestroyNotify)g_object_unref,
(GDestroyNotify)g_variant_unref);
for (guint i = 0; i < pkgs_required->len; i++)
{
DnfPackage *pkg = pkgs_required->pdata[i];
const gboolean should_import = g_hash_table_contains (pkgset_to_import, pkg);
g_autoptr(GVariant) objid_to_xattrs = NULL;
if (!rpmostree_jigdo_assembler_next_xattrs (jigdo, &objid_to_xattrs, cancellable, error))
return FALSE;
if (!objid_to_xattrs)
return glnx_throw (error, "missing xattr entry: %s", dnf_package_get_name (pkg));
if (!should_import)
continue;
g_hash_table_insert (pkg_to_xattrs, g_object_ref (pkg), g_steal_pointer (&objid_to_xattrs));
}
/* Start the download and import, using the xattr data from the jigdoRPM */
if (!rpmostree_context_download (self, cancellable, error))
return FALSE;

View File

@ -26,10 +26,10 @@
#include <rpm/rpmlib.h>
#include <libdnf/libdnf.h>
/* An OIRPM is structured as an ordered set of files/directories; we use numeric
/* A jigdoRPM is structured as an ordered set of files/directories; we use numeric
* prefixes to ensure ordering. Most of the files are in GVariant format.
*
* An OIRPM starts with the OSTree commit object and its detached metadata,
* The first entry in a jigdoRPM is the OSTree commit and its detached metadata,
* so that can be GPG verified first - if that fails, we can then cleanly
* abort.
*
@ -50,7 +50,9 @@
* of xattrs; then there is a GVariant for each package which contains
* a mapping of "objid" to an unsigned integer index into the xattr table.
* The "objid" can either be a full path, or a basename if that basename is
* unique inside a particular package.
* unique inside a particular package. Since v5, there is also a "cacheid"
* which is used to invalidate client-side caching:
* https://github.com/projectatomic/rpm-ostree/issues/1197
*/
/* Use a numeric prefix to ensure predictable ordering */
@ -66,11 +68,12 @@
#define RPMOSTREE_JIGDO_XATTRS_TABLE "06xattrs/00table"
#define RPMOSTREE_JIGDO_XATTRS_PKG_DIR "06xattrs/pkg"
/* Array of xattr (name, value) pairs */
#define RPMOSTREE_JIGDO_XATTRS_TABLE_VARIANT_FORMAT (G_VARIANT_TYPE ("aa(ayay)"))
/* NEVRA + xattr table */
#define RPMOSTREE_JIGDO_XATTRS_PKG_VARIANT_FORMAT (G_VARIANT_TYPE ("a(su)"))
/* cacheid + map of objid to index into table ↑ */
#define RPMOSTREE_JIGDO_XATTRS_PKG_VARIANT_FORMAT (G_VARIANT_TYPE ("(sa(su))"))
#define RPMOSTREE_JIGDO_PROVIDE_V4 "rpmostree-jigdo(v4)"
#define RPMOSTREE_JIGDO_PROVIDE_V5 "rpmostree-jigdo(v5)"
#define RPMOSTREE_JIGDO_PROVIDE_COMMIT "rpmostree-jigdo-commit"
/* This one goes in the spec file to use as our replacement */

View File

@ -17,6 +17,7 @@ build_rpm test-pkg \
echo gpgcheck=0 >> yumrepo.repo
ln yumrepo.repo composedata/test-repo.repo
pyappendjsonmember "packages" '["test-pkg"]'
pysetjsonmember "documentation" 'False'
# Need unified core for this, as well as a cachedir
mkdir cache
runcompose --ex-unified-core --cachedir $(pwd)/cache --add-metadata-string version=42.0
@ -50,7 +51,7 @@ do_jigdo2commit() {
}
do_jigdo2commit
# there will generally be pkgs not in the jigdo set, but let's at least assert it's > 0
assert_file_has_content jigdo2commit-out.txt ${npkgs}' packages to import'
assert_file_has_content jigdo2commit-out.txt ${npkgs}/${npkgs}' packages to import'
ostree --repo=jigdo-unpack-repo rev-parse ${rev}
ostree --repo=jigdo-unpack-repo fsck
ostree --repo=jigdo-unpack-repo refs > jigdo-refs.txt
@ -88,7 +89,7 @@ assert_file_has_content requires.txt 'test-pkg(.*) = 1.0-1'
# And pull it; we should download the newer version by default
do_jigdo2commit
# Now we should only download 2 packages
assert_file_has_content jigdo2commit-out.txt '2 packages to import'
assert_file_has_content jigdo2commit-out.txt '2/[1-9][0-9]* packages to import'
for x in ${origrev} ${newrev}; do
ostree --repo=jigdo-unpack-repo rev-parse ${x}
done
@ -100,3 +101,16 @@ assert_file_has_content jigdo-refs.txt 'rpmostree/jigdo/test-pkg/1.0-1.x86__64'
assert_file_has_content jigdo-refs.txt 'rpmostree/jigdo/test-pkg/1.1-1.x86__64'
echo "ok jigdo ♲📦 update!"
# Add all docs to test https://github.com/projectatomic/rpm-ostree/issues/1197
pysetjsonmember "documentation" 'True'
runcompose --ex-unified-core --cachedir $(pwd)/cache --add-metadata-string version=42.2
newrev=$(ostree --repo=${repobuild} rev-parse ${treeref})
do_commit2jigdo ${newrev}
find jigdo-output -name '*.rpm' | tee rpms.txt
assert_file_has_content rpms.txt 'fedora-atomic-host-42.2.*x86_64'
do_jigdo2commit
# Not every package has docs, but there are going to need to be changes
assert_file_has_content jigdo2commit-out.txt '[1-9][0-9]*/[1-9][0-9]* packages to import ([1-9][0-9]* changed)'
ostree --repo=jigdo-unpack-repo ls -R ${newrev} >/dev/null
echo "ok jigdo ♲📦 updated docs"

View File

@ -24,6 +24,9 @@ set -euo pipefail
set -x
echo "ok SKIP for v5 bump; remove after FAHC is rebuilt"
exit 0
# Test rebasing to https://pagure.io/fedora-atomic-host-continuous
# in rojig:// mode.