pull: Error on depth pull with missing head commit

When pulling with depth, missing parent commits are ignored. However,
the check was applied to every commit, which means that the pull would
succeed even if the requested commit itself was missing. This might
happen on a corrupted remote repo or when using ref data from a stale
summary.

To fix this, the semantics of the `commit_to_depth` hash table are
changed slightly so that it only ever includes parent commits. This makes
it easy to detect when a parent commit is being referenced (although
there is a minor bug when multiple refs are being pulled: a requested
commit that was already recorded as a parent in a previously followed
chain is treated as a parent) while keeping references to commits that
need their `commitpartial` files cleaned up.
It also means that the table is only populated on depth pulls, which
saves some memory and processing in the common depth=0 case.

Fixes: #2265
This commit is contained in:
Dan Nicholson 2021-01-11 12:40:38 -07:00
parent b4f06b47a3
commit 20047ff1fe
3 changed files with 57 additions and 35 deletions

View File

@ -88,7 +88,7 @@ typedef struct {
GHashTable *ref_keyring_map; /* Maps OstreeCollectionRef to keyring remote name */
GPtrArray *static_delta_superblocks;
GHashTable *expected_commit_sizes; /* Maps commit checksum to known size */
GHashTable *commit_to_depth; /* Maps commit checksum maximum depth */
GHashTable *commit_to_depth; /* Maps parent commit checksum maximum depth */
GHashTable *scanned_metadata; /* Maps object name to itself */
GHashTable *fetched_detached_metadata; /* Map<checksum,GVariant> */
GHashTable *requested_metadata; /* Maps object name to itself */

View File

@ -1113,6 +1113,18 @@ on_metadata_written (GObject *object,
check_outstanding_requests_handle_error (pull_data, &local_error);
}
/* Report whether @checksum is currently recorded in
 * pull_data->commit_to_depth, which the pull code uses to decide
 * whether a commit is being fetched as the parent of another commit.
 *
 * Returns: TRUE if @checksum is a key in the commit_to_depth table,
 * FALSE otherwise.
 */
static gboolean
is_parent_commit (OtPullData *pull_data,
                  const char *checksum)
{
  /* FIXME: The commit_to_depth table only ever receives parent commits,
   * so a checksum that is absent from it marks the start of a new commit
   * chain. The answer is wrong, however, when the desired commit was
   * already recorded as a parent while following an earlier chain.
   */
  gboolean seen_as_parent =
    g_hash_table_contains (pull_data->commit_to_depth, checksum);

  return seen_as_parent;
}
static void
meta_fetch_on_complete (GObject *object,
GAsyncResult *result,
@ -1158,7 +1170,8 @@ meta_fetch_on_complete (GObject *object,
* We may be pulling from a partial repository that ends in
* a dangling parent reference. */
else if (objtype == OSTREE_OBJECT_TYPE_COMMIT &&
pull_data->maxdepth != 0)
pull_data->maxdepth != 0 &&
is_parent_commit (pull_data, checksum))
{
g_clear_error (&local_error);
/* If the remote repo supports tombstone commits, check if the commit was intentionally
@ -1542,8 +1555,6 @@ scan_commit_object (OtPullData *pull_data,
else
{
depth = pull_data->maxdepth;
g_hash_table_insert (pull_data->commit_to_depth, g_strdup (checksum),
GINT_TO_POINTER (depth));
}
#ifndef OSTREE_DISABLE_GPGME
@ -1684,40 +1695,19 @@ scan_commit_object (OtPullData *pull_data,
return FALSE;
}
if (parent_csum_bytes != NULL && pull_data->maxdepth == -1)
{
queue_scan_one_metadata_object_c (pull_data, parent_csum_bytes,
OSTREE_OBJECT_TYPE_COMMIT, NULL,
recursion_depth + 1, NULL);
}
else if (parent_csum_bytes != NULL && depth > 0)
if (parent_csum_bytes != NULL && (pull_data->maxdepth == -1 || depth > 0))
{
char parent_checksum[OSTREE_SHA256_STRING_LEN+1];
gpointer parent_depthp;
int parent_depth;
ostree_checksum_inplace_from_bytes (parent_csum_bytes, parent_checksum);
if (g_hash_table_lookup_extended (pull_data->commit_to_depth, parent_checksum,
NULL, &parent_depthp))
{
parent_depth = GPOINTER_TO_INT (parent_depthp);
}
else
{
parent_depth = depth - 1;
}
if (parent_depth >= 0)
{
g_hash_table_insert (pull_data->commit_to_depth, g_strdup (parent_checksum),
GINT_TO_POINTER (parent_depth));
queue_scan_one_metadata_object_c (pull_data, parent_csum_bytes,
OSTREE_OBJECT_TYPE_COMMIT,
NULL,
recursion_depth + 1,
NULL);
}
int parent_depth = (depth > 0) ? depth - 1 : -1;
g_hash_table_insert (pull_data->commit_to_depth, g_strdup (parent_checksum),
GINT_TO_POINTER (parent_depth));
queue_scan_one_metadata_object_c (pull_data, parent_csum_bytes,
OSTREE_OBJECT_TYPE_COMMIT,
NULL,
recursion_depth + 1,
NULL);
}
/* We only recurse to looking whether we need dirtree/dirmeta

View File

@ -25,7 +25,7 @@ set -euo pipefail
setup_fake_remote_repo1 "archive"
echo '1..1'
echo '1..3'
cd ${test_tmpdir}
mkdir repo
@ -63,3 +63,35 @@ find repo/state -name '*.commitpartial' | wc -l > commitpartialcount
assert_file_has_content commitpartialcount "^0$"
echo "ok pull depth"
# Check that pulling with depth != 0 succeeds with a missing parent
# commit. Prune the remote to truncate the history.
cd ${test_tmpdir}
${CMD_PREFIX} ostree --repo=ostree-srv/gnomerepo prune --refs-only --depth=0
rm -rf repo/refs/heads/* repo/refs/remotes/* repo/objects/*/*.commit
${CMD_PREFIX} ostree --repo=repo pull --depth=1 origin main
find repo/objects -name '*.commit' | wc -l > commitcount
assert_file_has_content commitcount "^1$"
find repo/state -name '*.commitpartial' | wc -l > commitpartialcount
assert_file_has_content commitpartialcount "^0$"
rm -rf repo/refs/heads/* repo/refs/remotes/* repo/objects/*/*.commit
${CMD_PREFIX} ostree --repo=repo pull --depth=-1 origin main
find repo/objects -name '*.commit' | wc -l > commitcount
assert_file_has_content commitcount "^1$"
find repo/state -name '*.commitpartial' | wc -l > commitpartialcount
assert_file_has_content commitpartialcount "^0$"
echo "ok pull depth missing parent"
# Check that it errors if the ref head commit is missing.
cd ${test_tmpdir}
rm -f ostree-srv/gnomerepo/objects/*/*.commit
rm -rf repo/refs/heads/* repo/refs/remotes/* repo/objects/*/*.commit
if ${CMD_PREFIX} ostree --repo=repo pull --depth=-1 origin main; then
fatal "Pull with depth -1 succeeded with missing HEAD commit"
fi
echo "ok pull depth missing HEAD commit"