ostree repo commit: Speed up composing trees with --tree=ref

Running `ostree commit --tree=ref=a --tree=dir=b` involves reading the
whole of a into an `OstreeMutableTree` before composing `b` on top.  This
is inefficient if a is a complete rootfs and b is just touching one file.
We process O(size of a + size of b) directories rather than
O(number of touched dirs).

This commit makes `ostree commit` more efficient at composing multiple
directories together.  With `ostree_mutable_tree_fill_empty_from_dirtree`
we create a lazy `OstreeMutableTree` which only reads the underlying
information from disk when needed.  We don't need to read all the
subdirectories just to get the checksum of a tree already checked into the
repo.

This provides great speedups when composing a rootfs out of multiple other
rootfs as we do in our build system.  We compose multiple containers
together with:

    ostree commit --tree=ref=base-rootfs --tree=ref=container1 --tree=ref=container2

and it is much faster now.

As a test I ran

    time ostree --repo=... commit --orphan --tree=ref=big-rootfs --tree=dir=modified_etc

Where modified_etc contained a modified sudoers file under /etc.  I used
`strace` to count syscalls and I seperatly took timing measurements.  To
test with a cold cache I ran

    sync && echo 3 | sudo tee /proc/sys/vm/drop_caches

Results:

|                      | Before | After |
| -------------------- | ------ | ----- |
| Time (cold cache)    |   8.1s | 0.12s |
| Time (warm cache)    |   3.7s | 0.08s |
| `openat` calls       |  53589 |   246 |
| `fgetxattr` calls    |  78916 |     0 |

I'm not sure if this will have some negative interaction with the
`_ostree_repo_commit_modifier_apply` which is short-circuited here.  I
think it was disabled for `--tree=ref=` anyway, but I'm not certain.  All
the tests pass anyway.

I originally implemented this in terms of the `OstreeRepoFile` APIs, but
it was *way* less efficient, opening and reading many files unnecessarily.

Closes: #1643
Approved by: cgwalters
This commit is contained in:
William Manley 2018-06-22 15:28:49 +01:00 committed by Atomic Bot
parent 11eb0bd227
commit c7b12a8730
5 changed files with 267 additions and 4 deletions

View File

@ -250,6 +250,8 @@ OstreeLzmaDecompressorClass
<FILE>ostree-mutable-tree</FILE> <FILE>ostree-mutable-tree</FILE>
OstreeMutableTree OstreeMutableTree
ostree_mutable_tree_new ostree_mutable_tree_new
ostree_mutable_tree_new_from_checksum
ostree_mutable_tree_check_error
ostree_mutable_tree_set_metadata_checksum ostree_mutable_tree_set_metadata_checksum
ostree_mutable_tree_get_metadata_checksum ostree_mutable_tree_get_metadata_checksum
ostree_mutable_tree_set_contents_checksum ostree_mutable_tree_set_contents_checksum
@ -261,6 +263,7 @@ ostree_mutable_tree_ensure_parent_dirs
ostree_mutable_tree_walk ostree_mutable_tree_walk
ostree_mutable_tree_get_subdirs ostree_mutable_tree_get_subdirs
ostree_mutable_tree_get_files ostree_mutable_tree_get_files
ostree_mutable_tree_fill_empty_from_dirtree
<SUBSECTION Standard> <SUBSECTION Standard>
OSTREE_MUTABLE_TREE OSTREE_MUTABLE_TREE
OSTREE_IS_MUTABLE_TREE OSTREE_IS_MUTABLE_TREE

View File

@ -19,6 +19,10 @@
/* Add new symbols here. Release commits should copy this section into -released.sym. */ /* Add new symbols here. Release commits should copy this section into -released.sym. */
LIBOSTREE_2018.7 { LIBOSTREE_2018.7 {
global:
ostree_mutable_tree_fill_empty_from_dirtree;
ostree_mutable_tree_new_from_checksum;
ostree_mutable_tree_check_error;
} LIBOSTREE_2018.6; } LIBOSTREE_2018.6;
/* Stub section for the stable release *after* this development one; don't /* Stub section for the stable release *after* this development one; don't

View File

@ -26,6 +26,8 @@
#include "otutil.h" #include "otutil.h"
#include "ostree.h" #include "ostree.h"
#include "ostree-core-private.h"
/** /**
* SECTION:ostree-mutable-tree * SECTION:ostree-mutable-tree
* @title: In-memory modifiable filesystem tree * @title: In-memory modifiable filesystem tree
@ -38,6 +40,15 @@
* programmatically. * programmatically.
*/ */
typedef enum {
MTREE_STATE_WHOLE,
/* MTREE_STATE_LAZY allows us to not read files and subdirs from the objects
* on disk until they're actually needed - often they won't be needed at
* all. */
MTREE_STATE_LAZY
} OstreeMutableTreeState;
/** /**
* OstreeMutableTree: * OstreeMutableTree:
* *
@ -55,6 +66,8 @@ struct OstreeMutableTree
* it sets parent = NULL on all its children (see remove_child_mtree) */ * it sets parent = NULL on all its children (see remove_child_mtree) */
OstreeMutableTree *parent; OstreeMutableTree *parent;
OstreeMutableTreeState state;
/* This is the checksum of the Dirtree object that corresponds to the current /* This is the checksum of the Dirtree object that corresponds to the current
* contents of this directory. contents_checksum can be NULL if the SHA was * contents of this directory. contents_checksum can be NULL if the SHA was
* never calculated or contents of this mtree or any subdirectory has been * never calculated or contents of this mtree or any subdirectory has been
@ -71,7 +84,16 @@ struct OstreeMutableTree
* and xattrs of this directory. This can be NULL. */ * and xattrs of this directory. This can be NULL. */
char *metadata_checksum; char *metadata_checksum;
/* const char* filename -> const char* checksum */ /* ======== Valid for state LAZY: =========== */
/* The repo so we can look up the checksums. */
OstreeRepo *repo;
GError *cached_error;
/* ======== Valid for state WHOLE: ========== */
/* const char* filename -> const char* checksum. */
GHashTable *files; GHashTable *files;
/* const char* filename -> OstreeMutableTree* subtree */ /* const char* filename -> OstreeMutableTree* subtree */
@ -80,8 +102,6 @@ struct OstreeMutableTree
G_DEFINE_TYPE (OstreeMutableTree, ostree_mutable_tree, G_TYPE_OBJECT) G_DEFINE_TYPE (OstreeMutableTree, ostree_mutable_tree, G_TYPE_OBJECT)
static void invalidate_contents_checksum (OstreeMutableTree *self);
static void static void
ostree_mutable_tree_finalize (GObject *object) ostree_mutable_tree_finalize (GObject *object)
{ {
@ -92,9 +112,12 @@ ostree_mutable_tree_finalize (GObject *object)
g_free (self->contents_checksum); g_free (self->contents_checksum);
g_free (self->metadata_checksum); g_free (self->metadata_checksum);
g_clear_pointer (&self->cached_error, g_error_free);
g_hash_table_destroy (self->files); g_hash_table_destroy (self->files);
g_hash_table_destroy (self->subdirs); g_hash_table_destroy (self->subdirs);
g_clear_object (&self->repo);
G_OBJECT_CLASS (ostree_mutable_tree_parent_class)->finalize (object); G_OBJECT_CLASS (ostree_mutable_tree_parent_class)->finalize (object);
} }
@ -117,7 +140,6 @@ insert_child_mtree (OstreeMutableTree *self, const gchar* name,
g_assert_null (child->parent); g_assert_null (child->parent);
g_hash_table_insert (self->subdirs, g_strdup (name), child); g_hash_table_insert (self->subdirs, g_strdup (name), child);
child->parent = self; child->parent = self;
invalidate_contents_checksum (self);
} }
static void static void
@ -139,6 +161,7 @@ ostree_mutable_tree_init (OstreeMutableTree *self)
g_free, g_free); g_free, g_free);
self->subdirs = g_hash_table_new_full (g_str_hash, g_str_equal, self->subdirs = g_hash_table_new_full (g_str_hash, g_str_equal,
g_free, remove_child_mtree); g_free, remove_child_mtree);
self->state = MTREE_STATE_WHOLE;
} }
static void static void
@ -153,6 +176,78 @@ invalidate_contents_checksum (OstreeMutableTree *self)
} }
} }
/* Go from state LAZY to state WHOLE by reading the tree from disk */
static gboolean
_ostree_mutable_tree_make_whole (OstreeMutableTree *self,
GCancellable *cancellable,
GError **error)
{
if (self->state == MTREE_STATE_WHOLE)
return TRUE;
g_assert_cmpuint (self->state, ==, MTREE_STATE_LAZY);
g_assert_nonnull (self->repo);
g_assert_nonnull (self->contents_checksum);
g_assert_nonnull (self->metadata_checksum);
g_assert_cmpuint (g_hash_table_size (self->files), ==, 0);
g_assert_cmpuint (g_hash_table_size (self->subdirs), ==, 0);
g_autoptr(GVariant) dirtree = NULL;
if (!ostree_repo_load_variant (self->repo, OSTREE_OBJECT_TYPE_DIR_TREE,
self->contents_checksum, &dirtree, error))
return FALSE;
{
g_autoptr(GVariant) dir_file_contents = g_variant_get_child_value (dirtree, 0);
GVariantIter viter;
g_variant_iter_init (&viter, dir_file_contents);
const char *fname;
GVariant *contents_csum_v = NULL;
while (g_variant_iter_loop (&viter, "(&s@ay)", &fname, &contents_csum_v))
{
char tmp_checksum[OSTREE_SHA256_STRING_LEN + 1];
_ostree_checksum_inplace_from_bytes_v (contents_csum_v, tmp_checksum);
g_hash_table_insert (self->files, g_strdup (fname),
g_strdup (tmp_checksum));
}
}
/* Process subdirectories */
{
g_autoptr(GVariant) dir_subdirs = g_variant_get_child_value (dirtree, 1);
const char *dname;
GVariant *subdirtree_csum_v = NULL;
GVariant *subdirmeta_csum_v = NULL;
GVariantIter viter;
g_variant_iter_init (&viter, dir_subdirs);
while (g_variant_iter_loop (&viter, "(&s@ay@ay)", &dname,
&subdirtree_csum_v, &subdirmeta_csum_v))
{
char subdirtree_checksum[OSTREE_SHA256_STRING_LEN+1];
_ostree_checksum_inplace_from_bytes_v (subdirtree_csum_v, subdirtree_checksum);
char subdirmeta_checksum[OSTREE_SHA256_STRING_LEN+1];
_ostree_checksum_inplace_from_bytes_v (subdirmeta_csum_v, subdirmeta_checksum);
insert_child_mtree (self, dname, ostree_mutable_tree_new_from_checksum (
self->repo, subdirtree_checksum, subdirmeta_checksum));
}
}
g_clear_object (&self->repo);
self->state = MTREE_STATE_WHOLE;
return TRUE;
}
/* _ostree_mutable_tree_make_whole can fail if state == MTREE_STATE_LAZY, but
* we have getters that preceed the existence of MTREE_STATE_LAZY which can't
* return errors. So instead this function will fail and print a warning. */
static gboolean
_assert_ostree_mutable_tree_make_whole (OstreeMutableTree *self)
{
if (self->cached_error)
return FALSE;
return _ostree_mutable_tree_make_whole (self, NULL, &self->cached_error);
}
void void
ostree_mutable_tree_set_metadata_checksum (OstreeMutableTree *self, ostree_mutable_tree_set_metadata_checksum (OstreeMutableTree *self,
const char *checksum) const char *checksum)
@ -183,6 +278,8 @@ ostree_mutable_tree_set_contents_checksum (OstreeMutableTree *self,
"already has a checksum set. Old checksum %s, new checksum %s", "already has a checksum set. Old checksum %s, new checksum %s",
self->contents_checksum, checksum); self->contents_checksum, checksum);
_assert_ostree_mutable_tree_make_whole (self);
g_free (self->contents_checksum); g_free (self->contents_checksum);
self->contents_checksum = g_strdup (checksum); self->contents_checksum = g_strdup (checksum);
} }
@ -215,6 +312,9 @@ ostree_mutable_tree_replace_file (OstreeMutableTree *self,
if (!ot_util_filename_validate (name, error)) if (!ot_util_filename_validate (name, error))
goto out; goto out;
if (!_ostree_mutable_tree_make_whole (self, NULL, error))
goto out;
if (g_hash_table_lookup (self->subdirs, name)) if (g_hash_table_lookup (self->subdirs, name))
{ {
g_set_error (error, G_IO_ERROR, G_IO_ERROR_FAILED, g_set_error (error, G_IO_ERROR, G_IO_ERROR_FAILED,
@ -256,6 +356,9 @@ ostree_mutable_tree_ensure_dir (OstreeMutableTree *self,
if (!ot_util_filename_validate (name, error)) if (!ot_util_filename_validate (name, error))
goto out; goto out;
if (!_ostree_mutable_tree_make_whole (self, NULL, error))
goto out;
if (g_hash_table_lookup (self->files, name)) if (g_hash_table_lookup (self->files, name))
{ {
g_set_error (error, G_IO_ERROR, G_IO_ERROR_FAILED, g_set_error (error, G_IO_ERROR, G_IO_ERROR_FAILED,
@ -267,6 +370,7 @@ ostree_mutable_tree_ensure_dir (OstreeMutableTree *self,
if (!ret_dir) if (!ret_dir)
{ {
ret_dir = ostree_mutable_tree_new (); ret_dir = ostree_mutable_tree_new ();
invalidate_contents_checksum (self);
insert_child_mtree (self, name, g_object_ref (ret_dir)); insert_child_mtree (self, name, g_object_ref (ret_dir));
} }
@ -287,6 +391,9 @@ ostree_mutable_tree_lookup (OstreeMutableTree *self,
g_autoptr(OstreeMutableTree) ret_subdir = NULL; g_autoptr(OstreeMutableTree) ret_subdir = NULL;
g_autofree char *ret_file_checksum = NULL; g_autofree char *ret_file_checksum = NULL;
if (!_ostree_mutable_tree_make_whole (self, NULL, error))
goto out;
ret_subdir = ot_gobject_refz (g_hash_table_lookup (self->subdirs, name)); ret_subdir = ot_gobject_refz (g_hash_table_lookup (self->subdirs, name));
if (!ret_subdir) if (!ret_subdir)
{ {
@ -328,6 +435,9 @@ ostree_mutable_tree_ensure_parent_dirs (OstreeMutableTree *self,
OstreeMutableTree *subdir = self; /* nofree */ OstreeMutableTree *subdir = self; /* nofree */
g_autoptr(OstreeMutableTree) ret_parent = NULL; g_autoptr(OstreeMutableTree) ret_parent = NULL;
if (!_ostree_mutable_tree_make_whole (self, NULL, error))
goto out;
g_assert (metadata_checksum != NULL); g_assert (metadata_checksum != NULL);
if (!self->metadata_checksum) if (!self->metadata_checksum)
@ -348,6 +458,7 @@ ostree_mutable_tree_ensure_parent_dirs (OstreeMutableTree *self,
next = g_hash_table_lookup (subdir->subdirs, name); next = g_hash_table_lookup (subdir->subdirs, name);
if (!next) if (!next)
{ {
invalidate_contents_checksum (subdir);
next = ostree_mutable_tree_new (); next = ostree_mutable_tree_new ();
ostree_mutable_tree_set_metadata_checksum (next, metadata_checksum); ostree_mutable_tree_set_metadata_checksum (next, metadata_checksum);
insert_child_mtree (subdir, g_strdup (name), next); insert_child_mtree (subdir, g_strdup (name), next);
@ -364,6 +475,71 @@ ostree_mutable_tree_ensure_parent_dirs (OstreeMutableTree *self,
return ret; return ret;
} }
const char empty_tree_csum[] = "6e340b9cffb37a989ca544e6bb780a2c78901d3fb33738768511a30617afa01d";
/**
* ostree_mutable_tree_fill_empty_from_dirtree:
*
* Merges @self with the tree given by @contents_checksum and
* @metadata_checksum, but only if it's possible without writing new objects to
* the @repo. We can do this if either @self is empty, the tree given by
* @contents_checksum is empty or if both trees already have the same
* @contents_checksum.
*
* Returns: @TRUE if merge was successful, @FALSE if it was not possible.
*
* This function enables optimisations when composing trees. The provided
* checksums are not loaded or checked when this function is called. Instead
* the contents will be loaded only when needed.
*/
gboolean
ostree_mutable_tree_fill_empty_from_dirtree (OstreeMutableTree *self,
OstreeRepo *repo,
const char *contents_checksum,
const char *metadata_checksum)
{
g_return_val_if_fail (repo, FALSE);
g_return_val_if_fail (contents_checksum, FALSE);
g_return_val_if_fail (metadata_checksum, FALSE);
switch (self->state)
{
case MTREE_STATE_LAZY:
{
if (g_strcmp0 (contents_checksum, self->contents_checksum) == 0 ||
g_strcmp0 (empty_tree_csum, self->contents_checksum) == 0)
break;
if (g_strcmp0 (empty_tree_csum, contents_checksum) == 0)
{
/* Adding an empty tree to a full one - stick with the old contents */
contents_checksum = self->contents_checksum;
break;
}
else
return FALSE;
}
case MTREE_STATE_WHOLE:
if (g_hash_table_size (self->files) == 0 &&
g_hash_table_size (self->subdirs) == 0)
break;
/* We're not empty - can't convert to a LAZY tree */
return FALSE;
default:
g_assert_not_reached ();
}
self->state = MTREE_STATE_LAZY;
g_set_object (&self->repo, repo);
ostree_mutable_tree_set_metadata_checksum (self, metadata_checksum);
if (g_strcmp0 (self->contents_checksum, contents_checksum) != 0)
{
invalidate_contents_checksum (self);
self->contents_checksum = g_strdup (contents_checksum);
}
return TRUE;
}
/** /**
* ostree_mutable_tree_walk: * ostree_mutable_tree_walk:
* @self: Tree * @self: Tree
@ -392,6 +568,8 @@ ostree_mutable_tree_walk (OstreeMutableTree *self,
else else
{ {
OstreeMutableTree *subdir; OstreeMutableTree *subdir;
if (!_ostree_mutable_tree_make_whole (self, NULL, error))
return FALSE;
subdir = g_hash_table_lookup (self->subdirs, split_path->pdata[start]); subdir = g_hash_table_lookup (self->subdirs, split_path->pdata[start]);
if (!subdir) if (!subdir)
@ -410,6 +588,7 @@ ostree_mutable_tree_walk (OstreeMutableTree *self,
GHashTable * GHashTable *
ostree_mutable_tree_get_subdirs (OstreeMutableTree *self) ostree_mutable_tree_get_subdirs (OstreeMutableTree *self)
{ {
_assert_ostree_mutable_tree_make_whole (self);
return self->subdirs; return self->subdirs;
} }
@ -422,9 +601,35 @@ ostree_mutable_tree_get_subdirs (OstreeMutableTree *self)
GHashTable * GHashTable *
ostree_mutable_tree_get_files (OstreeMutableTree *self) ostree_mutable_tree_get_files (OstreeMutableTree *self)
{ {
_assert_ostree_mutable_tree_make_whole (self);
return self->files; return self->files;
} }
/**
* ostree_mutable_tree_check_error:
* @self: Tree
*
* In some cases, a tree may be in a "lazy" state that loads
* data in the background; if an error occurred during a non-throwing
* API call, it will have been cached. This function checks for a
* cached error. The tree remains in error state.
*
* Since: 2018.7
* Returns: `TRUE` on success
*/
gboolean
ostree_mutable_tree_check_error (OstreeMutableTree *self,
GError **error)
{
if (self->cached_error)
{
if (error)
*error = g_error_copy (self->cached_error);
return FALSE;
}
return TRUE;
}
/** /**
* ostree_mutable_tree_new: * ostree_mutable_tree_new:
* *
@ -435,3 +640,27 @@ ostree_mutable_tree_new (void)
{ {
return (OstreeMutableTree*)g_object_new (OSTREE_TYPE_MUTABLE_TREE, NULL); return (OstreeMutableTree*)g_object_new (OSTREE_TYPE_MUTABLE_TREE, NULL);
} }
/**
* ostree_mutable_tree_new_from_checksum:
* @repo: The repo which contains the objects refered by the checksums.
* @contents_checksum: dirtree checksum
* @metadata_checksum: dirmeta checksum
*
* Creates a new OstreeMutableTree with the contents taken from the given repo
* and checksums. The data will be loaded from the repo lazily as needed.
*
* Returns: (transfer full): A new tree
*/
OstreeMutableTree *
ostree_mutable_tree_new_from_checksum (OstreeRepo *repo,
const char *contents_checksum,
const char *metadata_checksum)
{
OstreeMutableTree* out = (OstreeMutableTree*)g_object_new (OSTREE_TYPE_MUTABLE_TREE, NULL);
out->state = MTREE_STATE_LAZY;
out->repo = g_object_ref (repo);
out->contents_checksum = g_strdup (contents_checksum);
out->metadata_checksum = g_strdup (metadata_checksum);
return out;
}

View File

@ -52,6 +52,11 @@ GType ostree_mutable_tree_get_type (void) G_GNUC_CONST;
_OSTREE_PUBLIC _OSTREE_PUBLIC
OstreeMutableTree *ostree_mutable_tree_new (void); OstreeMutableTree *ostree_mutable_tree_new (void);
_OSTREE_PUBLIC
OstreeMutableTree * ostree_mutable_tree_new_from_checksum (OstreeRepo *repo,
const char *contents_checksum,
const char *metadata_checksum);
_OSTREE_PUBLIC _OSTREE_PUBLIC
void ostree_mutable_tree_set_metadata_checksum (OstreeMutableTree *self, void ostree_mutable_tree_set_metadata_checksum (OstreeMutableTree *self,
const char *checksum); const char *checksum);
@ -100,6 +105,17 @@ gboolean ostree_mutable_tree_walk (OstreeMutableTree *self,
OstreeMutableTree **out_subdir, OstreeMutableTree **out_subdir,
GError **error); GError **error);
_OSTREE_PUBLIC
gboolean ostree_mutable_tree_fill_empty_from_dirtree (OstreeMutableTree *self,
OstreeRepo *repo,
const char *contents_checksum,
const char *metadata_checksum);
_OSTREE_PUBLIC
gboolean
ostree_mutable_tree_check_error (OstreeMutableTree *self,
GError **error);
_OSTREE_PUBLIC _OSTREE_PUBLIC
GHashTable * ostree_mutable_tree_get_subdirs (OstreeMutableTree *self); GHashTable * ostree_mutable_tree_get_subdirs (OstreeMutableTree *self);
_OSTREE_PUBLIC _OSTREE_PUBLIC

View File

@ -3602,6 +3602,14 @@ write_directory_to_mtree_internal (OstreeRepo *self,
if (!ostree_repo_file_ensure_resolved (repo_dir, error)) if (!ostree_repo_file_ensure_resolved (repo_dir, error))
return FALSE; return FALSE;
/* ostree_mutable_tree_fill_from_dirtree returns FALSE if mtree isn't
* empty: in which case we're responsible for merging the trees. */
if (ostree_mutable_tree_fill_empty_from_dirtree (mtree,
ostree_repo_file_get_repo (repo_dir),
ostree_repo_file_tree_get_contents_checksum (repo_dir),
ostree_repo_file_get_checksum (repo_dir)))
return TRUE;
ostree_mutable_tree_set_metadata_checksum (mtree, ostree_repo_file_tree_get_metadata_checksum (repo_dir)); ostree_mutable_tree_set_metadata_checksum (mtree, ostree_repo_file_tree_get_metadata_checksum (repo_dir));
filter_result = OSTREE_REPO_COMMIT_FILTER_ALLOW; filter_result = OSTREE_REPO_COMMIT_FILTER_ALLOW;
@ -3915,6 +3923,9 @@ ostree_repo_write_mtree (OstreeRepo *self,
const char *contents_checksum, *metadata_checksum; const char *contents_checksum, *metadata_checksum;
g_autoptr(GFile) ret_file = NULL; g_autoptr(GFile) ret_file = NULL;
if (!ostree_mutable_tree_check_error (mtree, error))
return glnx_prefix_error (error, "mtree");
metadata_checksum = ostree_mutable_tree_get_metadata_checksum (mtree); metadata_checksum = ostree_mutable_tree_get_metadata_checksum (mtree);
if (!metadata_checksum) if (!metadata_checksum)
return glnx_throw (error, "Can't commit an empty tree"); return glnx_throw (error, "Can't commit an empty tree");