repo/checkout: Cache lookups of dirmeta objects

I was reading a strace the other day and noticed we were loading the same
`.dirmeta` object many times. Unlike the other object types, `.dirmeta` objects
don't accumulate much over time; there are only so many directory metadata types.
(Without SELinux involved it'd probably be 5-6 I'd guess offhand).

For `fedora-atomic/25/x86_64/docker-host` there are currently 34 `.dirmeta` in
the tree.

But how many times during a checkout did we load those 34 dirmeta objects?
With a quick strace:

```
$ strace -s 2048 -f -o strace.log ostree --repo=repo-build checkout -U fedora-atomic/25/x86_64/docker-host host-test-checkout
$ grep dirmeta strace.log | wc -l
7165
```

After, as you'd expect, we just loaded `34` from disk.  We do
6 system calls (`openat+fstat+fstat+read+read+close`) per dirmeta,
so we dropped a total of 42780 system calls - which is about 20% of the total
system calls made.

`perf record` tells me that we're spending ~40 of our time in the kernel during
a checkout, so reducing syscall traffic helps. Though most of that appears to be
in the VFS and XFS layers for `linkat` (which isn't surprising).

So how much did perf improve? Well, on my workstation, I get a lot of
fluctuation in timing, sometimes by 30%, so this was well within the noise. But
it's well worth speeding up checkout, and I think this optimization will shine
more as we improve performance elsewhere.

Closes: #795
Approved by: jlebon
This commit is contained in:
Colin Walters 2017-04-14 14:00:55 -04:00 committed by Atomic Bot
parent 4fc65b808a
commit b7afe91e21
3 changed files with 85 additions and 0 deletions

View File

@ -799,6 +799,8 @@ ostree_repo_checkout_tree (OstreeRepo *self,
/* Backwards compatibility */
options.enable_uncompressed_cache = TRUE;
g_auto(OstreeRepoMemoryCacheRef) memcache_ref;
_ostree_repo_memory_cache_ref_init (&memcache_ref, self);
return checkout_tree_at (self, &options,
AT_FDCWD, gs_file_get_path_cached (destination),
source, source_info,
@ -916,6 +918,8 @@ ostree_repo_checkout_at (OstreeRepo *self,
if (!target_info)
return FALSE;
g_auto(OstreeRepoMemoryCacheRef) memcache_ref;
_ostree_repo_memory_cache_ref_init (&memcache_ref, self);
if (!checkout_tree_at (self, options,
destination_dfd,
destination_path,

View File

@ -92,6 +92,9 @@ struct OstreeRepo {
OstreeRepoTransactionStats txn_stats;
GMutex cache_lock;
guint dirmeta_cache_refcount;
/* char * checksum → GVariant * for dirmeta objects, used in the checkout path */
GHashTable *dirmeta_cache;
gboolean inited;
gboolean writable;
@ -126,6 +129,24 @@ typedef struct {
char checksum[OSTREE_SHA256_STRING_LEN+1];
} OstreeDevIno;
/* A MemoryCacheRef is an in-memory cache of objects (currently just DIRMETA). This can
* be used when performing an operation that traverses a repository in someway. Currently,
* the primary use case is ostree_repo_checkout_at() avoiding lots of duplicate dirmeta
* lookups.
*/
typedef struct {
OstreeRepo *repo;
} OstreeRepoMemoryCacheRef;
void
_ostree_repo_memory_cache_ref_init (OstreeRepoMemoryCacheRef *state,
OstreeRepo *repo);
void
_ostree_repo_memory_cache_ref_destroy (OstreeRepoMemoryCacheRef *state);
G_DEFINE_AUTO_CLEANUP_CLEAR_FUNC(OstreeRepoMemoryCacheRef, _ostree_repo_memory_cache_ref_destroy);
#define OSTREE_REPO_TMPDIR_STAGING "staging-"
#define OSTREE_REPO_TMPDIR_FETCHER "fetcher-"

View File

@ -538,6 +538,7 @@ ostree_repo_finalize (GObject *object)
g_clear_pointer (&self->txn_refs, g_hash_table_destroy);
g_clear_error (&self->writable_error);
g_clear_pointer (&self->object_sizes, (GDestroyNotify) g_hash_table_unref);
g_clear_pointer (&self->dirmeta_cache, (GDestroyNotify) g_hash_table_unref);
g_mutex_clear (&self->cache_lock);
g_mutex_clear (&self->txn_stats_lock);
@ -2500,6 +2501,26 @@ load_metadata_internal (OstreeRepo *self,
g_return_val_if_fail (OSTREE_OBJECT_TYPE_IS_META (objtype), FALSE);
/* Special caching for dirmeta objects, since they're commonly referenced many
* times.
*/
const gboolean is_dirmeta_cachable =
(objtype == OSTREE_OBJECT_TYPE_DIR_META && out_variant && !out_stream);
if (is_dirmeta_cachable)
{
GMutex *lock = &self->cache_lock;
g_mutex_lock (lock);
GVariant *cache_hit = NULL;
/* Look it up, if we have a cache */
if (self->dirmeta_cache)
cache_hit = g_hash_table_lookup (self->dirmeta_cache, sha256);
if (cache_hit)
*out_variant = g_variant_ref (cache_hit);
g_mutex_unlock (lock);
if (cache_hit)
return TRUE;
}
_ostree_loose_path (loose_path_buf, sha256, objtype, self->mode);
if (!ot_openat_ignore_enoent (self->objects_dir_fd, loose_path_buf, &fd,
@ -2545,6 +2566,16 @@ load_metadata_internal (OstreeRepo *self,
data, TRUE);
g_variant_ref_sink (ret_variant);
}
/* Now, let's put it in the cache */
if (is_dirmeta_cachable)
{
GMutex *lock = &self->cache_lock;
g_mutex_lock (lock);
if (self->dirmeta_cache)
g_hash_table_replace (self->dirmeta_cache, g_strdup (sha256), g_variant_ref (ret_variant));
g_mutex_unlock (lock);
}
}
else if (out_stream)
{
@ -4932,3 +4963,32 @@ _ostree_repo_allocate_tmpdir (int tmpdir_dfd,
out:
return ret;
}
/* See ostree-repo-private.h for more information about this */
void
_ostree_repo_memory_cache_ref_init (OstreeRepoMemoryCacheRef *state,
OstreeRepo *repo)
{
state->repo = g_object_ref (repo);
GMutex *lock = &repo->cache_lock;
g_mutex_lock (lock);
repo->dirmeta_cache_refcount++;
if (repo->dirmeta_cache == NULL)
repo->dirmeta_cache = g_hash_table_new_full (g_str_hash, g_str_equal, g_free, (GDestroyNotify)g_variant_unref);
g_mutex_unlock (lock);
}
/* See ostree-repo-private.h for more information about this */
void
_ostree_repo_memory_cache_ref_destroy (OstreeRepoMemoryCacheRef *state)
{
OstreeRepo *repo = state->repo;
GMutex *lock = &repo->cache_lock;
g_mutex_lock (lock);
repo->dirmeta_cache_refcount--;
if (repo->dirmeta_cache_refcount == 0)
g_clear_pointer (&repo->dirmeta_cache, (GDestroyNotify) g_hash_table_unref);
g_mutex_unlock (lock);
g_object_unref (repo);
}