repo: Clean up staging directory for previous boot IDs

We had a policy of cleaning up all files in `$repo/tmp` older
than one day, but we should really clean up previous bootid staging
directories too, as they can potentially take up a lot of disk space.

https://bugzilla.gnome.org/show_bug.cgi?id=760531

Closes: #170
Approved by: jlebon
This commit is contained in:
Colin Walters 2016-01-13 11:33:54 -05:00 committed by Colin Walters (automation)
parent e3ec83a934
commit a56ba6081a
6 changed files with 220 additions and 71 deletions

View File

@ -63,6 +63,7 @@ test_scripts = \
tests/test-admin-deploy-etcmerge-cornercases.sh \
tests/test-admin-deploy-uboot.sh \
tests/test-admin-deploy-grub2.sh \
tests/test-admin-deploy-bootid-gc.sh \
tests/test-admin-instutil-set-kargs.sh \
tests/test-admin-upgrade-not-backwards.sh \
tests/test-admin-pull-deploy-commit.sh \

View File

@ -383,7 +383,7 @@ session_thread_request_uri (ThreadClosure *thread_closure,
if (thread_closure->tmpdir_name == NULL)
{
if (!_ostree_repo_allocate_tmpdir (thread_closure->base_tmpdir_dfd,
"fetcher-",
OSTREE_REPO_TMPDIR_FETCHER,
&thread_closure->tmpdir_name,
&thread_closure->tmpdir_dfd,
&thread_closure->tmpdir_lock,

View File

@ -1161,7 +1161,6 @@ ostree_repo_prepare_transaction (OstreeRepo *self,
{
gboolean ret = FALSE;
gboolean ret_transaction_resume = FALSE;
g_autofree char *stagedir_boot_id_prefix = NULL;
g_autofree char *stagedir_name = NULL;
glnx_fd_close int stagedir_fd = -1;
g_auto(GLnxDirFdIterator) dfd_iter = { 0, };
@ -1172,10 +1171,8 @@ ostree_repo_prepare_transaction (OstreeRepo *self,
self->in_transaction = TRUE;
stagedir_boot_id_prefix = g_strconcat ("staging-", self->boot_id, "-", NULL);
if (!_ostree_repo_allocate_tmpdir (self->tmp_dir_fd,
stagedir_boot_id_prefix,
self->stagedir_prefix,
&self->commit_stagedir_name,
&self->commit_stagedir_fd,
&self->commit_stagedir_lock,
@ -1281,44 +1278,86 @@ cleanup_tmpdir (OstreeRepo *self,
GError **error)
{
gboolean ret = FALSE;
g_autoptr(GFileEnumerator) enumerator = NULL;
g_auto(GLnxDirFdIterator) dfd_iter = { 0, };
guint64 curtime_secs;
enumerator = g_file_enumerate_children (self->tmp_dir, "standard::name,time::modified",
G_FILE_QUERY_INFO_NOFOLLOW_SYMLINKS,
cancellable,
error);
if (!enumerator)
goto out;
curtime_secs = g_get_real_time () / 1000000;
if (!glnx_dirfd_iterator_init_at (self->tmp_dir_fd, ".", TRUE, &dfd_iter, error))
goto out;
while (TRUE)
{
GFileInfo *file_info;
GFile *path;
guint64 mtime;
guint64 delta;
struct dirent *dent;
struct stat stbuf;
g_auto(GLnxLockFile) lockfile = GLNX_LOCK_FILE_INIT;
gboolean did_lock;
if (!gs_file_enumerator_iterate (enumerator, &file_info, &path,
cancellable, error))
if (!glnx_dirfd_iterator_next_dent (&dfd_iter, &dent, cancellable, error))
goto out;
if (file_info == NULL)
if (dent == NULL)
break;
mtime = g_file_info_get_attribute_uint64 (file_info, "time::modified");
if (mtime > curtime_secs)
continue;
/* Only delete files older than a day. To do better, we would
* need to coordinate between multiple processes in a reliable
* fashion. See
* https://bugzilla.gnome.org/show_bug.cgi?id=709115
*/
delta = curtime_secs - mtime;
if (delta > 60*60*24)
if (TEMP_FAILURE_RETRY (fstatat (dfd_iter.fd, dent->d_name, &stbuf, AT_SYMLINK_NOFOLLOW)) < 0)
{
if (!glnx_shutil_rm_rf_at (AT_FDCWD, gs_file_get_path_cached (path), cancellable, error))
if (errno == ENOENT) /* Did another cleanup win? */
continue;
glnx_set_error_from_errno (error);
goto out;
}
/* First, if it's a directory which needs locking, but it's
* busy, skip it.
*/
if (_ostree_repo_is_locked_tmpdir (dent->d_name))
{
if (!_ostree_repo_try_lock_tmpdir (dfd_iter.fd, dent->d_name,
&lockfile, &did_lock, error))
goto out;
if (!did_lock)
continue;
}
/* If however this is the staging directory for the *current*
* boot, then don't delete it now - we may end up reusing it, as
* is the point.
*/
if (g_str_has_prefix (dent->d_name, self->stagedir_prefix))
continue;
else if (g_str_has_prefix (dent->d_name, OSTREE_REPO_TMPDIR_STAGING))
{
/* But, crucially we can now clean up staging directories
* from *other* boots
*/
if (!glnx_shutil_rm_rf_at (dfd_iter.fd, dent->d_name, cancellable, error))
goto out;
}
/* FIXME - move OSTREE_REPO_TMPDIR_FETCHER underneath the
* staging/boot-id scheme as well, since all of the "did it get
* fsync'd" concerns apply to that as well. Then we can skip
* this special case.
*/
else if (g_str_has_prefix (dent->d_name, OSTREE_REPO_TMPDIR_FETCHER))
continue;
else
{
/* Now we do time-based cleanup. Ignore it if it's somehow
* in the future...
*/
if (stbuf.st_mtime > curtime_secs)
continue;
/* Now, we arbitrarily delete files/directories older than a
* day, since that's what we were doing before we had locking.
*/
delta = curtime_secs - stbuf.st_mtime;
if (delta > 60*60*24)
{
if (!glnx_shutil_rm_rf_at (dfd_iter.fd, dent->d_name, cancellable, error))
goto out;
}
}
}

View File

@ -48,7 +48,7 @@ typedef enum {
struct OstreeRepo {
GObject parent;
char *boot_id;
char *stagedir_prefix;
int commit_stagedir_fd;
char *commit_stagedir_name;
GLnxLockFile commit_stagedir_lock;
@ -108,6 +108,9 @@ typedef struct {
char checksum[65];
} OstreeDevIno;
#define OSTREE_REPO_TMPDIR_STAGING "staging-"
#define OSTREE_REPO_TMPDIR_FETCHER "fetcher-"
gboolean
_ostree_repo_allocate_tmpdir (int tmpdir_dfd,
const char *tmpdir_prefix,
@ -118,6 +121,16 @@ _ostree_repo_allocate_tmpdir (int tmpdir_dfd,
GCancellable *cancellable,
GError **error);
gboolean
_ostree_repo_is_locked_tmpdir (const char *filename);
gboolean
_ostree_repo_try_lock_tmpdir (int tmpdir_dfd,
const char *tmpdir_name,
GLnxLockFile *file_lock_out,
gboolean *out_did_lock,
GError **error);
gboolean
_ostree_repo_ensure_loose_objdir_at (int dfd,
const char *loose_path,

View File

@ -617,7 +617,7 @@ ostree_repo_finalize (GObject *object)
g_clear_object (&self->parent_repo);
g_free (self->boot_id);
g_free (self->stagedir_prefix);
g_clear_object (&self->repodir);
if (self->repo_dir_fd != -1)
(void) close (self->repo_dir_fd);
@ -2428,22 +2428,26 @@ ostree_repo_open (OstreeRepo *self,
if (self->inited)
return TRUE;
/* We use a per-boot identifier to keep track of which file contents
* possibly haven't been sync'd to disk.
/* We use a directory of the form `staging-${BOOT_ID}-${RANDOM}`
* where if the ${BOOT_ID} doesn't match, we know file contents
* possibly haven't been sync'd to disk and need to be discarded.
*/
{ const char *env_bootid = getenv ("OSTREE_BOOTID");
g_autofree char *boot_id = NULL;
if (env_bootid != NULL)
self->boot_id = g_strdup (env_bootid);
boot_id = g_strdup (env_bootid);
else
{
if (!g_file_get_contents ("/proc/sys/kernel/random/boot_id",
&self->boot_id,
&boot_id,
NULL,
error))
goto out;
g_strdelimit (self->boot_id, "\n", '\0');
g_strdelimit (boot_id, "\n", '\0');
}
self->stagedir_prefix = g_strconcat (OSTREE_REPO_TMPDIR_STAGING, boot_id, "-", NULL);
}
if (!glnx_opendirat (AT_FDCWD, gs_file_get_path_cached (self->repodir), TRUE,
@ -5003,6 +5007,50 @@ ostree_repo_regenerate_summary (OstreeRepo *self,
return ret;
}
gboolean
_ostree_repo_is_locked_tmpdir (const char *filename)
{
return g_str_has_prefix (filename, OSTREE_REPO_TMPDIR_STAGING) ||
g_str_has_prefix (filename, OSTREE_REPO_TMPDIR_FETCHER);
}
gboolean
_ostree_repo_try_lock_tmpdir (int tmpdir_dfd,
const char *tmpdir_name,
GLnxLockFile *file_lock_out,
gboolean *out_did_lock,
GError **error)
{
gboolean ret = FALSE;
g_autofree char *lock_name = g_strconcat (tmpdir_name, "-lock", NULL);
gboolean did_lock = FALSE;
g_autoptr(GError) local_error = NULL;
/* We put the lock outside the dir, so we can hold the lock
* until the directory is fully removed */
if (!glnx_make_lock_file (tmpdir_dfd, lock_name, LOCK_EX | LOCK_NB,
file_lock_out, &local_error))
{
if (g_error_matches (local_error, G_IO_ERROR, G_IO_ERROR_WOULD_BLOCK))
{
did_lock = FALSE;
}
else
{
g_propagate_error (error, g_steal_pointer (&local_error));
goto out;
}
}
else
{
did_lock = TRUE;
}
ret = TRUE;
*out_did_lock = did_lock;
out:
return ret;
}
/* This allocates and locks a subdir of the repo tmp dir, using an existing
* one with the same prefix if it is not in use already. */
@ -5016,14 +5064,18 @@ _ostree_repo_allocate_tmpdir (int tmpdir_dfd,
GCancellable *cancellable,
GError **error)
{
gboolean ret = FALSE;
gboolean reusing_dir = FALSE;
gboolean did_lock;
g_autofree char *tmpdir_name = NULL;
glnx_fd_close int tmpdir_fd = -1;
g_auto(GLnxDirFdIterator) dfd_iter = { 0, };
g_return_val_if_fail (_ostree_repo_is_locked_tmpdir (tmpdir_prefix), FALSE);
/* Look for existing tmpdir (with same prefix) to reuse */
if (!glnx_dirfd_iterator_init_at (tmpdir_dfd, ".", FALSE, &dfd_iter, error))
return FALSE;
goto out;
while (tmpdir_name == NULL)
{
@ -5031,10 +5083,9 @@ _ostree_repo_allocate_tmpdir (int tmpdir_dfd,
struct dirent *dent;
glnx_fd_close int existing_tmpdir_fd = -1;
g_autoptr(GError) local_error = NULL;
g_autofree char *lock_name = NULL;
if (!glnx_dirfd_iterator_next_dent (&dfd_iter, &dent, cancellable, error))
return FALSE;
goto out;
if (dent == NULL)
break;
@ -5055,25 +5106,18 @@ _ostree_repo_allocate_tmpdir (int tmpdir_dfd,
else
{
g_propagate_error (error, g_steal_pointer (&local_error));
return FALSE;
goto out;
}
}
lock_name = g_strconcat (dent->d_name, "-lock", NULL);
/* We put the lock outside the dir, so we can hold the lock
* until the directory is fully removed */
if (!glnx_make_lock_file (dfd_iter.fd, lock_name, LOCK_EX | LOCK_NB,
file_lock_out, &local_error))
{
if (g_error_matches (local_error, G_IO_ERROR, G_IO_ERROR_WOULD_BLOCK))
continue;
else
{
g_propagate_error (error, g_steal_pointer (&local_error));
return FALSE;
}
}
if (!_ostree_repo_try_lock_tmpdir (dfd_iter.fd, dent->d_name,
file_lock_out, &did_lock,
error))
goto out;
if (!did_lock)
continue;
/* Touch the reused directory so that we don't accidentally
* remove it due to being old when cleaning up the tmpdir
@ -5091,32 +5135,24 @@ _ostree_repo_allocate_tmpdir (int tmpdir_dfd,
g_autofree char *tmpdir_name_template = g_strconcat (tmpdir_prefix, "XXXXXX", NULL);
glnx_fd_close int new_tmpdir_fd = -1;
g_autoptr(GError) local_error = NULL;
g_autofree char *lock_name = NULL;
/* No existing tmpdir found, create a new */
if (!glnx_mkdtempat (tmpdir_dfd, tmpdir_name_template, 0777, error))
return FALSE;
goto out;
if (!glnx_opendirat (tmpdir_dfd, tmpdir_name_template, FALSE,
&new_tmpdir_fd, error))
return FALSE;
lock_name = g_strconcat (tmpdir_name_template, "-lock", NULL);
goto out;
/* Note, at this point we can race with another process that picks up this
* new directory. If that happens we need to retry, making a new directory. */
if (!glnx_make_lock_file (tmpdir_dfd, lock_name, LOCK_EX | LOCK_NB,
file_lock_out, &local_error))
{
if (g_error_matches (local_error, G_IO_ERROR, G_IO_ERROR_WOULD_BLOCK))
continue;
else
{
g_propagate_error (error, g_steal_pointer (&local_error));
return FALSE;
}
}
if (!_ostree_repo_try_lock_tmpdir (tmpdir_dfd, tmpdir_name_template,
file_lock_out, &did_lock,
error))
goto out;
if (!did_lock)
continue;
tmpdir_name = g_steal_pointer (&tmpdir_name_template);
tmpdir_fd = glnx_steal_fd (&new_tmpdir_fd);
@ -5131,5 +5167,7 @@ _ostree_repo_allocate_tmpdir (int tmpdir_dfd,
if (reusing_dir_out)
*reusing_dir_out = reusing_dir;
return TRUE;
ret = TRUE;
out:
return ret;
}

View File

@ -0,0 +1,58 @@
#!/bin/bash
#
# Copyright (C) 2016 Colin Walters <walters@verbum.org>
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, write to the
# Free Software Foundation, Inc., 59 Temple Place - Suite 330,
# Boston, MA 02111-1307, USA.
set -euo pipefail
. $(dirname $0)/libtest.sh
# Exports OSTREE_SYSROOT so --sysroot not needed.
setup_os_repository "archive-z2" "syslinux"
echo "1..1"
${CMD_PREFIX} ostree --repo=sysroot/ostree/repo pull-local --remote=testos testos-repo testos/buildmaster/x86_64-runtime
rev=$(${CMD_PREFIX} ostree --repo=sysroot/ostree/repo rev-parse testos/buildmaster/x86_64-runtime)
export rev
${CMD_PREFIX} ostree admin deploy --karg=root=LABEL=MOO --karg=quiet --os=testos testos:testos/buildmaster/x86_64-runtime
os_repository_new_commit
rm sysroot/ostree/repo/tmp/* -rf
export TEST_BOOTID=4072029c-8b10-60d1-d31b-8422eeff9b42
if env OSTREE_REPO_TEST_ERROR=pre-commit OSTREE_BOOTID=${TEST_BOOTID} \
${CMD_PREFIX} ostree admin deploy --karg=root=LABEL=MOO --karg=quiet --os=testos testos:testos/buildmaster/x86_64-runtime 2>err.txt; then
assert_not_reached "Should have hit OSTREE_REPO_TEST_ERROR_PRE_COMMIT"
fi
stagepath=$(ls -d sysroot/ostree/repo/tmp/staging-${TEST_BOOTID}-*)
assert_has_dir "${stagepath}"
# We have an older failed stage, now use a new boot id
export NEW_TEST_BOOTID=5072029c-8b10-60d1-d31b-8422eeff9b42
if env OSTREE_REPO_TEST_ERROR=pre-commit OSTREE_BOOTID=${NEW_TEST_BOOTID} \
${CMD_PREFIX} ostree admin deploy --karg=root=LABEL=FOO --karg=quiet --os=testos testos:testos/buildmaster/x86_64-runtime 2>err.txt; then
assert_not_reached "Should have hit OSTREE_REPO_TEST_ERROR_PRE_COMMIT"
fi
newstagepath=$(ls -d sysroot/ostree/repo/tmp/staging-${NEW_TEST_BOOTID}-*)
assert_has_dir "${newstagepath}"
env OSTREE_BOOTID=${NEW_TEST_BOOTID} ${CMD_PREFIX} ostree admin deploy --karg=root=LABEL=MOO --karg=quiet --os=testos testos:testos/buildmaster/x86_64-runtime
newstagepath=$(ls -d sysroot/ostree/repo/tmp/staging-${NEW_TEST_BOOTID}-*)
assert_not_has_dir "${stagepath}"
assert_not_has_dir "${newstagepath}"
echo "ok admin bootid GC"