lib/checkout: Rename disjoint union, change to merge identical files

It turns out that librpm automatically merges identical files between
distinct packages, and this occurs in practice with Fedora today between
`chkconfig` and `initscripts` for exmaple.

Since we added this for rpm-ostree, we basically want to do what librpm does,
let's change the semantics to do a merge.  While we're here rename
to `UNION_IDENTICAL`.

Closes: #1156
Approved by: jlebon
This commit is contained in:
Colin Walters 2017-09-08 17:07:45 -04:00 committed by Atomic Bot
parent 8d3752a0d6
commit 051cdf396c
6 changed files with 208 additions and 86 deletions

View File

@ -696,7 +696,7 @@ _ostree_checkout() {
--require-hardlinks -H
--union
--union-add
--disjoint-union
--union-identical
--user-mode -U
--whiteouts
"

View File

@ -98,11 +98,12 @@ Boston, MA 02111-1307, USA.
</varlistentry>
<varlistentry>
<term><option>--disjoint-union</option></term>
<term><option>--union-identical</option></term>
<listitem><para>
When layering checkouts, error out if a file would be replaced, but add new files and directories
</para></listitem>
<listitem><para>Like <literal>--union</literal>, but error out
if a file would be replaced with a different file. Add new files
and directories, ignore identical files, and keep existing
directories. Requires <literal>-H</literal>.</para></listitem>
</varlistentry>
<varlistentry>

View File

@ -188,8 +188,6 @@ create_file_copy_from_input_at (OstreeRepo *repo,
GCancellable *cancellable,
GError **error)
{
const gboolean union_mode = options->overwrite_mode == OSTREE_REPO_CHECKOUT_OVERWRITE_UNION_FILES;
const gboolean add_mode = options->overwrite_mode == OSTREE_REPO_CHECKOUT_OVERWRITE_ADD_FILES;
const gboolean sepolicy_enabled = options->sepolicy && !repo->disable_xattrs;
g_autoptr(GVariant) modified_xattrs = NULL;
@ -218,23 +216,51 @@ create_file_copy_from_input_at (OstreeRepo *repo,
return FALSE;
}
if (symlinkat (g_file_info_get_symlink_target (file_info),
destination_dfd, destination_name) < 0)
const char *target = g_file_info_get_symlink_target (file_info);
if (symlinkat (target, destination_dfd, destination_name) < 0)
{
if (errno != EEXIST)
return glnx_throw_errno_prefix (error, "symlinkat");
/* Handle union/add behaviors if we get EEXIST */
if (errno == EEXIST && union_mode)
switch (options->overwrite_mode)
{
/* Unioning? Let's unlink and try again */
(void) unlinkat (destination_dfd, destination_name, 0);
if (symlinkat (g_file_info_get_symlink_target (file_info),
destination_dfd, destination_name) < 0)
return glnx_throw_errno (error);
case OSTREE_REPO_CHECKOUT_OVERWRITE_NONE:
return glnx_throw_errno_prefix (error, "symlinkat");
case OSTREE_REPO_CHECKOUT_OVERWRITE_UNION_FILES:
{
/* Unioning? Let's unlink and try again */
(void) unlinkat (destination_dfd, destination_name, 0);
if (symlinkat (target, destination_dfd, destination_name) < 0)
return glnx_throw_errno_prefix (error, "symlinkat");
}
case OSTREE_REPO_CHECKOUT_OVERWRITE_ADD_FILES:
/* Note early return - we don't want to set the xattrs below */
return TRUE;
case OSTREE_REPO_CHECKOUT_OVERWRITE_UNION_IDENTICAL:
{
/* See the comments for the hardlink version of this
* for why we do this.
*/
struct stat dest_stbuf;
if (!glnx_fstatat (destination_dfd, destination_name, &dest_stbuf,
AT_SYMLINK_NOFOLLOW, error))
return FALSE;
if (S_ISLNK (dest_stbuf.st_mode))
{
g_autofree char *dest_target =
glnx_readlinkat_malloc (destination_dfd, destination_name,
cancellable, error);
if (!dest_target)
return FALSE;
/* In theory we could also compare xattrs...but eh */
if (g_str_equal (dest_target, target))
return TRUE;
}
errno = EEXIST;
return glnx_throw_errno_prefix (error, "symlinkat");
}
}
else if (errno == EEXIST && add_mode)
/* Note early return - we don't want to set the xattrs below */
return TRUE;
else
return glnx_throw_errno (error);
}
/* Process ownership and xattrs now that we made the link */
@ -255,7 +281,6 @@ create_file_copy_from_input_at (OstreeRepo *repo,
else if (g_file_info_get_file_type (file_info) == G_FILE_TYPE_REGULAR)
{
g_auto(GLnxTmpfile) tmpf = { 0, };
GLnxLinkTmpfileReplaceMode replace_mode;
if (!glnx_open_tmpfile_linkable_at (destination_dfd, ".", O_WRONLY | O_CLOEXEC,
&tmpf, error))
@ -278,12 +303,23 @@ create_file_copy_from_input_at (OstreeRepo *repo,
return FALSE;
/* The add/union/none behaviors map directly to GLnxLinkTmpfileReplaceMode */
if (add_mode)
replace_mode = GLNX_LINK_TMPFILE_NOREPLACE_IGNORE_EXIST;
else if (union_mode)
replace_mode = GLNX_LINK_TMPFILE_REPLACE;
else
replace_mode = GLNX_LINK_TMPFILE_NOREPLACE;
GLnxLinkTmpfileReplaceMode replace_mode;
switch (options->overwrite_mode)
{
case OSTREE_REPO_CHECKOUT_OVERWRITE_NONE:
replace_mode = GLNX_LINK_TMPFILE_NOREPLACE;
break;
case OSTREE_REPO_CHECKOUT_OVERWRITE_UNION_FILES:
replace_mode = GLNX_LINK_TMPFILE_REPLACE;
break;
case OSTREE_REPO_CHECKOUT_OVERWRITE_ADD_FILES:
replace_mode = GLNX_LINK_TMPFILE_NOREPLACE_IGNORE_EXIST;
break;
case OSTREE_REPO_CHECKOUT_OVERWRITE_UNION_IDENTICAL:
/* We don't support copying in union identical */
g_assert_not_reached ();
break;
}
if (!glnx_link_tmpfile_at (&tmpf, replace_mode,
destination_dfd, destination_name,
@ -329,25 +365,61 @@ checkout_file_hardlink (OstreeRepo *self,
else if (allow_noent && errno == ENOENT)
{
}
else if (errno == EEXIST && options->overwrite_mode == OSTREE_REPO_CHECKOUT_OVERWRITE_ADD_FILES)
else if (errno == EEXIST)
{
/* In this mode, we keep existing content. Distinguish this case though to
* avoid inserting into the devino cache.
*/
ret_result = HARDLINK_RESULT_SKIP_EXISTED;
}
else if (errno == EEXIST && options->overwrite_mode == OSTREE_REPO_CHECKOUT_OVERWRITE_UNION_FILES)
{
/* Idiocy, from man rename(2)
*
* "If oldpath and newpath are existing hard links referring to
* the same file, then rename() does nothing, and returns a
* success status."
*
* So we can't make this atomic.
*/
(void) unlinkat (destination_dfd, destination_name, 0);
goto again;
/* When we get EEXIST, we need to handle the different overwrite modes. */
switch (options->overwrite_mode)
{
case OSTREE_REPO_CHECKOUT_OVERWRITE_NONE:
/* Just throw */
return glnx_throw_errno_prefix (error, "Hardlinking %s to %s", loose_path, destination_name);
case OSTREE_REPO_CHECKOUT_OVERWRITE_ADD_FILES:
/* In this mode, we keep existing content. Distinguish this case though to
* avoid inserting into the devino cache.
*/
ret_result = HARDLINK_RESULT_SKIP_EXISTED;
break;
case OSTREE_REPO_CHECKOUT_OVERWRITE_UNION_FILES:
{
/* Idiocy, from man rename(2)
*
* "If oldpath and newpath are existing hard links referring to
* the same file, then rename() does nothing, and returns a
* success status."
*
* So we can't make this atomic.
*/
(void) unlinkat (destination_dfd, destination_name, 0);
goto again;
}
case OSTREE_REPO_CHECKOUT_OVERWRITE_UNION_IDENTICAL:
{
/* In this mode, we error out on EEXIST *unless* the files are already
* hardlinked, which is what rpm-ostree wants for package layering.
* https://github.com/projectatomic/rpm-ostree/issues/982
*
* This should be similar to the librpm version:
* https://github.com/rpm-software-management/rpm/blob/e3cd2bc85e0578f158d14e6f9624eb955c32543b/lib/rpmfi.c#L921
* in rpmfilesCompare().
*/
struct stat src_stbuf;
if (!glnx_fstatat (srcfd, loose_path, &src_stbuf,
AT_SYMLINK_NOFOLLOW, error))
return FALSE;
struct stat dest_stbuf;
if (!glnx_fstatat (destination_dfd, destination_name, &dest_stbuf,
AT_SYMLINK_NOFOLLOW, error))
return FALSE;
const gboolean is_identical =
(src_stbuf.st_dev == dest_stbuf.st_dev &&
src_stbuf.st_ino == dest_stbuf.st_ino);
if (is_identical)
ret_result = HARDLINK_RESULT_SKIP_EXISTED;
else
return glnx_throw_errno_prefix (error, "Hardlinking %s to %s", loose_path, destination_name);
break;
}
}
}
else
{
@ -652,13 +724,20 @@ checkout_tree_at_recurse (OstreeRepo *self,
*/
if (TEMP_FAILURE_RETRY (mkdirat (destination_parent_fd, destination_name, 0700)) < 0)
{
if (errno == EEXIST &&
(options->overwrite_mode == OSTREE_REPO_CHECKOUT_OVERWRITE_UNION_FILES
|| options->overwrite_mode == OSTREE_REPO_CHECKOUT_OVERWRITE_ADD_FILES
|| options->overwrite_mode == OSTREE_REPO_CHECKOUT_OVERWRITE_DISJOINT_UNION_FILES))
did_exist = TRUE;
else
return glnx_throw_errno (error);
if (errno != EEXIST)
return glnx_throw_errno_prefix (error, "mkdirat");
switch (options->overwrite_mode)
{
case OSTREE_REPO_CHECKOUT_OVERWRITE_NONE:
return glnx_throw_errno_prefix (error, "mkdirat");
/* All of these cases are the same for directories */
case OSTREE_REPO_CHECKOUT_OVERWRITE_UNION_FILES:
case OSTREE_REPO_CHECKOUT_OVERWRITE_ADD_FILES:
case OSTREE_REPO_CHECKOUT_OVERWRITE_UNION_IDENTICAL:
did_exist = TRUE;
break;
}
}
}
@ -1002,6 +1081,9 @@ ostree_repo_checkout_at (OstreeRepo *self,
g_return_val_if_fail (!(options->force_copy && options->no_copy_fallback), FALSE);
g_return_val_if_fail (!options->sepolicy || options->force_copy, FALSE);
/* union identical requires hardlink mode */
g_return_val_if_fail (!(options->overwrite_mode == OSTREE_REPO_CHECKOUT_OVERWRITE_UNION_IDENTICAL &&
!options->no_copy_fallback), FALSE);
g_autoptr(GFile) commit_root = (GFile*) _ostree_repo_file_new_for_commit (self, commit, error);
if (!commit_root)

View File

@ -832,13 +832,13 @@ typedef enum {
* @OSTREE_REPO_CHECKOUT_OVERWRITE_NONE: No special options
* @OSTREE_REPO_CHECKOUT_OVERWRITE_UNION_FILES: When layering checkouts, unlink() and replace existing files, but do not modify existing directories
* @OSTREE_REPO_CHECKOUT_OVERWRITE_ADD_FILES: Only add new files/directories
* @OSTREE_REPO_CHECKOUT_OVERWRITE_DISJOINT_UNION_FILES: When layering checkouts, error out if a file would be replaced, but add new files and directories
* @OSTREE_REPO_CHECKOUT_OVERWRITE_UNION_IDENTICAL: Like UNION_FILES, but error if files are not identical (requires hardlink checkouts)
*/
typedef enum {
OSTREE_REPO_CHECKOUT_OVERWRITE_NONE = 0,
OSTREE_REPO_CHECKOUT_OVERWRITE_UNION_FILES = 1,
OSTREE_REPO_CHECKOUT_OVERWRITE_ADD_FILES = 2, /* Since: 2017.3 */
OSTREE_REPO_CHECKOUT_OVERWRITE_DISJOINT_UNION_FILES = 3 /* Since: 2017.11 */
OSTREE_REPO_CHECKOUT_OVERWRITE_UNION_IDENTICAL = 3, /* Since: 2017.11 */
} OstreeRepoCheckoutOverwriteMode;
_OSTREE_PUBLIC

View File

@ -37,7 +37,7 @@ static gboolean opt_disable_cache;
static char *opt_subpath;
static gboolean opt_union;
static gboolean opt_union_add;
static gboolean opt_disjoint_union;
static gboolean opt_union_identical;
static gboolean opt_whiteouts;
static gboolean opt_from_stdin;
static char *opt_from_file;
@ -73,7 +73,7 @@ static GOptionEntry options[] = {
{ "subpath", 0, 0, G_OPTION_ARG_FILENAME, &opt_subpath, "Checkout sub-directory PATH", "PATH" },
{ "union", 0, 0, G_OPTION_ARG_NONE, &opt_union, "Keep existing directories, overwrite existing files", NULL },
{ "union-add", 0, 0, G_OPTION_ARG_NONE, &opt_union_add, "Keep existing files/directories, only add new", NULL },
{ "disjoint-union", 0, 0, G_OPTION_ARG_NONE, &opt_disjoint_union, "When layering checkouts, error out if a file would be replaced, but add new files and directories", NULL },
{ "union-identical", 0, 0, G_OPTION_ARG_NONE, &opt_union_identical, "When layering checkouts, error out if a file would be replaced with a different version, but add new files and directories", NULL },
{ "whiteouts", 0, 0, G_OPTION_ARG_NONE, &opt_whiteouts, "Process 'whiteout' (Docker style) entries", NULL },
{ "allow-noent", 0, 0, G_OPTION_ARG_NONE, &opt_allow_noent, "Do nothing if specified path does not exist", NULL },
{ "from-stdin", 0, 0, G_OPTION_ARG_NONE, &opt_from_stdin, "Process many checkouts from standard input", NULL },
@ -101,7 +101,7 @@ process_one_checkout (OstreeRepo *repo,
* convenient infrastructure for testing C APIs with data.
*/
if (opt_disable_cache || opt_whiteouts || opt_require_hardlinks ||
opt_union_add || opt_force_copy || opt_bareuseronly_dirs || opt_disjoint_union)
opt_union_add || opt_force_copy || opt_bareuseronly_dirs || opt_union_identical)
{
OstreeRepoCheckoutAtOptions options = { 0, };
@ -114,16 +114,16 @@ process_one_checkout (OstreeRepo *repo,
"Cannot specify both --union and --union-add");
goto out;
}
if (opt_union && opt_disjoint_union)
if (opt_union && opt_union_identical)
{
g_set_error (error, G_IO_ERROR, G_IO_ERROR_FAILED,
"Cannot specify both --union and --disjoint-union");
"Cannot specify both --union and --union-identical");
goto out;
}
if (opt_union_add && opt_disjoint_union)
if (opt_union_add && opt_union_identical)
{
g_set_error (error, G_IO_ERROR, G_IO_ERROR_FAILED,
"Cannot specify both --union-add and --disjoint-union ");
"Cannot specify both --union-add and --union-identical ");
goto out;
}
if (opt_require_hardlinks && opt_force_copy)
@ -135,8 +135,16 @@ process_one_checkout (OstreeRepo *repo,
options.overwrite_mode = OSTREE_REPO_CHECKOUT_OVERWRITE_UNION_FILES;
else if (opt_union_add)
options.overwrite_mode = OSTREE_REPO_CHECKOUT_OVERWRITE_ADD_FILES;
else if (opt_disjoint_union)
options.overwrite_mode = OSTREE_REPO_CHECKOUT_OVERWRITE_DISJOINT_UNION_FILES;
else if (opt_union_identical)
{
if (!opt_require_hardlinks)
{
g_set_error (error, G_IO_ERROR, G_IO_ERROR_FAILED,
"--union-identical requires --require-hardlinks");
goto out;
}
options.overwrite_mode = OSTREE_REPO_CHECKOUT_OVERWRITE_UNION_IDENTICAL;
}
if (opt_whiteouts)
options.process_whiteouts = TRUE;
if (subpath)

View File

@ -26,6 +26,7 @@ python -c 'import yaml; yaml.safe_load(open("version.yaml"))'
echo "ok yaml version"
CHECKOUT_U_ARG=""
CHECKOUT_H_ARGS="-H"
COMMIT_ARGS=""
DIFF_ARGS=""
if is_bare_user_only_repo repo; then
@ -36,6 +37,11 @@ if is_bare_user_only_repo repo; then
DIFF_ARGS="--owner-uid=0 --owner-gid=0 --no-xattrs"
# Also, since we can't check out uid=0 files we need to check out in user mode
CHECKOUT_U_ARG="-U"
CHECKOUT_H_ARGS="-U -H"
else
if grep -E -q '^mode=bare-user' repo/config; then
CHECKOUT_H_ARGS="-U -H"
fi
fi
validate_checkout_basic() {
@ -469,31 +475,56 @@ assert_file_has_content checkout-test-union-add/union-add-test 'existing file fo
assert_file_has_content checkout-test-union-add/union-add-test2 'another file for union add testing'
echo "ok checkout union add"
# Create some new files for testing
# Test --union-identical <https://github.com/projectatomic/rpm-ostree/issues/982>
# Prepare data:
cd ${test_tmpdir}
mkdir disjoint-union-test
mkdir disjoint-union-test/test_one
chmod a+w disjoint-union-test/test_one
echo 'file for add dirs testing' > disjoint-union-test/test_one/test_file
$OSTREE commit ${COMMIT_ARGS} -b test-disjoint-union --tree=dir=disjoint-union-test
$OSTREE checkout --disjoint-union test-disjoint-union checkout-test-disjoint-union
echo "ok adding new directories and new file"
# Make a new file, and try the checkout again
echo 'second test file' > disjoint-union-test/test_one/test_second_file
$OSTREE commit ${COMMIT_ARGS} -b test-disjoint-union --tree=dir=disjoint-union-test
# Check out the latest commit, should fail due to presence of existing files
if $OSTREE checkout --disjoint-union test-disjoint-union checkout-test-disjoint-union 2> err.txt; then
assert_not_reached "checking out files unexpectedly succeeded!"
for x in $(seq 3); do
mkdir -p pkg${x}/usr/{bin,share/licenses}
# Separate binaries and symlinks
echo 'binary for pkg'${x} > pkg${x}/usr/bin/pkg${x}
ln -s pkg${x} pkg${x}/usr/bin/link${x}
# But they share the GPL
echo 'this is the GPL' > pkg${x}/usr/share/licenses/COPYING
ln -s COPYING pkg${x}/usr/share/licenses/LICENSE
$OSTREE commit -b union-identical-pkg${x} --tree=dir=pkg${x}
done
rm union-identical-test -rf
for x in $(seq 3); do
$OSTREE checkout ${CHECKOUT_H_ARGS} --union-identical union-identical-pkg${x} union-identical-test
done
if $OSTREE checkout ${CHECKOUT_H_ARGS/-H/} --union-identical union-identical-pkg${x} union-identical-test-tmp 2>err.txt; then
fatal "--union-identical without -H"
fi
assert_file_has_content err.txt 'File exists'
# Verify that Union mode still functions properly
rm checkout-test-disjoint-union/test_one/test_file
echo 'file for testing union mode alongwith disjoint-union mode' > checkout-test-disjoint-union/test_one/test_file
$OSTREE checkout --union test-disjoint-union checkout-test-disjoint-union
assert_has_file checkout-test-disjoint-union/test_one/test_second_file
# This shows the file with same name has been successfully overwriten
assert_file_has_content checkout-test-disjoint-union/test_one/test_file 'file for add dirs testing'
echo "ok checkout disjoint union"
assert_file_has_content err.txt "error:.*--union-identical requires --require-hardlinks"
for x in $(seq 3); do
for v in pkg link; do
assert_file_has_content union-identical-test/usr/bin/${v}${x} "binary for pkg"${x}
done
for v in COPYING LICENSE; do
assert_file_has_content union-identical-test/usr/share/licenses/${v} GPL
done
done
echo "ok checkout union identical merges"
# Make conflicting packages, one with regfile, one with symlink
mkdir -p pkg-conflict1bin/usr/{bin,share/licenses}
echo 'binary for pkg-conflict1bin' > pkg-conflict1bin/usr/bin/pkg1
echo 'this is the GPL' > pkg-conflict1bin/usr/share/licenses/COPYING
$OSTREE commit -b union-identical-conflictpkg1bin --tree=dir=pkg-conflict1bin
mkdir -p pkg-conflict1link/usr/{bin,share/licenses}
ln -s somewhere-else > pkg-conflict1link/usr/bin/pkg1
echo 'this is the GPL' > pkg-conflict1link/usr/share/licenses/COPYING
$OSTREE commit -b union-identical-conflictpkg1link --tree=dir=pkg-conflict1link
for v in bin link; do
rm union-identical-test -rf
$OSTREE checkout ${CHECKOUT_H_ARGS} --union-identical union-identical-pkg1 union-identical-test
if $OSTREE checkout ${CHECKOUT_H_ARGS} --union-identical union-identical-conflictpkg1${v} union-identical-test 2>err.txt; then
fatal "union identical $v succeeded?"
fi
assert_file_has_content err.txt 'error:.*File exists'
done
echo "ok checkout union identical conflicts"
cd ${test_tmpdir}
rm files -rf && mkdir files