mirror of
https://github.com/ostreedev/ostree.git
synced 2025-08-25 13:49:24 +03:00
static-delta: find a similar filename using what is before '.' or '-'
Improve the heuristic to use only the part before the first '.' when looking for a similar file in the current directory. Recent versions of dracut generate reproducible initramfs files, but we still fall back to the full file download if there is any minimal change that causes a different checksum and file name. This change extends that case to deal better with similar files that have a different suffix.

This is the difference when generating a static delta from fedora-atomic/f24/x86_64/docker-host to fedora-atomic/f24/x86_64/testing/docker-host.

Before the patch:

fallback for 111ec866aa7ce3688407fa4a1ae7c9fca93dcee0b851fc9434c59ff947830cc7 (47.0 MB)
fallback for c6a898265de22b02c89ea2f35d132628d0ee1c0a058052ed14fee5799c17904c (47.0 MB)
fallback for fbce656249ece77260887ed873e445561b9d43bcb28a32e759c0b1bab89e7137 (6.6 MB)
fallback for cfdb51457e47e0a0fe0bac38991a21279d2646ff2f019630c7b52a0cd3451397 (6.6 MB)
part 0 n:1972 compressed:11239809 uncompressed:33747412
part 1 n:1079 compressed:9683681 uncompressed:55641397
part 2 n:1507 compressed:15050265 uncompressed:44448838
part 3 n:101 compressed:1865881 uncompressed:31896086
part 4 n:278 compressed:2452585 uncompressed:52811323
part 5 n:18 compressed:67621 uncompressed:100220
uncompressed=218645276 compressed=40359842 loose=545102
rollsum=49 objects, 2117254 bytes
bsdiff=4067 objects

After the patch:

part 0 n:843 compressed:19844109 uncompressed:95443178
part 1 n:1223 compressed:11188609 uncompressed:33330401
part 2 n:990 compressed:15762905 uncompressed:61214132
part 3 n:1441 compressed:20614573 uncompressed:31534195
part 4 n:163 compressed:2734997 uncompressed:51356423
part 5 n:285 compressed:2480813 uncompressed:52902904
part 6 n:14 compressed:59125 uncompressed:75341
uncompressed=325856574 compressed=72685131 loose=533283
rollsum=51 objects, 57235332 bytes
bsdiff=4073 objects

Signed-off-by: Giuseppe Scrivano <gscrivan@redhat.com>
Closes: #554
Approved by: cgwalters
This commit is contained in:
committed by
Atomic Bot
parent
0333260559
commit
c4c8937b20
@ -189,18 +189,30 @@ build_content_sizenames_filtered (OstreeRepo *repo,
|
||||
|
||||
static gboolean
|
||||
string_array_nonempty_intersection (GPtrArray *a,
|
||||
GPtrArray *b)
|
||||
GPtrArray *b,
|
||||
gboolean fuzzy)
|
||||
{
|
||||
guint i;
|
||||
for (i = 0; i < a->len; i++)
|
||||
{
|
||||
guint j;
|
||||
const char *a_str = a->pdata[i];
|
||||
const char *a_dot = strchr (a_str, '.');
|
||||
for (j = 0; j < b->len; j++)
|
||||
{
|
||||
const char *b_str = b->pdata[j];
|
||||
if (strcmp (a_str, b_str) == 0)
|
||||
return TRUE;
|
||||
const char *b_dot = strchr (b_str, '.');
|
||||
/* When doing fuzzy comparison, just compare the part before the '.' if it exists. */
|
||||
if (fuzzy && a_dot && b_dot && b_dot - b_str && b_dot - b_str == a_dot - a_str)
|
||||
{
|
||||
if (strncmp (a_str, b_str, a_dot - a_str) == 0)
|
||||
return TRUE;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (strcmp (a_str, b_str) == 0)
|
||||
return TRUE;
|
||||
}
|
||||
}
|
||||
}
|
||||
return FALSE;
|
||||
@ -258,6 +270,8 @@ _ostree_delta_compute_similar_objects (OstreeRepo *repo,
|
||||
upper = from_sizes->len;
|
||||
for (i = 0; i < to_sizes->len; i++)
|
||||
{
|
||||
int fuzzy;
|
||||
gboolean found = FALSE;
|
||||
OstreeDeltaContentSizeNames *to_sizenames = to_sizes->pdata[i];
|
||||
const guint64 min_threshold = to_sizenames->size *
|
||||
(1.0-similarity_percent_threshold/100.0);
|
||||
@ -268,31 +282,41 @@ _ostree_delta_compute_similar_objects (OstreeRepo *repo,
|
||||
if (to_sizenames->size == 0)
|
||||
continue;
|
||||
|
||||
for (j = lower; j < upper; j++)
|
||||
for (fuzzy = 0; fuzzy < 2 && !found; fuzzy++)
|
||||
{
|
||||
OstreeDeltaContentSizeNames *from_sizenames = from_sizes->pdata[j];
|
||||
|
||||
/* Don't build candidates for the empty object */
|
||||
if (from_sizenames->size == 0)
|
||||
continue;
|
||||
|
||||
if (from_sizenames->size < min_threshold)
|
||||
for (j = lower; j < upper; j++)
|
||||
{
|
||||
lower++;
|
||||
continue;
|
||||
OstreeDeltaContentSizeNames *from_sizenames = from_sizes->pdata[j];
|
||||
|
||||
/* Don't build candidates for the empty object */
|
||||
if (from_sizenames->size == 0)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
if (from_sizenames->size < min_threshold)
|
||||
{
|
||||
lower++;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (from_sizenames->size > max_threshold)
|
||||
break;
|
||||
|
||||
if (!string_array_nonempty_intersection (from_sizenames->basenames,
|
||||
to_sizenames->basenames,
|
||||
fuzzy == 1))
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
/* Only one candidate right now */
|
||||
g_hash_table_insert (ret_modified_regfile_content,
|
||||
g_strdup (to_sizenames->checksum),
|
||||
g_strdup (from_sizenames->checksum));
|
||||
found = TRUE;
|
||||
break;
|
||||
}
|
||||
|
||||
if (from_sizenames->size > max_threshold)
|
||||
break;
|
||||
|
||||
if (!string_array_nonempty_intersection (from_sizenames->basenames, to_sizenames->basenames))
|
||||
continue;
|
||||
|
||||
/* Only one candidate right now */
|
||||
g_hash_table_insert (ret_modified_regfile_content,
|
||||
g_strdup (to_sizenames->checksum),
|
||||
g_strdup (from_sizenames->checksum));
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
|
Reference in New Issue
Block a user