From 87b7d9b6ff23ec10b66bf53efeabf16ad85d7ad8 Mon Sep 17 00:00:00 2001 From: Yu Watanabe Date: Thu, 4 Feb 2021 05:55:59 +0900 Subject: [PATCH] string-util: introduce strverscmp_improved() Unfortunately, strverscmp() from libc or str_verscmp() do not correctly handle pre-release version, e.g. 247 vs 247~rc1. This implement a new comparison function, which is based on the RPM's rpmvercmp(). --- src/fundamental/string-util-fundamental.c | 158 ++++++++++++++++++++++ src/fundamental/string-util-fundamental.h | 2 + src/test/test-string-util.c | 79 +++++++++++ 3 files changed, 239 insertions(+) diff --git a/src/fundamental/string-util-fundamental.c b/src/fundamental/string-util-fundamental.c index a94bce6e3ec..9f14597fef8 100644 --- a/src/fundamental/string-util-fundamental.c +++ b/src/fundamental/string-util-fundamental.c @@ -1,6 +1,8 @@ /* SPDX-License-Identifier: LGPL-2.1-or-later */ #ifndef SD_BOOT +#include + #include "macro.h" #endif #include "string-util-fundamental.h" @@ -74,3 +76,159 @@ sd_char* endswith_no_case(const sd_char *s, const sd_char *postfix) { return (sd_char*) s + sl - pl; } + +#ifdef SD_BOOT +static sd_bool isdigit(sd_char a) { + return a >= '0' && a <= '9'; +} +#endif + +static sd_bool is_alpha(sd_char a) { + /* Locale independent version of isalpha(). */ + return (a >= 'a' && a <= 'z') || (a >= 'A' && a <= 'Z'); +} + +static sd_bool is_valid_version_char(sd_char a) { + return isdigit(a) || is_alpha(a) || IN_SET(a, '~', '-', '^', '.'); +} + +sd_int strverscmp_improved(const sd_char *a, const sd_char *b) { + + /* This is based on RPM's rpmvercmp(). But this explicitly handles '-' and '.', as we usually + * want to directly compare strings which contain both version and release; e.g. + * '247.2-3.1.fc33.x86_64' or '5.11.0-0.rc5.20210128git76c057c84d28.137.fc34'. + * Unlike rpmvercmp(), this distiguishes e.g. 123a and 123.a, and 123a is newer. + * + * This splits the input strings into segments. Each segment is numeric or alpha, and may be + * prefixed with the following: + * '~' : used for pre-releases, a segment prefixed with this is the oldest, + * '-' : used for the separator between version and release, + * '^' : used for patched releases, a segment with this is newer than one with '-'. + * '.' : used for point releases. + * Note, no prefix segment is the newest. All non-supported characters are dropped, and + * handled as a separator of segments, e.g., 123_a is equivalent to 123a. + * + * By using this, version strings can be sorted like following: + * (older) 122.1 + * ^ 123~rc1-1 + * | 123 + * | 123-a + * | 123-a.1 + * | 123-1 + * | 123-1.1 + * | 123^post1 + * | 123.a-1 + * | 123.1-1 + * v 123a-1 + * (newer) 124-1 + */ + + if (isempty(a) || isempty(b)) + return strcmp_ptr(a, b); + + for (;;) { + const sd_char *aa, *bb; + sd_int r; + + /* Drop leading invalid characters. */ + while (*a != '\0' && !is_valid_version_char(*a)) + a++; + while (*b != '\0' && !is_valid_version_char(*b)) + b++; + + /* Handle '~'. Used for pre-releases, e.g. 123~rc1, or 4.5~alpha1 */ + if (*a == '~' || *b == '~') { + /* The string prefixed with '~' is older. */ + r = CMP(*a != '~', *b != '~'); + if (r != 0) + return r; + + /* Now both strings are prefixed with '~'. Compare remaining strings. */ + a++; + b++; + } + + /* If at least one string reaches the end, then longer is newer. + * Note that except for '~' prefixed segments, a string has more segments is newer. + * So, this check must be after the '~' check. */ + if (*a == '\0' || *b == '\0') + return strcmp(a, b); + + /* Handle '-', which separates version and release, e.g 123.4-3.1.fc33.x86_64 */ + if (*a == '-' || *b == '-') { + /* The string prefixed with '-' is older (e.g., 123-9 vs 123.1-1) */ + r = CMP(*a != '-', *b != '-'); + if (r != 0) + return r; + + a++; + b++; + } + + /* Handle '^'. Used for patched release. */ + if (*a == '^' || *b == '^') { + r = CMP(*a != '^', *b != '^'); + if (r != 0) + return r; + + a++; + b++; + } + + /* Handle '.'. Used for point releases. */ + if (*a == '.' || *b == '.') { + r = CMP(*a != '.', *b != '.'); + if (r != 0) + return r; + + a++; + b++; + } + + if (isdigit(*a) || isdigit(*b)) { + /* Skip leading '0', to make 00123 equivalent to 123. */ + while (*a == '0') + a++; + while (*b == '0') + b++; + + /* Find the leading numeric segments. One may be an empty string. So, + * numeric segments are always newer than alpha segments. */ + for (aa = a; *aa != '\0' && isdigit(*aa); aa++) + ; + for (bb = b; *bb != '\0' && isdigit(*bb); bb++) + ; + + /* To compare numeric segments without parsing their values, first compare the + * lengths of the segments. Eg. 12345 vs 123, longer is newer. */ + r = CMP(aa - a, bb - b); + if (r != 0) + return r; + + /* Then, compare them as strings. */ + r = strncmp(a, b, aa - a); + if (r != 0) + return r; + } else { + /* Find the leading non-numeric segments. */ + for (aa = a; *aa != '\0' && is_alpha(*aa); aa++) + ; + for (bb = b; *bb != '\0' && is_alpha(*bb); bb++) + ; + + /* Note that the segments are usually not NUL-terminated. */ + r = strncmp(a, b, MIN(aa - a, bb - b)); + if (r != 0) + return r; + + /* Longer is newer, e.g. abc vs abcde. */ + r = CMP(aa - a, bb - b); + if (r != 0) + return r; + } + + /* The current segments are equivalent. Let's compare the next one. */ + a = aa; + b = bb; + } +} diff --git a/src/fundamental/string-util-fundamental.h b/src/fundamental/string-util-fundamental.h index b1f067fc56f..407cede48d9 100644 --- a/src/fundamental/string-util-fundamental.h +++ b/src/fundamental/string-util-fundamental.h @@ -63,3 +63,5 @@ static inline sd_bool isempty(const sd_char *a) { static inline const sd_char *yes_no(sd_bool b) { return b ? STR_C("yes") : STR_C("no"); } + +sd_int strverscmp_improved(const sd_char *a, const sd_char *b); diff --git a/src/test/test-string-util.c b/src/test/test-string-util.c index 67f083ff93a..12eafaaf22d 100644 --- a/src/test/test-string-util.c +++ b/src/test/test-string-util.c @@ -886,6 +886,84 @@ static void test_string_contains_word(void) { assert_se(!string_contains_word("a:b:cc", ":#", ":cc")); } +static void test_strverscmp_improved_one(const char *newer, const char *older) { + log_info("/* %s(%s, %s) */", __func__, strnull(newer), strnull(older)); + + assert_se(strverscmp_improved(newer, newer) == 0); + assert_se(strverscmp_improved(newer, older) > 0); + assert_se(strverscmp_improved(older, newer) < 0); + assert_se(strverscmp_improved(older, older) == 0); +} + +static void test_strverscmp_improved(void) { + static const char * const versions[] = { + "", + "~1", + "ab", + "abb", + "abc", + "0001", + "002", + "12", + "122", + "122.9", + "123~rc1", + "123", + "123-a", + "123-a.1", + "123-a1", + "123-a1.1", + "123-3", + "123-3.1", + "123^patch1", + "123^1", + "123.a-1" + "123.1-1", + "123a-1", + "124", + NULL, + }; + const char * const *p, * const *q; + + STRV_FOREACH(p, versions) + STRV_FOREACH(q, p + 1) + test_strverscmp_improved_one(*q, *p); + + test_strverscmp_improved_one("123.45-67.89", "123.45-67.88"); + test_strverscmp_improved_one("123.45-67.89a", "123.45-67.89"); + test_strverscmp_improved_one("123.45-67.89", "123.45-67.ab"); + test_strverscmp_improved_one("123.45-67.89", "123.45-67.9"); + test_strverscmp_improved_one("123.45-67.89", "123.45-67"); + test_strverscmp_improved_one("123.45-67.89", "123.45-66.89"); + test_strverscmp_improved_one("123.45-67.89", "123.45-9.99"); + test_strverscmp_improved_one("123.45-67.89", "123.42-99.99"); + test_strverscmp_improved_one("123.45-67.89", "123-99.99"); + + /* '~' : pre-releases */ + test_strverscmp_improved_one("123.45-67.89", "123~rc1-99.99"); + test_strverscmp_improved_one("123-45.67.89", "123~rc1-99.99"); + test_strverscmp_improved_one("123~rc2-67.89", "123~rc1-99.99"); + test_strverscmp_improved_one("123^aa2-67.89", "123~rc1-99.99"); + test_strverscmp_improved_one("123aa2-67.89", "123~rc1-99.99"); + + /* '-' : separator between version and release. */ + test_strverscmp_improved_one("123.45-67.89", "123-99.99"); + test_strverscmp_improved_one("123^aa2-67.89", "123-99.99"); + test_strverscmp_improved_one("123aa2-67.89", "123-99.99"); + + /* '^' : patch releases */ + test_strverscmp_improved_one("123.45-67.89", "123^45-67.89"); + test_strverscmp_improved_one("123^aa2-67.89", "123^aa1-99.99"); + test_strverscmp_improved_one("123aa2-67.89", "123^aa2-67.89"); + + /* '.' : point release */ + test_strverscmp_improved_one("123aa2-67.89", "123.aa2-67.89"); + test_strverscmp_improved_one("123.ab2-67.89", "123.aa2-67.89"); + + /* invalid characters */ + assert_se(strverscmp_improved("123_aa2-67.89", "123aa+2-67.89") == 0); +} + int main(int argc, char *argv[]) { test_setup_logging(LOG_DEBUG); @@ -923,6 +1001,7 @@ int main(int argc, char *argv[]) { test_string_extract_line(); test_string_contains_word_strv(); test_string_contains_word(); + test_strverscmp_improved(); return 0; }