1
0
mirror of https://github.com/systemd/systemd.git synced 2024-12-23 21:35:11 +03:00

string-util: introduce strverscmp_improved()

Unfortunately, strverscmp() from libc or str_verscmp() do not correctly
handle pre-release version, e.g. 247 vs 247~rc1.

This implement a new comparison function, which is based on the RPM's
rpmvercmp().
This commit is contained in:
Yu Watanabe 2021-02-04 05:55:59 +09:00
parent e5bc5f1f5a
commit 87b7d9b6ff
3 changed files with 239 additions and 0 deletions

View File

@ -1,6 +1,8 @@
/* SPDX-License-Identifier: LGPL-2.1-or-later */
#ifndef SD_BOOT
#include <ctype.h>
#include "macro.h"
#endif
#include "string-util-fundamental.h"
@ -74,3 +76,159 @@ sd_char* endswith_no_case(const sd_char *s, const sd_char *postfix) {
return (sd_char*) s + sl - pl;
}
#ifdef SD_BOOT
static sd_bool isdigit(sd_char a) {
return a >= '0' && a <= '9';
}
#endif
static sd_bool is_alpha(sd_char a) {
/* Locale independent version of isalpha(). */
return (a >= 'a' && a <= 'z') || (a >= 'A' && a <= 'Z');
}
static sd_bool is_valid_version_char(sd_char a) {
return isdigit(a) || is_alpha(a) || IN_SET(a, '~', '-', '^', '.');
}
sd_int strverscmp_improved(const sd_char *a, const sd_char *b) {
/* This is based on RPM's rpmvercmp(). But this explicitly handles '-' and '.', as we usually
* want to directly compare strings which contain both version and release; e.g.
* '247.2-3.1.fc33.x86_64' or '5.11.0-0.rc5.20210128git76c057c84d28.137.fc34'.
* Unlike rpmvercmp(), this distiguishes e.g. 123a and 123.a, and 123a is newer.
*
* This splits the input strings into segments. Each segment is numeric or alpha, and may be
* prefixed with the following:
* '~' : used for pre-releases, a segment prefixed with this is the oldest,
* '-' : used for the separator between version and release,
* '^' : used for patched releases, a segment with this is newer than one with '-'.
* '.' : used for point releases.
* Note, no prefix segment is the newest. All non-supported characters are dropped, and
* handled as a separator of segments, e.g., 123_a is equivalent to 123a.
*
* By using this, version strings can be sorted like following:
* (older) 122.1
* ^ 123~rc1-1
* | 123
* | 123-a
* | 123-a.1
* | 123-1
* | 123-1.1
* | 123^post1
* | 123.a-1
* | 123.1-1
* v 123a-1
* (newer) 124-1
*/
if (isempty(a) || isempty(b))
return strcmp_ptr(a, b);
for (;;) {
const sd_char *aa, *bb;
sd_int r;
/* Drop leading invalid characters. */
while (*a != '\0' && !is_valid_version_char(*a))
a++;
while (*b != '\0' && !is_valid_version_char(*b))
b++;
/* Handle '~'. Used for pre-releases, e.g. 123~rc1, or 4.5~alpha1 */
if (*a == '~' || *b == '~') {
/* The string prefixed with '~' is older. */
r = CMP(*a != '~', *b != '~');
if (r != 0)
return r;
/* Now both strings are prefixed with '~'. Compare remaining strings. */
a++;
b++;
}
/* If at least one string reaches the end, then longer is newer.
* Note that except for '~' prefixed segments, a string has more segments is newer.
* So, this check must be after the '~' check. */
if (*a == '\0' || *b == '\0')
return strcmp(a, b);
/* Handle '-', which separates version and release, e.g 123.4-3.1.fc33.x86_64 */
if (*a == '-' || *b == '-') {
/* The string prefixed with '-' is older (e.g., 123-9 vs 123.1-1) */
r = CMP(*a != '-', *b != '-');
if (r != 0)
return r;
a++;
b++;
}
/* Handle '^'. Used for patched release. */
if (*a == '^' || *b == '^') {
r = CMP(*a != '^', *b != '^');
if (r != 0)
return r;
a++;
b++;
}
/* Handle '.'. Used for point releases. */
if (*a == '.' || *b == '.') {
r = CMP(*a != '.', *b != '.');
if (r != 0)
return r;
a++;
b++;
}
if (isdigit(*a) || isdigit(*b)) {
/* Skip leading '0', to make 00123 equivalent to 123. */
while (*a == '0')
a++;
while (*b == '0')
b++;
/* Find the leading numeric segments. One may be an empty string. So,
* numeric segments are always newer than alpha segments. */
for (aa = a; *aa != '\0' && isdigit(*aa); aa++)
;
for (bb = b; *bb != '\0' && isdigit(*bb); bb++)
;
/* To compare numeric segments without parsing their values, first compare the
* lengths of the segments. Eg. 12345 vs 123, longer is newer. */
r = CMP(aa - a, bb - b);
if (r != 0)
return r;
/* Then, compare them as strings. */
r = strncmp(a, b, aa - a);
if (r != 0)
return r;
} else {
/* Find the leading non-numeric segments. */
for (aa = a; *aa != '\0' && is_alpha(*aa); aa++)
;
for (bb = b; *bb != '\0' && is_alpha(*bb); bb++)
;
/* Note that the segments are usually not NUL-terminated. */
r = strncmp(a, b, MIN(aa - a, bb - b));
if (r != 0)
return r;
/* Longer is newer, e.g. abc vs abcde. */
r = CMP(aa - a, bb - b);
if (r != 0)
return r;
}
/* The current segments are equivalent. Let's compare the next one. */
a = aa;
b = bb;
}
}

View File

@ -63,3 +63,5 @@ static inline sd_bool isempty(const sd_char *a) {
static inline const sd_char *yes_no(sd_bool b) {
return b ? STR_C("yes") : STR_C("no");
}
sd_int strverscmp_improved(const sd_char *a, const sd_char *b);

View File

@ -886,6 +886,84 @@ static void test_string_contains_word(void) {
assert_se(!string_contains_word("a:b:cc", ":#", ":cc"));
}
static void test_strverscmp_improved_one(const char *newer, const char *older) {
log_info("/* %s(%s, %s) */", __func__, strnull(newer), strnull(older));
assert_se(strverscmp_improved(newer, newer) == 0);
assert_se(strverscmp_improved(newer, older) > 0);
assert_se(strverscmp_improved(older, newer) < 0);
assert_se(strverscmp_improved(older, older) == 0);
}
static void test_strverscmp_improved(void) {
static const char * const versions[] = {
"",
"~1",
"ab",
"abb",
"abc",
"0001",
"002",
"12",
"122",
"122.9",
"123~rc1",
"123",
"123-a",
"123-a.1",
"123-a1",
"123-a1.1",
"123-3",
"123-3.1",
"123^patch1",
"123^1",
"123.a-1"
"123.1-1",
"123a-1",
"124",
NULL,
};
const char * const *p, * const *q;
STRV_FOREACH(p, versions)
STRV_FOREACH(q, p + 1)
test_strverscmp_improved_one(*q, *p);
test_strverscmp_improved_one("123.45-67.89", "123.45-67.88");
test_strverscmp_improved_one("123.45-67.89a", "123.45-67.89");
test_strverscmp_improved_one("123.45-67.89", "123.45-67.ab");
test_strverscmp_improved_one("123.45-67.89", "123.45-67.9");
test_strverscmp_improved_one("123.45-67.89", "123.45-67");
test_strverscmp_improved_one("123.45-67.89", "123.45-66.89");
test_strverscmp_improved_one("123.45-67.89", "123.45-9.99");
test_strverscmp_improved_one("123.45-67.89", "123.42-99.99");
test_strverscmp_improved_one("123.45-67.89", "123-99.99");
/* '~' : pre-releases */
test_strverscmp_improved_one("123.45-67.89", "123~rc1-99.99");
test_strverscmp_improved_one("123-45.67.89", "123~rc1-99.99");
test_strverscmp_improved_one("123~rc2-67.89", "123~rc1-99.99");
test_strverscmp_improved_one("123^aa2-67.89", "123~rc1-99.99");
test_strverscmp_improved_one("123aa2-67.89", "123~rc1-99.99");
/* '-' : separator between version and release. */
test_strverscmp_improved_one("123.45-67.89", "123-99.99");
test_strverscmp_improved_one("123^aa2-67.89", "123-99.99");
test_strverscmp_improved_one("123aa2-67.89", "123-99.99");
/* '^' : patch releases */
test_strverscmp_improved_one("123.45-67.89", "123^45-67.89");
test_strverscmp_improved_one("123^aa2-67.89", "123^aa1-99.99");
test_strverscmp_improved_one("123aa2-67.89", "123^aa2-67.89");
/* '.' : point release */
test_strverscmp_improved_one("123aa2-67.89", "123.aa2-67.89");
test_strverscmp_improved_one("123.ab2-67.89", "123.aa2-67.89");
/* invalid characters */
assert_se(strverscmp_improved("123_aa2-67.89", "123aa+2-67.89") == 0);
}
int main(int argc, char *argv[]) {
test_setup_logging(LOG_DEBUG);
@ -923,6 +1001,7 @@ int main(int argc, char *argv[]) {
test_string_extract_line();
test_string_contains_word_strv();
test_string_contains_word();
test_strverscmp_improved();
return 0;
}