From 46083ab321826d920b32de85c7328a5f39b7fff4 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Zbigniew=20J=C4=99drzejewski-Szmek?= Date: Tue, 24 May 2022 21:20:36 +0200 Subject: [PATCH] basic/string-util: tweak strverscmp_improved() for some corner cases So far we had the rule that '' == '', '0_' == '0', but '_' > ''. This means that the general rule that strings are compared iteratively, and each segment that compares equal can be dropped and the comparison resumes at the following characters wasn't true in such cases. Similarly, '0~' < '0', but after dropping the common segment, '~' > ''. The special handling of empty strings is dropped, and '_' == '' and '~' < ''. --- src/fundamental/string-util-fundamental.c | 21 +++++++++++-------- src/test/test-string-util.c | 25 ++++++++++++++++++++++- 2 files changed, 37 insertions(+), 9 deletions(-) diff --git a/src/fundamental/string-util-fundamental.c b/src/fundamental/string-util-fundamental.c index feccb822ff..73abc2f8c8 100644 --- a/src/fundamental/string-util-fundamental.c +++ b/src/fundamental/string-util-fundamental.c @@ -124,8 +124,8 @@ sd_int strverscmp_improved(const sd_char *a, const sd_char *b) { * (newer) 124-1 */ - if (isempty(a) || isempty(b)) - return CMP(strcmp_ptr(a, b), 0); + a = strempty(a); + b = strempty(b); for (;;) { const sd_char *aa, *bb; @@ -187,12 +187,6 @@ sd_int strverscmp_improved(const sd_char *a, const sd_char *b) { } if (is_digit(*a) || is_digit(*b)) { - /* Skip leading '0', to make 00123 equivalent to 123. */ - while (*a == '0') - a++; - while (*b == '0') - b++; - /* Find the leading numeric segments. One may be an empty string. So, * numeric segments are always newer than alpha segments. */ for (aa = a; is_digit(*aa); aa++) @@ -200,6 +194,17 @@ sd_int strverscmp_improved(const sd_char *a, const sd_char *b) { for (bb = b; is_digit(*bb); bb++) ; + /* Check if one of the strings was empty, but the other not. */ + r = CMP(a != aa, b != bb); + if (r != 0) + return r; + + /* Skip leading '0', to make 00123 equivalent to 123. */ + while (*a == '0') + a++; + while (*b == '0') + b++; + /* To compare numeric segments without parsing their values, first compare the * lengths of the segments. Eg. 12345 vs 123, longer is newer. */ r = CMP(aa - a, bb - b); diff --git a/src/test/test-string-util.c b/src/test/test-string-util.c index 1054f9ea31..2faceca4e9 100644 --- a/src/test/test-string-util.c +++ b/src/test/test-string-util.c @@ -852,8 +852,8 @@ static void test_strverscmp_improved_newer(const char *older, const char *newer) TEST(strverscmp_improved) { static const char * const versions[] = { - "", "~1", + "", "ab", "abb", "abc", @@ -917,6 +917,29 @@ TEST(strverscmp_improved) { /* invalid characters */ assert_se(strverscmp_improved("123_aa2-67.89", "123aa+2-67.89") == 0); + /* some corner cases */ + assert_se(strverscmp_improved("123.", "123") > 0); /* One more version segment */ + assert_se(strverscmp_improved("12_3", "123") < 0); /* 12 < 123 */ + assert_se(strverscmp_improved("12_3", "12") > 0); /* 3 > '' */ + assert_se(strverscmp_improved("12_3", "12.3") > 0); /* 3 > '' */ + assert_se(strverscmp_improved("123.0", "123") > 0); /* 0 > '' */ + assert_se(strverscmp_improved("123_0", "123") > 0); /* 0 > '' */ + assert_se(strverscmp_improved("123..0", "123.0") < 0); /* '' < 0 */ + + /* empty strings or strings with ignored characters only */ + assert_se(strverscmp_improved("", NULL) == 0); + assert_se(strverscmp_improved(NULL, "") == 0); + assert_se(strverscmp_improved("0_", "0") == 0); + assert_se(strverscmp_improved("_0_", "0") == 0); + assert_se(strverscmp_improved("_0", "0") == 0); + assert_se(strverscmp_improved("0", "0___") == 0); + assert_se(strverscmp_improved("", "_") == 0); + assert_se(strverscmp_improved("_", "") == 0); + assert_se(strverscmp_improved("_", "_") == 0); + assert_se(strverscmp_improved("", "~") > 0); + assert_se(strverscmp_improved("~", "") < 0); + assert_se(strverscmp_improved("~", "~") == 0); + /* non-ASCII digits */ (void) setlocale(LC_NUMERIC, "ar_YE.utf8"); assert_se(strverscmp_improved("1٠١٢٣٤٥٦٧٨٩", "1") == 0); -- 2.25.1