diff --git a/lib/util/stable_sort.c b/lib/util/stable_sort.c new file mode 100644 index 00000000000..47667c4284e --- /dev/null +++ b/lib/util/stable_sort.c @@ -0,0 +1,250 @@ +/* + Stable sort routines + + Copyright © Douglas Bagnall + + ** NOTE! The following LGPL license applies to this file which is used by + ** the ldb library. This does NOT imply that all of Samba is released + ** under the LGPL. + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 3 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, see . +*/ + +#include +#include +#include +#include +#include +#include "replace.h" +#include "stable_sort.h" + +static void sort_few(char *array, char *aux, + size_t n, + size_t s, + samba_compare_with_context_fn_t cmpfn, + void *opaque) +{ + /* a kind of insertion sort for small n. */ + int i, j, dist; + int cmp; + char *a, *b; + + for (i = 1; i < n; i++) { + a = &array[i * s]; + /* leftwards is sorted. look until we find this one's place */ + for (j = i - 1; j >= 0; j--) { + b = &array[j * s]; + cmp = cmpfn(a, b, opaque); + if (cmp >= 0) { + break; + } + } + dist = i - 1 - j; + if (dist == 0) { + /* a is already in the right place */ + continue; + } + + b = &array[(i - dist) * s]; + memcpy(aux, a, s); + memmove(b + s, b, s * dist); + memcpy(b, aux, s); + } +} + + +static void merge(char *dest, + char *a, size_t alen, + char *b, size_t blen, + size_t s, + samba_compare_with_context_fn_t cmpfn, + void *opaque) +{ + size_t ai = 0; + size_t bi = 0; + size_t di = 0; + while (ai < alen && bi < blen) { + int cmp = cmpfn(&a[ai * s], &b[bi * s], opaque); + if (cmp <= 0) { + memcpy(&dest[di * s], &a[ai * s], s); + ai++; + } else { + memcpy(&dest[di * s], &b[bi * s], s); + bi++; + } + di++; + } + if (ai < alen) { + memcpy(&dest[di * s], &a[ai * s], s * (alen - ai)); + } else if (bi < blen) { + memcpy(&dest[di * s], &b[bi * s], s * (blen - bi)); + } +} + + +bool stable_sort_r(void *array, void *aux, + size_t n, + size_t s, + samba_compare_with_context_fn_t cmpfn, + void * opaque) +{ + char *src = array, *dest = aux, *tmp = NULL; + size_t i, j, k; + size_t runsize; + if (array == NULL || aux == NULL) { + return false; + } + + if (n < 20) { + sort_few(array, aux, n, s, cmpfn, opaque); + return true; + } + + if (n > SIZE_MAX / s) { + /* + * We will have an integer overflow if we continue. + * + * This means that the *supposed* size of the allocated buffer + * is greater than SIZE_MAX, which is not possible in theory + * or practice, and is a sign the caller has got very + * confused. + */ + return false; + } + + /* + * This is kind of a bottom-up merge sort. + * + * We start but sorting into a whole lot of little runs, using an + * insertion sort which is efficient for small numbers. Empirically, + * on 2 machines, a run size of around 8 seems optimal, but the peak + * is wide, and it seems worth adapting the size to avoid an + * unbalanced final merge at the top. That is, if we pick the right + * runsize now, we will finish with a merge of roughly n/2:n/2, and + * not have to follow that with an merge of roughly n:[a few], which + * we would sometimes do with a fixed size at the lowest level. + * + * The aim is a runsize of n / (a power of 2) rounded up, in the + * target range. + */ + + runsize = n; + while (runsize > 10) { + runsize++; + runsize >>= 1; + } + + for (i = 0; i < n; i += runsize) { + size_t nn = MIN(n - i, runsize); + sort_few(&src[i * s], aux, nn, s, cmpfn, opaque); + } + + while (runsize < n) { + for (i = 0; i < n; i += runsize * 2) { + j = i + runsize; + if (j >= n) { + /* + * first run doesn't fit, meaning this chunk + * is already sorted. We just need to copy + * it. + */ + size_t nn = n - i; + memcpy(&dest[i * s], &src[i * s], nn * s); + break; + } + k = j + runsize; + if (k > n) { + merge(&dest[i * s], + &src[i * s], runsize, + &src[j * s], n - j, + s, + cmpfn, opaque); + } else { + merge(&dest[i * s], + &src[i * s], runsize, + &src[j * s], runsize, + s, + cmpfn, opaque); + } + } + + tmp = src; + src = dest; + dest = tmp; + runsize *= 2; + } + /* + * We have sorted the array into src, which is either array or aux. + */ + if (src != array) { + memcpy(array, src, n * s); + } + return true; +} + + + +/* + * A wrapper that allocates (and frees) the temporary buffer if necessary. + * + * returns false on allocation error, true otherwise. + */ + +bool stable_sort_talloc_r(TALLOC_CTX *mem_ctx, + void *array, + size_t n, + size_t s, + samba_compare_with_context_fn_t cmpfn, + void *opaque) +{ + bool ok; + char *mem = talloc_array_size(mem_ctx, s, n); + if (mem == NULL) { + return false; + } + ok = stable_sort_r(array, mem, n, s, cmpfn, opaque); + talloc_free(mem); + return ok; +} + + +bool stable_sort(void *array, void *aux, + size_t n, + size_t s, + samba_compare_fn_t cmpfn) +{ + /* + * What is this magic, casting cmpfn into a different type that takes + * an extra parameter? Is that allowed? + * + * A: Yes. It's fine. The extra argument will be passed on the stack + * or (more likely) a register, and the cmpfn will remain blissfully + * unaware. + */ + return stable_sort_r(array, aux, n, s, + (samba_compare_with_context_fn_t)cmpfn, + NULL); +} + + +bool stable_sort_talloc(TALLOC_CTX *mem_ctx, + void *array, + size_t n, + size_t s, + samba_compare_fn_t cmpfn) +{ + return stable_sort_talloc_r(mem_ctx, array, n, s, + (samba_compare_with_context_fn_t)cmpfn, + NULL); +} diff --git a/lib/util/stable_sort.h b/lib/util/stable_sort.h new file mode 100644 index 00000000000..17329b81ac7 --- /dev/null +++ b/lib/util/stable_sort.h @@ -0,0 +1,46 @@ +#ifndef HAVE_STABLE_SORT_H +#define HAVE_STABLE_SORT_H 1 + +#ifdef __COMPAR_FN_T +typedef __compar_fn_t samba_compare_fn_t; + +#ifdef __USE_GNU +/* glibc defines __compar_d_fn_t for qsort_r */ +typedef __compar_d_fn_t samba_compare_with_context_fn_t; +#endif + +#else +typedef int (*samba_compare_fn_t) (const void *, const void *); +typedef int (*samba_compare_with_context_fn_t) (const void *, const void *, void *); +#endif + + + +bool stable_sort_r(void *array, void *aux, + size_t n, + size_t s, + samba_compare_with_context_fn_t cmpfn, + void *opaque); + +bool stable_sort(void *array, void *aux, + size_t n, + size_t s, + samba_compare_fn_t cmpfn); + + +bool stable_sort_talloc_r(TALLOC_CTX *mem_ctx, + void *array, + size_t n, + size_t s, + samba_compare_with_context_fn_t cmpfn, + void *opaque); + + +bool stable_sort_talloc(TALLOC_CTX *mem_ctx, + void *array, + size_t n, + size_t s, + samba_compare_fn_t cmpfn); + + +#endif /* HAVE_STABLE_SORT_H */ diff --git a/lib/util/tests/test_stable_sort.c b/lib/util/tests/test_stable_sort.c new file mode 100644 index 00000000000..ae4b1f3ea53 --- /dev/null +++ b/lib/util/tests/test_stable_sort.c @@ -0,0 +1,317 @@ +/* + * Unix SMB/CIFS implementation. + * + * Copyright (C) 2018 Andreas Schneider + * Copyright (C) 2022 Douglas Bagnall + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include +#include +#include +#include +#include +#include "replace.h" +#include + +#include "../stable_sort.h" + + +static int cmp_integer(int *a, int *b) +{ + if (a == NULL || b == NULL) { + return 0; + } + + if (*a > *b) { + return 1; + } + + if (*a < *b) { + return -1; + } + + return 0; +} + +static void test_stable_sort(void **state) +{ + int a[6] = { 6, 3, 2, 7, 9, 4 }; + int tmp[6]; + bool ok; + ok = stable_sort(a, tmp, + 6, sizeof(int), (samba_compare_fn_t)cmp_integer); + + assert_true(ok); + assert_int_equal(a[0], 2); + assert_int_equal(a[1], 3); + assert_int_equal(a[2], 4); + assert_int_equal(a[3], 6); + assert_int_equal(a[4], 7); + assert_int_equal(a[5], 9); +} + +static void test_stable_sort_talloc_short(void **state) +{ + int a[6] = { 6, 3, 2, 7, 9, 4 }; + int ret; + ret = stable_sort_talloc(NULL, a, 6, sizeof(int), + (samba_compare_fn_t)cmp_integer); + assert_true(ret); + + assert_int_equal(a[0], 2); + assert_int_equal(a[1], 3); + assert_int_equal(a[2], 4); + assert_int_equal(a[3], 6); + assert_int_equal(a[4], 7); + assert_int_equal(a[5], 9); +} + +static void test_stable_sort_talloc_long(void **state) +{ + int i, ret; + size_t n = 1500; + TALLOC_CTX *mem_ctx = talloc_new(NULL); + int *a = talloc_array(mem_ctx, int, n); + for (i = 0; i < n; i++) { + a[i] = n - i; + } + + ret = stable_sort_talloc(mem_ctx, a, n, sizeof(int), + (samba_compare_fn_t)cmp_integer); + assert_true(ret); + + for (i = 0; i < n; i++) { + assert_int_equal(a[i], 1 + i); + } +} + + +/* + * Sort an array of structs with: + * - unwieldy uneven size + * - sort key not at the start + * - comparison depends on context + * + * which are things we sometimes do. + */ + +struct contrived_struct { + char padding_1[13]; + int key[3]; + char padding_2[18]; + size_t *index; +}; + + +static int cmp_contrived_struct(struct contrived_struct *as, + struct contrived_struct *bs) +{ + int i = *as->index; + int a = as->key[i]; + int b = bs->key[i]; + return a - b; +} + +static int cmp_contrived_struct_rev(struct contrived_struct *as, + struct contrived_struct *bs) +{ + /* will sort in reverseo order */ + int i = *as->index; + int a = as->key[i]; + int b = bs->key[i]; + return b - a; +} + + +static void test_stable_sort_talloc_contrived_struct(void **state) +{ + int i, ret, prev; + size_t n = 800000; + TALLOC_CTX *mem_ctx = talloc_new(NULL); + size_t key_index = 0; + + struct contrived_struct *a = talloc_zero_array(mem_ctx, + struct contrived_struct, + n); + + /* we don't really want a good RNG, we want mess and repeated values. */ + uint32_t x = 89, y = (uint32_t)-6, z = 11; + for (i = 0; i < n; i++) { + a[i].index = &key_index; + a[i].key[0] = (x & 0xffff) - 0x8000; + a[i].key[1] = z & 255; + /* key[2] is original order, useful for checking stability */ + a[i].key[2] = i; + x += z ^ y; + y *= z + (x + 0x5555); + z -= x ^ i; + } + + /* 1. sort by key[0] */ + ret = stable_sort_talloc(mem_ctx, a, n, + sizeof(struct contrived_struct), + (samba_compare_fn_t)cmp_contrived_struct); + assert_true(ret); + prev = a[0].key[0]; + for (i = 1; i < n; i++) { + int value = a[i].key[0]; + assert_true(value >= prev); + if (value == prev) { + /* we can test the stability by comparing key[2] */ + assert_true(a[i].key[2] >= a[i - 1].key[2]); + } + prev = value; + } + + /* 2. sort by key[1]. key[0] now indicates stability. */ + key_index = 1; + ret = stable_sort_talloc(mem_ctx, a, n, + sizeof(struct contrived_struct), + (samba_compare_fn_t)cmp_contrived_struct); + assert_true(ret); + prev = a[0].key[1]; + for (i = 1; i < n; i++) { + int value = a[i].key[1]; + assert_true(value >= prev); + if (value == prev) { + assert_true(a[i].key[0] >= a[i - 1].key[0]); + } + prev = value; + } + + /* + * 3. sort by key[2]. key[1] would now indicate stability, but we know + * that key[2] has no duplicates, so stability is moot. + */ + key_index = 2; + ret = stable_sort_talloc(mem_ctx, a, n, + sizeof(struct contrived_struct), + (samba_compare_fn_t)cmp_contrived_struct); + assert_true(ret); + prev = a[0].key[2]; + for (i = 1; i < n; i++) { + int value = a[i].key[2]; + assert_true(value > prev); + prev = value; + } + + /* + * 4. sort by key[0] again, using descending sort order. key[2] should + * still be in ascending order where there are duplicate key[0] values. + */ + key_index = 0; + ret = stable_sort_talloc(mem_ctx, a, n, + sizeof(struct contrived_struct), + (samba_compare_fn_t)cmp_contrived_struct_rev); + assert_true(ret); + prev = a[0].key[0]; + for (i = 1; i < n; i++) { + int value = a[i].key[0]; + assert_true(value <= prev); + if (value == prev) { + assert_true(a[i].key[2] >= a[i - 1].key[2]); + } + prev = value; + } +} + + + +static int cmp_integer_xor_blob(int *_a, int *_b, int *opaque) +{ + int a = *_a ^ *opaque; + int b = *_b ^ *opaque; + + if (a > b) { + return 1; + } + + if (a < b) { + return -1; + } + + return 0; +} + +static void test_stable_sort_talloc_r(void **state) +{ + int i; + size_t n = 1500; + TALLOC_CTX *mem_ctx = talloc_new(NULL); + int opaque = 42; + bool ok; + int *a = talloc_array(mem_ctx, int, n); + for (i = 0; i < n; i++) { + a[i] = (i * 7) & 255; + } + + ok = stable_sort_talloc_r(mem_ctx, a, n, sizeof(int), + (samba_compare_with_context_fn_t)cmp_integer_xor_blob, + &opaque); + assert_true(ok); + + for (i = 1; i < n; i++) { + assert_true((a[i - 1] ^ opaque) <= (a[i] ^ opaque)); + } +} + + +static void test_stable_sort_silly_size(void **state) +{ + bool ok; + int a[33] = {0}; + int b[33] = {0}; + + ok = stable_sort(a, + b, + (SIZE_MAX / 2) + 2, + (SIZE_MAX / 2) + 2, + (samba_compare_fn_t)cmp_integer); + assert_false(ok); +} + +static void test_stable_sort_null_array(void **state) +{ + bool ok; + int a[33] = {0}; + + ok = stable_sort(a, + NULL, + 33, + sizeof(int), + (samba_compare_fn_t)cmp_integer); + assert_false(ok); +} + + + + + +int main(void) { + const struct CMUnitTest tests[] = { + cmocka_unit_test(test_stable_sort), + cmocka_unit_test(test_stable_sort_talloc_short), + cmocka_unit_test(test_stable_sort_talloc_long), + cmocka_unit_test(test_stable_sort_talloc_contrived_struct), + cmocka_unit_test(test_stable_sort_talloc_r), + cmocka_unit_test(test_stable_sort_silly_size), + cmocka_unit_test(test_stable_sort_null_array), + }; + if (!isatty(1)) { + cmocka_set_message_output(CM_OUTPUT_SUBUNIT); + } + return cmocka_run_group_tests(tests, NULL, NULL); +} diff --git a/lib/util/wscript_build b/lib/util/wscript_build index d26aa4e5843..8eac1013394 100644 --- a/lib/util/wscript_build +++ b/lib/util/wscript_build @@ -147,6 +147,13 @@ bld.SAMBA_LIBRARY('genrand', local_include=False, private_library=True) +bld.SAMBA_LIBRARY('stable_sort', + source='stable_sort.c', + deps='replace', + public_deps='talloc', + local_include=False, + private_library=True) + if bld.env.SAMBA_UTIL_CORE_ONLY: bld.SAMBA_LIBRARY('tevent-util', @@ -400,3 +407,9 @@ else: cflags="-D USING_CMDLINE_S3", local_include=False, for_selftest=True) + + bld.SAMBA_BINARY('test_stable_sort', + source='tests/test_stable_sort.c', + deps='cmocka replace talloc stable_sort', + local_include=False, + for_selftest=True) diff --git a/selftest/tests.py b/selftest/tests.py index db43d8a9e7d..4cc2e7f8918 100644 --- a/selftest/tests.py +++ b/selftest/tests.py @@ -437,6 +437,8 @@ plantestsuite("samba.unittests.memcache", "none", [os.path.join(bindir(), "default/lib/util/test_memcache")]) plantestsuite("samba.unittests.sys_rw", "none", [os.path.join(bindir(), "default/lib/util/test_sys_rw")]) +plantestsuite("samba.unittests.stable_sort", "none", + [os.path.join(bindir(), "default/lib/util/test_stable_sort")]) plantestsuite("samba.unittests.ntlm_check", "none", [os.path.join(bindir(), "default/libcli/auth/test_ntlm_check")]) plantestsuite("samba.unittests.gnutls", "none",