From f1e075a85d43fb7d1850dc6f53de6a21fc58076e Mon Sep 17 00:00:00 2001 From: Zdenek Kabelac Date: Tue, 5 Nov 2024 12:01:03 +0100 Subject: [PATCH] radix_tree: implement non-recursive iterator Add radix_tree_iter_init() to initialize iterator and radix_tree_iter_next() to walk over all radix_tree nodes. This is non-recursive implementation which is not using 'visitor()', that sometime may look code unnecessarily complicated. Use the maximal keylen to 'estimate' the largest needed stack size. --- base/data-struct/radix-tree-adaptive.c | 147 ++++++++++++++++++++++++- base/data-struct/radix-tree.h | 5 + test/unit/radix_tree_t.c | 56 ++++++++++ 3 files changed, 204 insertions(+), 4 deletions(-) diff --git a/base/data-struct/radix-tree-adaptive.c b/base/data-struct/radix-tree-adaptive.c index 69984e66a..e5755af1a 100644 --- a/base/data-struct/radix-tree-adaptive.c +++ b/base/data-struct/radix-tree-adaptive.c @@ -74,24 +74,37 @@ struct node256 { struct value values[256]; }; +struct radix_tree_iter_node { + const struct value *v; + unsigned i; +}; + +struct radix_tree_iter { + struct radix_tree_iter_node *nodes; + unsigned depth; + unsigned max_depth; +}; + struct radix_tree { unsigned nr_entries; + unsigned max_keylen; struct value root; radix_value_dtr dtr; void *dtr_context; + struct radix_tree_iter it; }; //---------------------------------------------------------------- struct radix_tree *radix_tree_create(radix_value_dtr dtr, void *dtr_context) { - struct radix_tree *rt = malloc(sizeof(*rt)); + struct radix_tree *rt; - if (rt) { - rt->nr_entries = 0; + if ((rt = zalloc(sizeof(*rt)))) { rt->root.type = UNSET; rt->dtr = dtr; rt->dtr_context = dtr_context; + rt->max_keylen = 1; } return rt; @@ -171,6 +184,7 @@ static unsigned _free_node(struct radix_tree *rt, struct value v) void radix_tree_destroy(struct radix_tree *rt) { _free_node(rt, rt->root); + free(rt->it.nodes); free(rt); } @@ -561,7 +575,14 @@ bool radix_tree_insert(struct radix_tree *rt, const void *key, size_t keylen, un const uint8_t *kb = key; const uint8_t *ke = kb + keylen; struct lookup_result lr = _lookup_prefix(&rt->root, kb, ke); - return _insert(rt, lr.v, lr.kb, ke, rv); + if (!_insert(rt, lr.v, lr.kb, ke, rv)) + return false; + + if (keylen > rt->max_keylen) + /* For non recursive iterator remember maximal keylen */ + rt->max_keylen = keylen; + + return true; } int radix_tree_uniq_insert(struct radix_tree *rt, const void *key, size_t keylen, union radix_value rv) @@ -1054,6 +1075,124 @@ void radix_tree_iterate(struct radix_tree *rt, const void *key, size_t keylen, (void) _iterate(it, lr.v); } +// Instead of recursive function call, keep existing value node stacked and stack +// a 'new/next' value node for _next() processing. +// Once this stacked node is processed, it can get backtracked to the previous one */ +static void _n_iter_next(struct radix_tree_iter *it, const struct value *next, + unsigned i, unsigned nr_entries) +{ + if (i >= nr_entries) { + it->depth--; // Last entry, backtrack + + } else if (it->depth > it->max_depth) { + it->depth = 0; // Maybe someone added nodes while iterating ?? + free(it->nodes); + it->nodes = NULL; // Eventually can recognized, if needed... + + } else { + it->depth++; // Stack node for next iteration + it->nodes[it->depth].v = next; + it->nodes[it->depth].i = 0; + } +} + +// Non-recursive iteration over all tree nodes +bool radix_tree_iter_next(struct radix_tree_iter *it, union radix_value *value) +{ + unsigned i; + const struct value *v; + const struct node4 *n4; + const struct node16 *n16; + const struct node48 *n48; + const struct node256 *n256; + const struct value_chain *vc; + const struct prefix_chain *pc; + + while (it->depth) { + if (!(v = it->nodes[it->depth].v)) { + it->depth = 0; // can't happen + continue; + } + + switch (v->type) { + case UNSET: + it->depth--; // can't happen + break; + + case VALUE: + it->depth--; + *value = v->value; + return true; + + case VALUE_CHAIN: + vc = v->value.ptr; + it->nodes[it->depth].v = &vc->child; + *value = vc->value; + return true; + + case PREFIX_CHAIN: + pc = v->value.ptr; + it->nodes[it->depth].v = &pc->child; + break; + + case NODE4: + n4 = v->value.ptr; + i = it->nodes[it->depth].i++; + _n_iter_next(it, n4->values + i, i, n4->nr_entries); + break; + + case NODE16: + n16 = v->value.ptr; + i = it->nodes[it->depth].i++; + _n_iter_next(it, n16->values + i, i, n16->nr_entries); + break; + + case NODE48: + n48 = v->value.ptr; + i = it->nodes[it->depth].i++; + _n_iter_next(it, n48->values + i, i, n48->nr_entries); + break; + + case NODE256: + n256 = v->value.ptr; + // skipping all UNSET elements + while ((i = it->nodes[it->depth].i++) < 256) + if (n256->values[i].type != UNSET) + break; + _n_iter_next(it, n256->values + i, i, 256); + break; + } + } + + return false; +} + +// Currently iterator keeps everything within the radix_tree structure itself. +// +// TODO: Maybe allocate individual iterator structure, but this would need _destroy(). +// ATM lvm2 code does not need this... +struct radix_tree_iter *radix_tree_iter_init(struct radix_tree *rt, const void *key, size_t keylen) +{ + const uint8_t *kb = key; + const uint8_t *ke = kb + keylen; + struct lookup_result lr = _lookup_prefix(&rt->root, kb, ke); + struct radix_tree_iter *it = &rt->it; + + it->max_depth = rt->max_keylen; + it->depth = 0; + free(it->nodes); + if (!(it->nodes = calloc(sizeof(struct radix_tree_iter_node), it->max_depth + 1))) + return NULL; + + if (lr.kb == ke || _prefix_chain_matches(&lr, ke)) { + it->nodes[1].v = lr.v; + it->nodes[1].i = 0; + it->depth = 1; + } + + return it; +} + //---------------------------------------------------------------- // Checks: // 1) The number of entries matches rt->nr_entries diff --git a/base/data-struct/radix-tree.h b/base/data-struct/radix-tree.h index f0fe2baf3..c56fe7771 100644 --- a/base/data-struct/radix-tree.h +++ b/base/data-struct/radix-tree.h @@ -58,6 +58,11 @@ struct radix_tree_iterator { void radix_tree_iterate(struct radix_tree *rt, const void *key, size_t keylen, struct radix_tree_iterator *it); +// Non-recursive traversal of all radix_tree nodes. +struct radix_tree_iter; +struct radix_tree_iter *radix_tree_iter_init(struct radix_tree *rt, const void *key, size_t keylen); +bool radix_tree_iter_next(struct radix_tree_iter *it, union radix_value *value); + // Alternative traversing radix_tree. // Builds whole set all radix_tree nr_values values. // After use, free(values). diff --git a/test/unit/radix_tree_t.c b/test/unit/radix_tree_t.c index 15876ffd4..01c1f7dbf 100644 --- a/test/unit/radix_tree_t.c +++ b/test/unit/radix_tree_t.c @@ -492,6 +492,61 @@ static void test_iterate_vary_middle(void *fixture) } } +static void test_iter(void *fixture) +{ + struct radix_tree *rt = fixture; + struct radix_tree_iter *it; + const unsigned max = 2104; + unsigned i; + uint8_t k[6] = { 0 }; + union radix_value v; + unsigned count = 0; + uint16_t arr[max]; + + memset(arr, 0, sizeof(arr)); + + // for checking also VALUE_CHAIN + k[0] = 'a'; + v.n = 'a'; + radix_tree_insert(rt, &k, 1, v); + + k[0] = 'b'; + v.n = 'b'; + radix_tree_insert(rt, &k, 1, v); + + for (i = 0; i < max; i++) { + k[1] = i & 0xff; + k[2] = (i >> 8) & 0xff; + v.n = i; + T_ASSERT(radix_tree_insert(rt, &k, sizeof(k), v)); + } + + T_ASSERT(radix_tree_is_well_formed(rt)); + //radix_tree_dump(rt, stdout); + + T_ASSERT((it = radix_tree_iter_init(rt, NULL, 0))); + + while (radix_tree_iter_next(it, &v)) { + if (count++ > (max + 1)) + break; // too many iterations already... + if (v.n > max) + break; // too large value ??? + arr[v.n]++; + } + + T_ASSERT_EQUAL(count, max + 2); + for (i = 0; i < max; i++) + switch (i) { + case 'a': + case 'b': + T_ASSERT_EQUAL(arr[i], 2); + break; + default: + T_ASSERT_EQUAL(arr[i], 1); + break; + } +} + //---------------------------------------------------------------- #define DTR_COUNT 100 @@ -872,6 +927,7 @@ void radix_tree_tests(struct dm_list *all_tests) T("iterate-subset", "iterate a subset of entries in tree", test_iterate_subset); T("iterate-single", "iterate a subset that contains a single entry", test_iterate_single); T("iterate-vary-middle", "iterate keys that vary in the middle", test_iterate_vary_middle); + T("non-recursive-iterate-all", "non-recursively iterate keys", test_iter); T("remove-calls-dtr", "remove should call the dtr for the value", test_remove_calls_dtr); T("destroy-calls-dtr", "destroy should call the dtr for all values", test_destroy_calls_dtr); T("bcache-scenario", "A specific series of keys from a bcache scenario", test_bcache_scenario);