rhashtable: Add nested tables

This patch adds code that handles GFP_ATOMIC kmalloc failure on
insertion.  As we cannot use vmalloc, we solve it by making our
hash table nested.  That is, we allocate single pages at each level
and reach our desired table size by nesting them.

When a nested table is created, only a single page is allocated
at the top level.  Lower levels are allocated on demand during
insertion.  Therefore, for each insertion to succeed, only two
(non-consecutive) pages are needed.

After a nested table is created, a rehash will be scheduled in
order to switch to a vmalloced table as soon as possible.  Also,
the rehash code will never rehash into a nested table.  If we
detect a nested table during a rehash, the rehash will be aborted
and a new rehash will be scheduled.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
Herbert Xu 2017-02-11 19:26:47 +08:00 committed by David S. Miller
parent 9dbbfb0ab6
commit 40137906c5
2 changed files with 276 additions and 72 deletions

View File

@ -61,6 +61,7 @@ struct rhlist_head {
/** /**
* struct bucket_table - Table of hash buckets * struct bucket_table - Table of hash buckets
* @size: Number of hash buckets * @size: Number of hash buckets
* @nest: Number of bits of first-level nested table.
* @rehash: Current bucket being rehashed * @rehash: Current bucket being rehashed
* @hash_rnd: Random seed to fold into hash * @hash_rnd: Random seed to fold into hash
* @locks_mask: Mask to apply before accessing locks[] * @locks_mask: Mask to apply before accessing locks[]
@ -68,10 +69,12 @@ struct rhlist_head {
* @walkers: List of active walkers * @walkers: List of active walkers
* @rcu: RCU structure for freeing the table * @rcu: RCU structure for freeing the table
* @future_tbl: Table under construction during rehashing * @future_tbl: Table under construction during rehashing
* @ntbl: Nested table used when out of memory.
* @buckets: size * hash buckets * @buckets: size * hash buckets
*/ */
struct bucket_table { struct bucket_table {
unsigned int size; unsigned int size;
unsigned int nest;
unsigned int rehash; unsigned int rehash;
u32 hash_rnd; u32 hash_rnd;
unsigned int locks_mask; unsigned int locks_mask;
@ -374,6 +377,12 @@ void rhashtable_free_and_destroy(struct rhashtable *ht,
void *arg); void *arg);
void rhashtable_destroy(struct rhashtable *ht); void rhashtable_destroy(struct rhashtable *ht);
/*
 * Look up the bucket slot for @hash in a nested table.  Never returns
 * NULL: when a level of the nested table has not been allocated, the
 * address of a static nulls-marker head is returned instead.
 */
struct rhash_head __rcu **rht_bucket_nested(const struct bucket_table *tbl,
unsigned int hash);
/*
 * Like rht_bucket_nested(), but allocates any missing level of the
 * nested table on the way down.  Returns NULL if an allocation fails.
 */
struct rhash_head __rcu **rht_bucket_nested_insert(struct rhashtable *ht,
struct bucket_table *tbl,
unsigned int hash);
#define rht_dereference(p, ht) \ #define rht_dereference(p, ht) \
rcu_dereference_protected(p, lockdep_rht_mutex_is_held(ht)) rcu_dereference_protected(p, lockdep_rht_mutex_is_held(ht))
@ -389,6 +398,27 @@ void rhashtable_destroy(struct rhashtable *ht);
#define rht_entry(tpos, pos, member) \ #define rht_entry(tpos, pos, member) \
({ tpos = container_of(pos, typeof(*tpos), member); 1; }) ({ tpos = container_of(pos, typeof(*tpos), member); 1; })
/*
 * rht_bucket - locate the (read-only) bucket slot for a hash value
 * @tbl:	bucket table to look in
 * @hash:	bucket index
 *
 * Flat tables are indexed directly; nested tables (tbl->nest != 0) are
 * resolved through rht_bucket_nested().
 */
static inline struct rhash_head __rcu *const *rht_bucket(
	const struct bucket_table *tbl, unsigned int hash)
{
	if (unlikely(tbl->nest))
		return rht_bucket_nested(tbl, hash);

	return &tbl->buckets[hash];
}
/*
 * rht_bucket_var - locate the writable bucket slot for a hash value
 * @tbl:	bucket table to look in
 * @hash:	bucket index
 *
 * Same lookup as rht_bucket(), but the returned slot may be written
 * through.  No nested level is allocated here.
 */
static inline struct rhash_head __rcu **rht_bucket_var(
	struct bucket_table *tbl, unsigned int hash)
{
	if (unlikely(tbl->nest))
		return rht_bucket_nested(tbl, hash);

	return &tbl->buckets[hash];
}
/*
 * rht_bucket_insert - locate the bucket slot for an insertion
 * @ht:		hash table the bucket table belongs to
 * @tbl:	bucket table to insert into
 * @hash:	bucket index
 *
 * For a nested table the lookup goes through rht_bucket_nested_insert(),
 * which may return NULL; callers must check the result.  A flat table
 * always yields a valid slot.
 */
static inline struct rhash_head __rcu **rht_bucket_insert(
	struct rhashtable *ht, struct bucket_table *tbl, unsigned int hash)
{
	if (unlikely(tbl->nest))
		return rht_bucket_nested_insert(ht, tbl, hash);

	return &tbl->buckets[hash];
}
/** /**
* rht_for_each_continue - continue iterating over hash chain * rht_for_each_continue - continue iterating over hash chain
* @pos: the &struct rhash_head to use as a loop cursor. * @pos: the &struct rhash_head to use as a loop cursor.
@ -408,7 +438,7 @@ void rhashtable_destroy(struct rhashtable *ht);
* @hash: the hash value / bucket index * @hash: the hash value / bucket index
*/ */
#define rht_for_each(pos, tbl, hash) \ #define rht_for_each(pos, tbl, hash) \
rht_for_each_continue(pos, (tbl)->buckets[hash], tbl, hash) rht_for_each_continue(pos, *rht_bucket(tbl, hash), tbl, hash)
/** /**
* rht_for_each_entry_continue - continue iterating over hash chain * rht_for_each_entry_continue - continue iterating over hash chain
@ -433,7 +463,7 @@ void rhashtable_destroy(struct rhashtable *ht);
* @member: name of the &struct rhash_head within the hashable struct. * @member: name of the &struct rhash_head within the hashable struct.
*/ */
#define rht_for_each_entry(tpos, pos, tbl, hash, member) \ #define rht_for_each_entry(tpos, pos, tbl, hash, member) \
rht_for_each_entry_continue(tpos, pos, (tbl)->buckets[hash], \ rht_for_each_entry_continue(tpos, pos, *rht_bucket(tbl, hash), \
tbl, hash, member) tbl, hash, member)
/** /**
@ -449,7 +479,7 @@ void rhashtable_destroy(struct rhashtable *ht);
* remove the loop cursor from the list. * remove the loop cursor from the list.
*/ */
#define rht_for_each_entry_safe(tpos, pos, next, tbl, hash, member) \ #define rht_for_each_entry_safe(tpos, pos, next, tbl, hash, member) \
for (pos = rht_dereference_bucket((tbl)->buckets[hash], tbl, hash), \ for (pos = rht_dereference_bucket(*rht_bucket(tbl, hash), tbl, hash), \
next = !rht_is_a_nulls(pos) ? \ next = !rht_is_a_nulls(pos) ? \
rht_dereference_bucket(pos->next, tbl, hash) : NULL; \ rht_dereference_bucket(pos->next, tbl, hash) : NULL; \
(!rht_is_a_nulls(pos)) && rht_entry(tpos, pos, member); \ (!rht_is_a_nulls(pos)) && rht_entry(tpos, pos, member); \
@ -485,7 +515,7 @@ void rhashtable_destroy(struct rhashtable *ht);
* traversal is guarded by rcu_read_lock(). * traversal is guarded by rcu_read_lock().
*/ */
#define rht_for_each_rcu(pos, tbl, hash) \ #define rht_for_each_rcu(pos, tbl, hash) \
rht_for_each_rcu_continue(pos, (tbl)->buckets[hash], tbl, hash) rht_for_each_rcu_continue(pos, *rht_bucket(tbl, hash), tbl, hash)
/** /**
* rht_for_each_entry_rcu_continue - continue iterating over rcu hash chain * rht_for_each_entry_rcu_continue - continue iterating over rcu hash chain
@ -519,7 +549,7 @@ void rhashtable_destroy(struct rhashtable *ht);
* traversal is guarded by rcu_read_lock(). * traversal is guarded by rcu_read_lock().
*/ */
#define rht_for_each_entry_rcu(tpos, pos, tbl, hash, member) \ #define rht_for_each_entry_rcu(tpos, pos, tbl, hash, member) \
rht_for_each_entry_rcu_continue(tpos, pos, (tbl)->buckets[hash],\ rht_for_each_entry_rcu_continue(tpos, pos, *rht_bucket(tbl, hash), \
tbl, hash, member) tbl, hash, member)
/** /**
@ -565,7 +595,7 @@ static inline struct rhash_head *__rhashtable_lookup(
.ht = ht, .ht = ht,
.key = key, .key = key,
}; };
const struct bucket_table *tbl; struct bucket_table *tbl;
struct rhash_head *he; struct rhash_head *he;
unsigned int hash; unsigned int hash;
@ -697,8 +727,12 @@ slow_path:
} }
elasticity = ht->elasticity; elasticity = ht->elasticity;
pprev = &tbl->buckets[hash]; pprev = rht_bucket_insert(ht, tbl, hash);
rht_for_each(head, tbl, hash) { data = ERR_PTR(-ENOMEM);
if (!pprev)
goto out;
rht_for_each_continue(head, *pprev, tbl, hash) {
struct rhlist_head *plist; struct rhlist_head *plist;
struct rhlist_head *list; struct rhlist_head *list;
@ -736,7 +770,7 @@ slow_path:
if (unlikely(rht_grow_above_100(ht, tbl))) if (unlikely(rht_grow_above_100(ht, tbl)))
goto slow_path; goto slow_path;
head = rht_dereference_bucket(tbl->buckets[hash], tbl, hash); head = rht_dereference_bucket(*pprev, tbl, hash);
RCU_INIT_POINTER(obj->next, head); RCU_INIT_POINTER(obj->next, head);
if (rhlist) { if (rhlist) {
@ -746,7 +780,7 @@ slow_path:
RCU_INIT_POINTER(list->next, NULL); RCU_INIT_POINTER(list->next, NULL);
} }
rcu_assign_pointer(tbl->buckets[hash], obj); rcu_assign_pointer(*pprev, obj);
atomic_inc(&ht->nelems); atomic_inc(&ht->nelems);
if (rht_grow_above_75(ht, tbl)) if (rht_grow_above_75(ht, tbl))
@ -955,8 +989,8 @@ static inline int __rhashtable_remove_fast_one(
spin_lock_bh(lock); spin_lock_bh(lock);
pprev = &tbl->buckets[hash]; pprev = rht_bucket_var(tbl, hash);
rht_for_each(he, tbl, hash) { rht_for_each_continue(he, *pprev, tbl, hash) {
struct rhlist_head *list; struct rhlist_head *list;
list = container_of(he, struct rhlist_head, rhead); list = container_of(he, struct rhlist_head, rhead);
@ -1107,8 +1141,8 @@ static inline int __rhashtable_replace_fast(
spin_lock_bh(lock); spin_lock_bh(lock);
pprev = &tbl->buckets[hash]; pprev = rht_bucket_var(tbl, hash);
rht_for_each(he, tbl, hash) { rht_for_each_continue(he, *pprev, tbl, hash) {
if (he != obj_old) { if (he != obj_old) {
pprev = &he->next; pprev = &he->next;
continue; continue;

View File

@ -32,6 +32,11 @@
#define HASH_MIN_SIZE 4U #define HASH_MIN_SIZE 4U
#define BUCKET_LOCKS_PER_CPU 32UL #define BUCKET_LOCKS_PER_CPU 32UL
/*
 * One pointer-sized slot of a nested-table page.
 *
 * @table:  pointer to the next-level page; this member is followed while
 *          descending through the levels of a nested table.
 * @bucket: head of a hash chain; this member is used in the last-level
 *          page, whose slots are the actual hash buckets.
 */
union nested_table {
union nested_table __rcu *table;
struct rhash_head __rcu *bucket;
};
static u32 head_hashfn(struct rhashtable *ht, static u32 head_hashfn(struct rhashtable *ht,
const struct bucket_table *tbl, const struct bucket_table *tbl,
const struct rhash_head *he) const struct rhash_head *he)
@ -76,6 +81,9 @@ static int alloc_bucket_locks(struct rhashtable *ht, struct bucket_table *tbl,
/* Never allocate more than 0.5 locks per bucket */ /* Never allocate more than 0.5 locks per bucket */
size = min_t(unsigned int, size, tbl->size >> 1); size = min_t(unsigned int, size, tbl->size >> 1);
if (tbl->nest)
size = min(size, 1U << tbl->nest);
if (sizeof(spinlock_t) != 0) { if (sizeof(spinlock_t) != 0) {
tbl->locks = NULL; tbl->locks = NULL;
#ifdef CONFIG_NUMA #ifdef CONFIG_NUMA
@ -99,8 +107,45 @@ static int alloc_bucket_locks(struct rhashtable *ht, struct bucket_table *tbl,
return 0; return 0;
} }
/*
 * Free the page referenced by the slot @ntbl and, recursively, every
 * page below it.
 *
 * @ntbl: slot holding the pointer to the page to free
 * @size: number of buckets covered by that page; when it exceeds the
 *        number of slots in one page, the slots are themselves
 *        pointers to further pages.
 */
static void nested_table_free(union nested_table *ntbl, unsigned int size)
{
	const unsigned int shift = PAGE_SHIFT - ilog2(sizeof(void *));
	const unsigned int len = 1 << shift;
	union nested_table *page;
	unsigned int slot;

	page = rcu_dereference_raw(ntbl->table);
	if (!page)
		return;

	if (size > len) {
		/* Intermediate level: release each child page first. */
		const unsigned int child_size = size >> shift;

		for (slot = 0; slot < len; slot++)
			nested_table_free(&page[slot], child_size);
	}

	kfree(page);
}
/*
 * Release all pages of a nested bucket table.
 *
 * The first-level page hangs off tbl->buckets[0] and has
 * (1 << tbl->nest) slots in use.  Each slot is torn down with
 * nested_table_free() before the first-level page itself is freed.
 * @tbl itself is not freed here.
 */
static void nested_bucket_table_free(const struct bucket_table *tbl)
{
	const unsigned int nslots = 1 << tbl->nest;
	const unsigned int sub_size = tbl->size >> tbl->nest;
	union nested_table *top;
	unsigned int slot;

	top = (union nested_table *)rcu_dereference_raw(tbl->buckets[0]);

	for (slot = 0; slot < nslots; slot++)
		nested_table_free(&top[slot], sub_size);

	kfree(top);
}
static void bucket_table_free(const struct bucket_table *tbl) static void bucket_table_free(const struct bucket_table *tbl)
{ {
if (tbl->nest)
nested_bucket_table_free(tbl);
if (tbl) if (tbl)
kvfree(tbl->locks); kvfree(tbl->locks);
@ -112,6 +157,59 @@ static void bucket_table_free_rcu(struct rcu_head *head)
bucket_table_free(container_of(head, struct bucket_table, rcu)); bucket_table_free(container_of(head, struct bucket_table, rcu));
} }
/*
 * Return the page referenced by *@prev, allocating it if it is missing.
 *
 * @ht:      hash table, passed to INIT_RHT_NULLS_HEAD()
 * @prev:    slot that holds (or will hold) the page pointer
 * @shifted: non-zero when the page is a last-level page whose slots are
 *           hash buckets; it is also the bit position at which the slot
 *           index is merged into the nulls value
 * @nhash:   low hash bits already consumed on the way to this page
 *
 * A new page is one zeroed PAGE_SIZE allocation made with GFP_ATOMIC.
 * When @shifted is non-zero every slot is initialised as an empty
 * nulls-terminated chain.  Returns NULL if the allocation fails.
 */
static union nested_table *nested_table_alloc(struct rhashtable *ht,
					      union nested_table __rcu **prev,
					      unsigned int shifted,
					      unsigned int nhash)
{
	union nested_table *page;
	int slot;

	page = rcu_dereference(*prev);
	if (page)
		return page;

	page = kzalloc(PAGE_SIZE, GFP_ATOMIC);
	if (!page || !shifted)
		goto publish;

	for (slot = 0; slot < PAGE_SIZE / sizeof(page[0].bucket); slot++)
		INIT_RHT_NULLS_HEAD(page[slot].bucket, ht,
				    (slot << shifted) | nhash);

publish:
	/* Stored unconditionally, so a failed allocation writes NULL back. */
	rcu_assign_pointer(*prev, page);

	return page;
}
/*
 * Allocate a bucket table whose buckets live in a nested table.
 *
 * @ht:       hash table the new table is for
 * @nbuckets: requested number of buckets
 * @gfp:      allocation flags for the bucket_table header
 *
 * The header carries a single buckets[] entry, which points at the
 * first-level page.  Lower levels are not allocated here.  tbl->nest is
 * set to the number of hash bits indexed by the first-level page.
 * tbl->size is not set here.
 *
 * Returns NULL when @nbuckets is below (1 << (shift + 1)), i.e. twice
 * the number of slots in one page, or when an allocation fails.
 */
static struct bucket_table *nested_bucket_table_alloc(struct rhashtable *ht,
						      size_t nbuckets,
						      gfp_t gfp)
{
	const unsigned int shift = PAGE_SHIFT - ilog2(sizeof(void *));
	struct bucket_table *tbl;
	union nested_table *top;

	if (nbuckets < (1 << (shift + 1)))
		return NULL;

	tbl = kzalloc(sizeof(*tbl) + sizeof(tbl->buckets[0]), gfp);
	if (!tbl)
		return NULL;

	top = nested_table_alloc(ht, (union nested_table __rcu **)tbl->buckets,
				 0, 0);
	if (!top) {
		kfree(tbl);
		return NULL;
	}

	tbl->nest = (ilog2(nbuckets) - 1) % shift + 1;

	return tbl;
}
static struct bucket_table *bucket_table_alloc(struct rhashtable *ht, static struct bucket_table *bucket_table_alloc(struct rhashtable *ht,
size_t nbuckets, size_t nbuckets,
gfp_t gfp) gfp_t gfp)
@ -126,10 +224,17 @@ static struct bucket_table *bucket_table_alloc(struct rhashtable *ht,
tbl = kzalloc(size, gfp | __GFP_NOWARN | __GFP_NORETRY); tbl = kzalloc(size, gfp | __GFP_NOWARN | __GFP_NORETRY);
if (tbl == NULL && gfp == GFP_KERNEL) if (tbl == NULL && gfp == GFP_KERNEL)
tbl = vzalloc(size); tbl = vzalloc(size);
size = nbuckets;
if (tbl == NULL && gfp != GFP_KERNEL) {
tbl = nested_bucket_table_alloc(ht, nbuckets, gfp);
nbuckets = 0;
}
if (tbl == NULL) if (tbl == NULL)
return NULL; return NULL;
tbl->size = nbuckets; tbl->size = size;
if (alloc_bucket_locks(ht, tbl, gfp) < 0) { if (alloc_bucket_locks(ht, tbl, gfp) < 0) {
bucket_table_free(tbl); bucket_table_free(tbl);
@ -164,12 +269,17 @@ static int rhashtable_rehash_one(struct rhashtable *ht, unsigned int old_hash)
struct bucket_table *old_tbl = rht_dereference(ht->tbl, ht); struct bucket_table *old_tbl = rht_dereference(ht->tbl, ht);
struct bucket_table *new_tbl = rhashtable_last_table(ht, struct bucket_table *new_tbl = rhashtable_last_table(ht,
rht_dereference_rcu(old_tbl->future_tbl, ht)); rht_dereference_rcu(old_tbl->future_tbl, ht));
struct rhash_head __rcu **pprev = &old_tbl->buckets[old_hash]; struct rhash_head __rcu **pprev = rht_bucket_var(old_tbl, old_hash);
int err = -ENOENT; int err = -EAGAIN;
struct rhash_head *head, *next, *entry; struct rhash_head *head, *next, *entry;
spinlock_t *new_bucket_lock; spinlock_t *new_bucket_lock;
unsigned int new_hash; unsigned int new_hash;
if (new_tbl->nest)
goto out;
err = -ENOENT;
rht_for_each(entry, old_tbl, old_hash) { rht_for_each(entry, old_tbl, old_hash) {
err = 0; err = 0;
next = rht_dereference_bucket(entry->next, old_tbl, old_hash); next = rht_dereference_bucket(entry->next, old_tbl, old_hash);
@ -202,19 +312,26 @@ out:
return err; return err;
} }
static void rhashtable_rehash_chain(struct rhashtable *ht, static int rhashtable_rehash_chain(struct rhashtable *ht,
unsigned int old_hash) unsigned int old_hash)
{ {
struct bucket_table *old_tbl = rht_dereference(ht->tbl, ht); struct bucket_table *old_tbl = rht_dereference(ht->tbl, ht);
spinlock_t *old_bucket_lock; spinlock_t *old_bucket_lock;
int err;
old_bucket_lock = rht_bucket_lock(old_tbl, old_hash); old_bucket_lock = rht_bucket_lock(old_tbl, old_hash);
spin_lock_bh(old_bucket_lock); spin_lock_bh(old_bucket_lock);
while (!rhashtable_rehash_one(ht, old_hash)) while (!(err = rhashtable_rehash_one(ht, old_hash)))
; ;
if (err == -ENOENT) {
old_tbl->rehash++; old_tbl->rehash++;
err = 0;
}
spin_unlock_bh(old_bucket_lock); spin_unlock_bh(old_bucket_lock);
return err;
} }
static int rhashtable_rehash_attach(struct rhashtable *ht, static int rhashtable_rehash_attach(struct rhashtable *ht,
@ -246,13 +363,17 @@ static int rhashtable_rehash_table(struct rhashtable *ht)
struct bucket_table *new_tbl; struct bucket_table *new_tbl;
struct rhashtable_walker *walker; struct rhashtable_walker *walker;
unsigned int old_hash; unsigned int old_hash;
int err;
new_tbl = rht_dereference(old_tbl->future_tbl, ht); new_tbl = rht_dereference(old_tbl->future_tbl, ht);
if (!new_tbl) if (!new_tbl)
return 0; return 0;
for (old_hash = 0; old_hash < old_tbl->size; old_hash++) for (old_hash = 0; old_hash < old_tbl->size; old_hash++) {
rhashtable_rehash_chain(ht, old_hash); err = rhashtable_rehash_chain(ht, old_hash);
if (err)
return err;
}
/* Publish the new table pointer. */ /* Publish the new table pointer. */
rcu_assign_pointer(ht->tbl, new_tbl); rcu_assign_pointer(ht->tbl, new_tbl);
@ -271,31 +392,16 @@ static int rhashtable_rehash_table(struct rhashtable *ht)
return rht_dereference(new_tbl->future_tbl, ht) ? -EAGAIN : 0; return rht_dereference(new_tbl->future_tbl, ht) ? -EAGAIN : 0;
} }
/** static int rhashtable_rehash_alloc(struct rhashtable *ht,
* rhashtable_expand - Expand hash table while allowing concurrent lookups struct bucket_table *old_tbl,
* @ht: the hash table to expand unsigned int size)
*
* A secondary bucket array is allocated and the hash entries are migrated.
*
* This function may only be called in a context where it is safe to call
* synchronize_rcu(), e.g. not within a rcu_read_lock() section.
*
* The caller must ensure that no concurrent resizing occurs by holding
* ht->mutex.
*
* It is valid to have concurrent insertions and deletions protected by per
* bucket locks or concurrent RCU protected lookups and traversals.
*/
static int rhashtable_expand(struct rhashtable *ht)
{ {
struct bucket_table *new_tbl, *old_tbl = rht_dereference(ht->tbl, ht); struct bucket_table *new_tbl;
int err; int err;
ASSERT_RHT_MUTEX(ht); ASSERT_RHT_MUTEX(ht);
old_tbl = rhashtable_last_table(ht, old_tbl); new_tbl = bucket_table_alloc(ht, size, GFP_KERNEL);
new_tbl = bucket_table_alloc(ht, old_tbl->size * 2, GFP_KERNEL);
if (new_tbl == NULL) if (new_tbl == NULL)
return -ENOMEM; return -ENOMEM;
@ -324,12 +430,9 @@ static int rhashtable_expand(struct rhashtable *ht)
*/ */
static int rhashtable_shrink(struct rhashtable *ht) static int rhashtable_shrink(struct rhashtable *ht)
{ {
struct bucket_table *new_tbl, *old_tbl = rht_dereference(ht->tbl, ht); struct bucket_table *old_tbl = rht_dereference(ht->tbl, ht);
unsigned int nelems = atomic_read(&ht->nelems); unsigned int nelems = atomic_read(&ht->nelems);
unsigned int size = 0; unsigned int size = 0;
int err;
ASSERT_RHT_MUTEX(ht);
if (nelems) if (nelems)
size = roundup_pow_of_two(nelems * 3 / 2); size = roundup_pow_of_two(nelems * 3 / 2);
@ -342,15 +445,7 @@ static int rhashtable_shrink(struct rhashtable *ht)
if (rht_dereference(old_tbl->future_tbl, ht)) if (rht_dereference(old_tbl->future_tbl, ht))
return -EEXIST; return -EEXIST;
new_tbl = bucket_table_alloc(ht, size, GFP_KERNEL); return rhashtable_rehash_alloc(ht, old_tbl, size);
if (new_tbl == NULL)
return -ENOMEM;
err = rhashtable_rehash_attach(ht, old_tbl, new_tbl);
if (err)
bucket_table_free(new_tbl);
return err;
} }
static void rht_deferred_worker(struct work_struct *work) static void rht_deferred_worker(struct work_struct *work)
@ -366,10 +461,13 @@ static void rht_deferred_worker(struct work_struct *work)
tbl = rhashtable_last_table(ht, tbl); tbl = rhashtable_last_table(ht, tbl);
if (rht_grow_above_75(ht, tbl)) if (rht_grow_above_75(ht, tbl))
rhashtable_expand(ht); err = rhashtable_rehash_alloc(ht, tbl, tbl->size * 2);
else if (ht->p.automatic_shrinking && rht_shrink_below_30(ht, tbl)) else if (ht->p.automatic_shrinking && rht_shrink_below_30(ht, tbl))
rhashtable_shrink(ht); err = rhashtable_shrink(ht);
else if (tbl->nest)
err = rhashtable_rehash_alloc(ht, tbl, tbl->size);
if (!err)
err = rhashtable_rehash_table(ht); err = rhashtable_rehash_table(ht);
mutex_unlock(&ht->mutex); mutex_unlock(&ht->mutex);
@ -439,8 +537,8 @@ static void *rhashtable_lookup_one(struct rhashtable *ht,
int elasticity; int elasticity;
elasticity = ht->elasticity; elasticity = ht->elasticity;
pprev = &tbl->buckets[hash]; pprev = rht_bucket_var(tbl, hash);
rht_for_each(head, tbl, hash) { rht_for_each_continue(head, *pprev, tbl, hash) {
struct rhlist_head *list; struct rhlist_head *list;
struct rhlist_head *plist; struct rhlist_head *plist;
@ -477,6 +575,7 @@ static struct bucket_table *rhashtable_insert_one(struct rhashtable *ht,
struct rhash_head *obj, struct rhash_head *obj,
void *data) void *data)
{ {
struct rhash_head __rcu **pprev;
struct bucket_table *new_tbl; struct bucket_table *new_tbl;
struct rhash_head *head; struct rhash_head *head;
@ -499,7 +598,11 @@ static struct bucket_table *rhashtable_insert_one(struct rhashtable *ht,
if (unlikely(rht_grow_above_100(ht, tbl))) if (unlikely(rht_grow_above_100(ht, tbl)))
return ERR_PTR(-EAGAIN); return ERR_PTR(-EAGAIN);
head = rht_dereference_bucket(tbl->buckets[hash], tbl, hash); pprev = rht_bucket_insert(ht, tbl, hash);
if (!pprev)
return ERR_PTR(-ENOMEM);
head = rht_dereference_bucket(*pprev, tbl, hash);
RCU_INIT_POINTER(obj->next, head); RCU_INIT_POINTER(obj->next, head);
if (ht->rhlist) { if (ht->rhlist) {
@ -509,7 +612,7 @@ static struct bucket_table *rhashtable_insert_one(struct rhashtable *ht,
RCU_INIT_POINTER(list->next, NULL); RCU_INIT_POINTER(list->next, NULL);
} }
rcu_assign_pointer(tbl->buckets[hash], obj); rcu_assign_pointer(*pprev, obj);
atomic_inc(&ht->nelems); atomic_inc(&ht->nelems);
if (rht_grow_above_75(ht, tbl)) if (rht_grow_above_75(ht, tbl))
@ -975,7 +1078,7 @@ void rhashtable_free_and_destroy(struct rhashtable *ht,
void (*free_fn)(void *ptr, void *arg), void (*free_fn)(void *ptr, void *arg),
void *arg) void *arg)
{ {
const struct bucket_table *tbl; struct bucket_table *tbl;
unsigned int i; unsigned int i;
cancel_work_sync(&ht->run_work); cancel_work_sync(&ht->run_work);
@ -986,7 +1089,7 @@ void rhashtable_free_and_destroy(struct rhashtable *ht,
for (i = 0; i < tbl->size; i++) { for (i = 0; i < tbl->size; i++) {
struct rhash_head *pos, *next; struct rhash_head *pos, *next;
for (pos = rht_dereference(tbl->buckets[i], ht), for (pos = rht_dereference(*rht_bucket(tbl, i), ht),
next = !rht_is_a_nulls(pos) ? next = !rht_is_a_nulls(pos) ?
rht_dereference(pos->next, ht) : NULL; rht_dereference(pos->next, ht) : NULL;
!rht_is_a_nulls(pos); !rht_is_a_nulls(pos);
@ -1007,3 +1110,70 @@ void rhashtable_destroy(struct rhashtable *ht)
return rhashtable_free_and_destroy(ht, NULL, NULL); return rhashtable_free_and_destroy(ht, NULL, NULL);
} }
EXPORT_SYMBOL_GPL(rhashtable_destroy); EXPORT_SYMBOL_GPL(rhashtable_destroy);
/*
 * rht_bucket_nested - find the bucket slot for @hash in a nested table
 * @tbl:	nested bucket table (tbl->nest != 0)
 * @hash:	bucket index
 *
 * The low tbl->nest bits of @hash index the first-level page; each
 * further level consumes another "shift" bits, where one page holds
 * (1 << shift) slots.  The bits left over index the last-level page.
 *
 * Nothing is allocated.  If a page on the path is missing, the address
 * of a static empty nulls head is returned, so the result is never NULL.
 *
 * NOTE(review): rht_for_each_rcu() reaches this function through
 * rht_bucket(); confirm that the rht_dereference_bucket() annotation
 * used here is also satisfied for readers that hold only the RCU read
 * lock.
 */
struct rhash_head __rcu **rht_bucket_nested(const struct bucket_table *tbl,
					    unsigned int hash)
{
	const unsigned int shift = PAGE_SHIFT - ilog2(sizeof(void *));
	static struct rhash_head __rcu *rhnull =
		(struct rhash_head __rcu *)NULLS_MARKER(0);
	unsigned int slot = hash & ((1 << tbl->nest) - 1);
	unsigned int remaining = tbl->size >> tbl->nest;
	unsigned int rest = hash >> tbl->nest;
	union nested_table *level;

	level = (union nested_table *)rcu_dereference_raw(tbl->buckets[0]);
	level = rht_dereference_bucket(level[slot].table, tbl, hash);

	/* Descend until the current page is a last-level page. */
	for (; level && remaining > (1 << shift);
	     remaining >>= shift, rest >>= shift) {
		slot = rest & ((1 << shift) - 1);
		level = rht_dereference_bucket(level[slot].table, tbl, hash);
	}

	return level ? &level[rest].bucket : &rhnull;
}
EXPORT_SYMBOL_GPL(rht_bucket_nested);
/*
 * rht_bucket_nested_insert - find or create the bucket slot for @hash
 * @ht:		hash table, needed to initialise new last-level pages
 * @tbl:	nested bucket table (tbl->nest != 0)
 * @hash:	bucket index
 *
 * Walks the same path as rht_bucket_nested(), but obtains every page
 * through nested_table_alloc(), which allocates missing pages.  The
 * hash bits consumed so far and their count are tracked so that a new
 * last-level page can be initialised with per-bucket nulls values.
 *
 * Returns the bucket slot, or NULL if a page could not be allocated.
 */
struct rhash_head __rcu **rht_bucket_nested_insert(struct rhashtable *ht,
						   struct bucket_table *tbl,
						   unsigned int hash)
{
	const unsigned int shift = PAGE_SHIFT - ilog2(sizeof(void *));
	unsigned int remaining = tbl->size >> tbl->nest;
	unsigned int slot = hash & ((1 << tbl->nest) - 1);
	unsigned int consumed = tbl->nest;
	unsigned int prefix = slot;
	union nested_table *level;

	level = (union nested_table *)rcu_dereference_raw(tbl->buckets[0]);
	hash >>= tbl->nest;

	/* A non-zero third argument marks the page as last-level. */
	level = nested_table_alloc(ht, &level[slot].table,
				   remaining <= (1 << shift) ? consumed : 0,
				   prefix);

	while (level && remaining > (1 << shift)) {
		slot = hash & ((1 << shift) - 1);
		prefix |= slot << consumed;
		consumed += shift;
		remaining >>= shift;
		hash >>= shift;

		level = nested_table_alloc(ht, &level[slot].table,
					   remaining <= (1 << shift) ?
					   consumed : 0,
					   prefix);
	}

	if (!level)
		return NULL;

	return &level[hash].bucket;
}
EXPORT_SYMBOL_GPL(rht_bucket_nested_insert);