From 7a868d1e9ab3c534c5ad44e3e5dc46753a1e5636 Mon Sep 17 00:00:00 2001 From: Ying Xue Date: Mon, 12 Jan 2015 14:52:22 +0800 Subject: [PATCH 1/3] rhashtable: involve rhashtable_lookup_compare_insert routine Introduce a new function called rhashtable_lookup_compare_insert() which is very similar to rhashtable_lookup_insert(). But the former makes use of users' given compare function to look for an object, and then inserts it into hash table if found. As the entire process of search and insertion is under protection of per bucket lock, this can help users to avoid the involvement of extra lock. Signed-off-by: Ying Xue Cc: Thomas Graf Acked-by: Thomas Graf Signed-off-by: David S. Miller --- include/linux/rhashtable.h | 5 +++++ lib/rhashtable.c | 42 ++++++++++++++++++++++++++++++++++++-- 2 files changed, 45 insertions(+), 2 deletions(-) diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h index 326acd8c2e9f..7b9bd77ed684 100644 --- a/include/linux/rhashtable.h +++ b/include/linux/rhashtable.h @@ -168,7 +168,12 @@ int rhashtable_shrink(struct rhashtable *ht); void *rhashtable_lookup(struct rhashtable *ht, const void *key); void *rhashtable_lookup_compare(struct rhashtable *ht, const void *key, bool (*compare)(void *, void *), void *arg); + bool rhashtable_lookup_insert(struct rhashtable *ht, struct rhash_head *obj); +bool rhashtable_lookup_compare_insert(struct rhashtable *ht, + struct rhash_head *obj, + bool (*compare)(void *, void *), + void *arg); void rhashtable_destroy(struct rhashtable *ht); diff --git a/lib/rhashtable.c b/lib/rhashtable.c index 8023b554905c..ed6ae1ad304c 100644 --- a/lib/rhashtable.c +++ b/lib/rhashtable.c @@ -726,6 +726,43 @@ EXPORT_SYMBOL_GPL(rhashtable_lookup_compare); * to rhashtable_init(). */ bool rhashtable_lookup_insert(struct rhashtable *ht, struct rhash_head *obj) +{ + struct rhashtable_compare_arg arg = { + .ht = ht, + .key = rht_obj(ht, obj) + ht->p.key_offset, + }; + + BUG_ON(!ht->p.key_len); + + return rhashtable_lookup_compare_insert(ht, obj, &rhashtable_compare, + &arg); +} +EXPORT_SYMBOL_GPL(rhashtable_lookup_insert); + +/** + * rhashtable_lookup_compare_insert - search and insert object to hash table + * with compare function + * @ht: hash table + * @obj: pointer to hash head inside object + * @compare: compare function, must return true on match + * @arg: argument passed on to compare function + * + * Locks down the bucket chain in both the old and new table if a resize + * is in progress to ensure that writers can't remove from the old table + * and can't insert to the new table during the atomic operation of search + * and insertion. Searches for duplicates in both the old and new table if + * a resize is in progress. + * + * Lookups may occur in parallel with hashtable mutations and resizing. + * + * Will trigger an automatic deferred table resizing if the size grows + * beyond the watermark indicated by grow_decision() which can be passed + * to rhashtable_init(). + */ +bool rhashtable_lookup_compare_insert(struct rhashtable *ht, + struct rhash_head *obj, + bool (*compare)(void *, void *), + void *arg) { struct bucket_table *new_tbl, *old_tbl; spinlock_t *new_bucket_lock, *old_bucket_lock; @@ -747,7 +784,8 @@ bool rhashtable_lookup_insert(struct rhashtable *ht, struct rhash_head *obj) if (unlikely(old_tbl != new_tbl)) spin_lock_bh_nested(new_bucket_lock, RHT_LOCK_NESTED); - if (rhashtable_lookup(ht, rht_obj(ht, obj) + ht->p.key_offset)) { + if (rhashtable_lookup_compare(ht, rht_obj(ht, obj) + ht->p.key_offset, + compare, arg)) { success = false; goto exit; } @@ -763,7 +801,7 @@ exit: return success; } -EXPORT_SYMBOL_GPL(rhashtable_lookup_insert); +EXPORT_SYMBOL_GPL(rhashtable_lookup_compare_insert); static size_t rounded_hashtable_size(struct rhashtable_params *params) { From c5adde9468b0714a051eac7f9666f23eb10b61f7 Mon Sep 17 00:00:00 2001 From: Ying Xue Date: Mon, 12 Jan 2015 14:52:23 +0800 Subject: [PATCH 2/3] netlink: eliminate nl_sk_hash_lock As rhashtable_lookup_compare_insert() can guarantee the process of search and insertion is atomic, it's safe to eliminate the nl_sk_hash_lock. After this, object insertion or removal will be protected with per bucket lock on write side while object lookup is guarded with rcu read lock on read side. Signed-off-by: Ying Xue Cc: Thomas Graf Acked-by: Thomas Graf Signed-off-by: David S. Miller --- net/netlink/af_netlink.c | 33 ++++++++++++++++++++------------- net/netlink/af_netlink.h | 1 - net/netlink/diag.c | 10 +++++----- 3 files changed, 25 insertions(+), 19 deletions(-) diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 298e1df7132a..01b702d63457 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -98,7 +98,7 @@ static void netlink_skb_destructor(struct sk_buff *skb); /* nl_table locking explained: * Lookup and traversal are protected with an RCU read-side lock. Insertion - * and removal are protected with nl_sk_hash_lock while using RCU list + * and removal are protected with per bucket lock while using RCU list * modification primitives and may run in parallel to RCU protected lookups. * Destruction of the Netlink socket may only occur *after* nl_table_lock has * been acquired * either during or after the socket has been removed from @@ -110,10 +110,6 @@ static atomic_t nl_table_users = ATOMIC_INIT(0); #define nl_deref_protected(X) rcu_dereference_protected(X, lockdep_is_held(&nl_table_lock)); -/* Protects netlink socket hash table mutations */ -DEFINE_MUTEX(nl_sk_hash_lock); -EXPORT_SYMBOL_GPL(nl_sk_hash_lock); - static ATOMIC_NOTIFIER_HEAD(netlink_chain); static DEFINE_SPINLOCK(netlink_tap_lock); @@ -998,6 +994,19 @@ static struct sock *__netlink_lookup(struct netlink_table *table, u32 portid, &netlink_compare, &arg); } +static bool __netlink_insert(struct netlink_table *table, struct sock *sk, + struct net *net) +{ + struct netlink_compare_arg arg = { + .net = net, + .portid = nlk_sk(sk)->portid, + }; + + return rhashtable_lookup_compare_insert(&table->hash, + &nlk_sk(sk)->node, + &netlink_compare, &arg); +} + static struct sock *netlink_lookup(struct net *net, int protocol, u32 portid) { struct netlink_table *table = &nl_table[protocol]; @@ -1043,9 +1052,7 @@ static int netlink_insert(struct sock *sk, struct net *net, u32 portid) struct netlink_table *table = &nl_table[sk->sk_protocol]; int err = -EADDRINUSE; - mutex_lock(&nl_sk_hash_lock); - if (__netlink_lookup(table, portid, net)) - goto err; + lock_sock(sk); err = -EBUSY; if (nlk_sk(sk)->portid) @@ -1058,10 +1065,12 @@ static int netlink_insert(struct sock *sk, struct net *net, u32 portid) nlk_sk(sk)->portid = portid; sock_hold(sk); - rhashtable_insert(&table->hash, &nlk_sk(sk)->node); - err = 0; + if (__netlink_insert(table, sk, net)) + err = 0; + else + sock_put(sk); err: - mutex_unlock(&nl_sk_hash_lock); + release_sock(sk); return err; } @@ -1069,13 +1078,11 @@ static void netlink_remove(struct sock *sk) { struct netlink_table *table; - mutex_lock(&nl_sk_hash_lock); table = &nl_table[sk->sk_protocol]; if (rhashtable_remove(&table->hash, &nlk_sk(sk)->node)) { WARN_ON(atomic_read(&sk->sk_refcnt) == 1); __sock_put(sk); } - mutex_unlock(&nl_sk_hash_lock); netlink_table_grab(); if (nlk_sk(sk)->subscriptions) { diff --git a/net/netlink/af_netlink.h b/net/netlink/af_netlink.h index fd96fa76202a..7518375782f5 100644 --- a/net/netlink/af_netlink.h +++ b/net/netlink/af_netlink.h @@ -74,6 +74,5 @@ struct netlink_table { extern struct netlink_table *nl_table; extern rwlock_t nl_table_lock; -extern struct mutex nl_sk_hash_lock; #endif diff --git a/net/netlink/diag.c b/net/netlink/diag.c index fcca36d81a62..bb59a7ed0859 100644 --- a/net/netlink/diag.c +++ b/net/netlink/diag.c @@ -103,7 +103,7 @@ static int __netlink_diag_dump(struct sk_buff *skb, struct netlink_callback *cb, { struct netlink_table *tbl = &nl_table[protocol]; struct rhashtable *ht = &tbl->hash; - const struct bucket_table *htbl = rht_dereference(ht->tbl, ht); + const struct bucket_table *htbl = rht_dereference_rcu(ht->tbl, ht); struct net *net = sock_net(skb->sk); struct netlink_diag_req *req; struct netlink_sock *nlsk; @@ -115,7 +115,7 @@ static int __netlink_diag_dump(struct sk_buff *skb, struct netlink_callback *cb, for (i = 0; i < htbl->size; i++) { struct rhash_head *pos; - rht_for_each_entry(nlsk, pos, htbl, i, node) { + rht_for_each_entry_rcu(nlsk, pos, htbl, i, node) { sk = (struct sock *)nlsk; if (!net_eq(sock_net(sk), net)) @@ -172,7 +172,7 @@ static int netlink_diag_dump(struct sk_buff *skb, struct netlink_callback *cb) req = nlmsg_data(cb->nlh); - mutex_lock(&nl_sk_hash_lock); + rcu_read_lock(); read_lock(&nl_table_lock); if (req->sdiag_protocol == NDIAG_PROTO_ALL) { @@ -186,7 +186,7 @@ static int netlink_diag_dump(struct sk_buff *skb, struct netlink_callback *cb) } else { if (req->sdiag_protocol >= MAX_LINKS) { read_unlock(&nl_table_lock); - mutex_unlock(&nl_sk_hash_lock); + rcu_read_unlock(); return -ENOENT; } @@ -194,7 +194,7 @@ static int netlink_diag_dump(struct sk_buff *skb, struct netlink_callback *cb) } read_unlock(&nl_table_lock); - mutex_unlock(&nl_sk_hash_lock); + rcu_read_unlock(); return skb->len; } From 6f73d3b13dc5e16ae06025cd1b12a36b2857caa2 Mon Sep 17 00:00:00 2001 From: Ying Xue Date: Mon, 12 Jan 2015 14:52:24 +0800 Subject: [PATCH 3/3] rhashtable: add a note for grow and shrink decision functions As commit c0c09bfdc415 ("rhashtable: avoid unnecessary wakeup for worker queue") moves condition statements of verifying whether hash table size exceeds its maximum threshold or reaches its minimum threshold from resizing functions to resizing decision functions, we should add a note in rhashtable.h to indicate the implementation of what the grow and shrink decision function must enforce min/max shift, otherwise, it's failed to take min/max shift's set watermarks into effect. Signed-off-by: Ying Xue Cc: Thomas Graf Acked-by: Thomas Graf Signed-off-by: David S. Miller --- include/linux/rhashtable.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h index 7b9bd77ed684..9570832ab07c 100644 --- a/include/linux/rhashtable.h +++ b/include/linux/rhashtable.h @@ -79,6 +79,10 @@ struct rhashtable; * @obj_hashfn: Function to hash object * @grow_decision: If defined, may return true if table should expand * @shrink_decision: If defined, may return true if table should shrink + * + * Note: when implementing the grow and shrink decision function, min/max + * shift must be enforced, otherwise, resizing watermarks they set may be + * useless. */ struct rhashtable_params { size_t nelem_hint;