e341694e3e
Heavy Netlink users such as Open vSwitch spend a considerable amount of time in netlink_lookup() due to the read-lock on nl_table_lock. Use of RCU relieves the lock contention. Makes use of the new resizable hash table to avoid locking on the lookup. The hash table will grow if entries exceeds 75% of table size up to a total table size of 64K. It will automatically shrink if usage falls below 30%. Also splits nl_table_lock into a separate mutex to protect hash table mutations and allow synchronize_rcu() to sleep while waiting for readers during expansion and shrinking. Before: 9.16% kpktgend_0 [openvswitch] [k] masked_flow_lookup 6.42% kpktgend_0 [pktgen] [k] mod_cur_headers 6.26% kpktgend_0 [pktgen] [k] pktgen_thread_worker 6.23% kpktgend_0 [kernel.kallsyms] [k] memset 4.79% kpktgend_0 [kernel.kallsyms] [k] netlink_lookup 4.37% kpktgend_0 [kernel.kallsyms] [k] memcpy 3.60% kpktgend_0 [openvswitch] [k] ovs_flow_extract 2.69% kpktgend_0 [kernel.kallsyms] [k] jhash2 After: 15.26% kpktgend_0 [openvswitch] [k] masked_flow_lookup 8.12% kpktgend_0 [pktgen] [k] pktgen_thread_worker 7.92% kpktgend_0 [pktgen] [k] mod_cur_headers 5.11% kpktgend_0 [kernel.kallsyms] [k] memset 4.11% kpktgend_0 [openvswitch] [k] ovs_flow_extract 4.06% kpktgend_0 [kernel.kallsyms] [k] _raw_spin_lock 3.90% kpktgend_0 [kernel.kallsyms] [k] jhash2 [...] 0.67% kpktgend_0 [kernel.kallsyms] [k] netlink_lookup Signed-off-by: Thomas Graf <tgraf@suug.ch> Reviewed-by: Nikolay Aleksandrov <nikolay@redhat.com> Signed-off-by: David S. Miller <davem@davemloft.net>
78 lines
1.7 KiB
C
78 lines
1.7 KiB
C
#ifndef _AF_NETLINK_H
|
|
#define _AF_NETLINK_H
|
|
|
|
#include <linux/rhashtable.h>
|
|
#include <net/sock.h>
|
|
|
|
#define NLGRPSZ(x) (ALIGN(x, sizeof(unsigned long) * 8) / 8)
|
|
#define NLGRPLONGS(x) (NLGRPSZ(x)/sizeof(unsigned long))
|
|
|
|
struct netlink_ring {
|
|
void **pg_vec;
|
|
unsigned int head;
|
|
unsigned int frames_per_block;
|
|
unsigned int frame_size;
|
|
unsigned int frame_max;
|
|
|
|
unsigned int pg_vec_order;
|
|
unsigned int pg_vec_pages;
|
|
unsigned int pg_vec_len;
|
|
|
|
atomic_t pending;
|
|
};
|
|
|
|
struct netlink_sock {
|
|
/* struct sock has to be the first member of netlink_sock */
|
|
struct sock sk;
|
|
u32 portid;
|
|
u32 dst_portid;
|
|
u32 dst_group;
|
|
u32 flags;
|
|
u32 subscriptions;
|
|
u32 ngroups;
|
|
unsigned long *groups;
|
|
unsigned long state;
|
|
size_t max_recvmsg_len;
|
|
wait_queue_head_t wait;
|
|
bool cb_running;
|
|
struct netlink_callback cb;
|
|
struct mutex *cb_mutex;
|
|
struct mutex cb_def_mutex;
|
|
void (*netlink_rcv)(struct sk_buff *skb);
|
|
int (*netlink_bind)(int group);
|
|
void (*netlink_unbind)(int group);
|
|
struct module *module;
|
|
#ifdef CONFIG_NETLINK_MMAP
|
|
struct mutex pg_vec_lock;
|
|
struct netlink_ring rx_ring;
|
|
struct netlink_ring tx_ring;
|
|
atomic_t mapped;
|
|
#endif /* CONFIG_NETLINK_MMAP */
|
|
|
|
struct rhash_head node;
|
|
};
|
|
|
|
static inline struct netlink_sock *nlk_sk(struct sock *sk)
|
|
{
|
|
return container_of(sk, struct netlink_sock, sk);
|
|
}
|
|
|
|
struct netlink_table {
|
|
struct rhashtable hash;
|
|
struct hlist_head mc_list;
|
|
struct listeners __rcu *listeners;
|
|
unsigned int flags;
|
|
unsigned int groups;
|
|
struct mutex *cb_mutex;
|
|
struct module *module;
|
|
int (*bind)(int group);
|
|
void (*unbind)(int group);
|
|
bool (*compare)(struct net *net, struct sock *sock);
|
|
int registered;
|
|
};
|
|
|
|
extern struct netlink_table *nl_table;
|
|
extern rwlock_t nl_table_lock;
|
|
|
|
#endif
|