linux/include/net/raw.h
Eric Dumazet 04309b5f5d raw: convert raw sockets to RCU
[ Upstream commit 0daf07e527095e64ee8927ce297ab626643e9f51 ]

Using rwlock in networking code is extremely risky.
writers can starve if enough readers are constantly
grabing the rwlock.

I thought rwlock were at fault and sent this patch:

https://lkml.org/lkml/2022/6/17/272

But Peter and Linus essentially told me rwlock had to be unfair.

We need to get rid of rwlock in networking code.

Without this fix, following script triggers soft lockups:

for i in {1..48}
do
 ping -f -n -q 127.0.0.1 &
 sleep 0.1
done

Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Sasha Levin <sashal@kernel.org>
2022-08-17 15:14:31 +02:00

94 lines
2.3 KiB
C

/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* INET An implementation of the TCP/IP protocol suite for the LINUX
* operating system. INET is implemented using the BSD Socket
* interface as the means of communication with the user level.
*
* Definitions for the RAW-IP module.
*
* Version: @(#)raw.h 1.0.2 05/07/93
*
* Author: Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
*/
#ifndef _RAW_H
#define _RAW_H
#include <net/inet_sock.h>
#include <net/protocol.h>
#include <linux/icmp.h>
extern struct proto raw_prot;
extern struct raw_hashinfo raw_v4_hashinfo;
bool raw_v4_match(struct net *net, struct sock *sk, unsigned short num,
__be32 raddr, __be32 laddr, int dif, int sdif);
int raw_abort(struct sock *sk, int err);
void raw_icmp_error(struct sk_buff *, int, u32);
int raw_local_deliver(struct sk_buff *, int);
int raw_rcv(struct sock *, struct sk_buff *);
#define RAW_HTABLE_SIZE MAX_INET_PROTOS
struct raw_hashinfo {
rwlock_t lock;
struct hlist_nulls_head ht[RAW_HTABLE_SIZE];
};
static inline void raw_hashinfo_init(struct raw_hashinfo *hashinfo)
{
int i;
rwlock_init(&hashinfo->lock);
for (i = 0; i < RAW_HTABLE_SIZE; i++)
INIT_HLIST_NULLS_HEAD(&hashinfo->ht[i], i);
}
#ifdef CONFIG_PROC_FS
int raw_proc_init(void);
void raw_proc_exit(void);
struct raw_iter_state {
struct seq_net_private p;
int bucket;
};
static inline struct raw_iter_state *raw_seq_private(struct seq_file *seq)
{
return seq->private;
}
void *raw_seq_start(struct seq_file *seq, loff_t *pos);
void *raw_seq_next(struct seq_file *seq, void *v, loff_t *pos);
void raw_seq_stop(struct seq_file *seq, void *v);
#endif
int raw_hash_sk(struct sock *sk);
void raw_unhash_sk(struct sock *sk);
void raw_init(void);
struct raw_sock {
/* inet_sock has to be the first member */
struct inet_sock inet;
struct icmp_filter filter;
u32 ipmr_table;
};
static inline struct raw_sock *raw_sk(const struct sock *sk)
{
return (struct raw_sock *)sk;
}
static inline bool raw_sk_bound_dev_eq(struct net *net, int bound_dev_if,
int dif, int sdif)
{
#if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV)
return inet_bound_dev_eq(READ_ONCE(net->ipv4.sysctl_raw_l3mdev_accept),
bound_dev_if, dif, sdif);
#else
return inet_bound_dev_eq(true, bound_dev_if, dif, sdif);
#endif
}
#endif /* _RAW_H */