bpf: tcp: Stop bpf_setsockopt(TCP_CONGESTION) in init ops to recur itself
When a bad bpf prog '.init' calls bpf_setsockopt(TCP_CONGESTION, "itself"), it will trigger this loop: .init => bpf_setsockopt(tcp_cc) => .init => bpf_setsockopt(tcp_cc) ... ... => .init => bpf_setsockopt(tcp_cc). This was previously prevented by the prog->active counter, but the prog->active detection cannot be used in struct_ops, as explained in the earlier patch of the set. In this patch, the second bpf_setsockopt(tcp_cc) is not allowed, in order to break the loop. This is done by using one bit of an existing 1-byte hole in tcp_sock to check whether there is an ongoing bpf_setsockopt(TCP_CONGESTION) in this tcp_sock. Note that this essentially means only the first '.init' can call bpf_setsockopt(TCP_CONGESTION) to pick a fallback cc (e.g. when the peer does not support ECN), and the second '.init' cannot fall back to another cc. This applies even if the second bpf_setsockopt(TCP_CONGESTION) would not cause a loop. Signed-off-by: Martin KaFai Lau <martin.lau@kernel.org> Reviewed-by: Eric Dumazet <edumazet@google.com> Link: https://lore.kernel.org/r/20220929070407.965581-5-martin.lau@linux.dev Signed-off-by: Alexei Starovoitov <ast@kernel.org>
This commit is contained in:
parent
1e7d217faa
commit
061ff04071
@ -388,6 +388,12 @@ struct tcp_sock {
|
||||
u8 bpf_sock_ops_cb_flags; /* Control calling BPF programs
|
||||
* values defined in uapi/linux/tcp.h
|
||||
*/
|
||||
u8 bpf_chg_cc_inprogress:1; /* In the middle of
|
||||
* bpf_setsockopt(TCP_CONGESTION),
|
||||
* it prevents bpf_tcp_cc->init()
|
||||
* from recursing into itself by calling
|
||||
* bpf_setsockopt(TCP_CONGESTION, "itself").
|
||||
*/
|
||||
#define BPF_SOCK_OPS_TEST_FLAG(TP, ARG) (TP->bpf_sock_ops_cb_flags & ARG)
|
||||
#else
|
||||
#define BPF_SOCK_OPS_TEST_FLAG(TP, ARG) 0
|
||||
|
@ -5105,6 +5105,9 @@ static int bpf_sol_tcp_setsockopt(struct sock *sk, int optname,
|
||||
static int sol_tcp_sockopt_congestion(struct sock *sk, char *optval,
|
||||
int *optlen, bool getopt)
|
||||
{
|
||||
struct tcp_sock *tp;
|
||||
int ret;
|
||||
|
||||
if (*optlen < 2)
|
||||
return -EINVAL;
|
||||
|
||||
@ -5125,8 +5128,31 @@ static int sol_tcp_sockopt_congestion(struct sock *sk, char *optval,
|
||||
if (*optlen >= sizeof("cdg") - 1 && !strncmp("cdg", optval, *optlen))
|
||||
return -ENOTSUPP;
|
||||
|
||||
return do_tcp_setsockopt(sk, SOL_TCP, TCP_CONGESTION,
|
||||
/* It stops this looping
|
||||
*
|
||||
* .init => bpf_setsockopt(tcp_cc) => .init =>
|
||||
* bpf_setsockopt(tcp_cc) => .init => ....
|
||||
*
|
||||
* The second bpf_setsockopt(tcp_cc) is not allowed
|
||||
* in order to break the loop when both .init
|
||||
* are the same bpf prog.
|
||||
*
|
||||
* This applies even if the second bpf_setsockopt(tcp_cc)
|
||||
* does not cause a loop. Only the first
|
||||
* '.init' can call bpf_setsockopt(TCP_CONGESTION) to
|
||||
* pick a fallback cc (eg. peer does not support ECN)
|
||||
* and the second '.init' cannot fallback to
|
||||
* another.
|
||||
*/
|
||||
tp = tcp_sk(sk);
|
||||
if (tp->bpf_chg_cc_inprogress)
|
||||
return -EBUSY;
|
||||
|
||||
tp->bpf_chg_cc_inprogress = 1;
|
||||
ret = do_tcp_setsockopt(sk, SOL_TCP, TCP_CONGESTION,
|
||||
KERNEL_SOCKPTR(optval), *optlen);
|
||||
tp->bpf_chg_cc_inprogress = 0;
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int sol_tcp_sockopt(struct sock *sk, int optname,
|
||||
|
@ -541,6 +541,7 @@ struct sock *tcp_create_openreq_child(const struct sock *sk,
|
||||
newtp->fastopen_req = NULL;
|
||||
RCU_INIT_POINTER(newtp->fastopen_rsk, NULL);
|
||||
|
||||
newtp->bpf_chg_cc_inprogress = 0;
|
||||
tcp_bpf_clone(sk, newsk);
|
||||
|
||||
__TCP_INC_STATS(sock_net(sk), TCP_MIB_PASSIVEOPENS);
|
||||
|
Loading…
Reference in New Issue
Block a user