Merge branch 'bpf-sockmap-estab-fixes'
John Fastabend says: ==================== Eric noted that using the close callback is not sufficient to catch all transitions from ESTABLISHED state to a LISTEN state. So this series does two things. First, only allow adding socks in ESTABLISH state and second use unhash callback to catch tcp_disconnect() transitions. v2: added check for ESTABLISH state in hash update sockmap as well v3: Do not release lock from unhash in error path, no lock was used in the first place. And drop not so useful code comments v4: convert, if (unhash()) return unhash(); return to if (unhash()) unhash(); return; Thanks for reviewing Yonghong I carried your ACKs forward. ==================== Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
This commit is contained in:
commit
fad0c40fab
@ -132,6 +132,7 @@ struct smap_psock {
|
|||||||
struct work_struct gc_work;
|
struct work_struct gc_work;
|
||||||
|
|
||||||
struct proto *sk_proto;
|
struct proto *sk_proto;
|
||||||
|
void (*save_unhash)(struct sock *sk);
|
||||||
void (*save_close)(struct sock *sk, long timeout);
|
void (*save_close)(struct sock *sk, long timeout);
|
||||||
void (*save_data_ready)(struct sock *sk);
|
void (*save_data_ready)(struct sock *sk);
|
||||||
void (*save_write_space)(struct sock *sk);
|
void (*save_write_space)(struct sock *sk);
|
||||||
@ -143,6 +144,7 @@ static int bpf_tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
|
|||||||
static int bpf_tcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size);
|
static int bpf_tcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size);
|
||||||
static int bpf_tcp_sendpage(struct sock *sk, struct page *page,
|
static int bpf_tcp_sendpage(struct sock *sk, struct page *page,
|
||||||
int offset, size_t size, int flags);
|
int offset, size_t size, int flags);
|
||||||
|
static void bpf_tcp_unhash(struct sock *sk);
|
||||||
static void bpf_tcp_close(struct sock *sk, long timeout);
|
static void bpf_tcp_close(struct sock *sk, long timeout);
|
||||||
|
|
||||||
static inline struct smap_psock *smap_psock_sk(const struct sock *sk)
|
static inline struct smap_psock *smap_psock_sk(const struct sock *sk)
|
||||||
@ -184,6 +186,7 @@ static void build_protos(struct proto prot[SOCKMAP_NUM_CONFIGS],
|
|||||||
struct proto *base)
|
struct proto *base)
|
||||||
{
|
{
|
||||||
prot[SOCKMAP_BASE] = *base;
|
prot[SOCKMAP_BASE] = *base;
|
||||||
|
prot[SOCKMAP_BASE].unhash = bpf_tcp_unhash;
|
||||||
prot[SOCKMAP_BASE].close = bpf_tcp_close;
|
prot[SOCKMAP_BASE].close = bpf_tcp_close;
|
||||||
prot[SOCKMAP_BASE].recvmsg = bpf_tcp_recvmsg;
|
prot[SOCKMAP_BASE].recvmsg = bpf_tcp_recvmsg;
|
||||||
prot[SOCKMAP_BASE].stream_memory_read = bpf_tcp_stream_read;
|
prot[SOCKMAP_BASE].stream_memory_read = bpf_tcp_stream_read;
|
||||||
@ -217,6 +220,7 @@ static int bpf_tcp_init(struct sock *sk)
|
|||||||
return -EBUSY;
|
return -EBUSY;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
psock->save_unhash = sk->sk_prot->unhash;
|
||||||
psock->save_close = sk->sk_prot->close;
|
psock->save_close = sk->sk_prot->close;
|
||||||
psock->sk_proto = sk->sk_prot;
|
psock->sk_proto = sk->sk_prot;
|
||||||
|
|
||||||
@ -305,30 +309,12 @@ static struct smap_psock_map_entry *psock_map_pop(struct sock *sk,
|
|||||||
return e;
|
return e;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void bpf_tcp_close(struct sock *sk, long timeout)
|
static void bpf_tcp_remove(struct sock *sk, struct smap_psock *psock)
|
||||||
{
|
{
|
||||||
void (*close_fun)(struct sock *sk, long timeout);
|
|
||||||
struct smap_psock_map_entry *e;
|
struct smap_psock_map_entry *e;
|
||||||
struct sk_msg_buff *md, *mtmp;
|
struct sk_msg_buff *md, *mtmp;
|
||||||
struct smap_psock *psock;
|
|
||||||
struct sock *osk;
|
struct sock *osk;
|
||||||
|
|
||||||
lock_sock(sk);
|
|
||||||
rcu_read_lock();
|
|
||||||
psock = smap_psock_sk(sk);
|
|
||||||
if (unlikely(!psock)) {
|
|
||||||
rcu_read_unlock();
|
|
||||||
release_sock(sk);
|
|
||||||
return sk->sk_prot->close(sk, timeout);
|
|
||||||
}
|
|
||||||
|
|
||||||
/* The psock may be destroyed anytime after exiting the RCU critial
|
|
||||||
* section so by the time we use close_fun the psock may no longer
|
|
||||||
* be valid. However, bpf_tcp_close is called with the sock lock
|
|
||||||
* held so the close hook and sk are still valid.
|
|
||||||
*/
|
|
||||||
close_fun = psock->save_close;
|
|
||||||
|
|
||||||
if (psock->cork) {
|
if (psock->cork) {
|
||||||
free_start_sg(psock->sock, psock->cork, true);
|
free_start_sg(psock->sock, psock->cork, true);
|
||||||
kfree(psock->cork);
|
kfree(psock->cork);
|
||||||
@ -379,6 +365,42 @@ static void bpf_tcp_close(struct sock *sk, long timeout)
|
|||||||
kfree(e);
|
kfree(e);
|
||||||
e = psock_map_pop(sk, psock);
|
e = psock_map_pop(sk, psock);
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static void bpf_tcp_unhash(struct sock *sk)
|
||||||
|
{
|
||||||
|
void (*unhash_fun)(struct sock *sk);
|
||||||
|
struct smap_psock *psock;
|
||||||
|
|
||||||
|
rcu_read_lock();
|
||||||
|
psock = smap_psock_sk(sk);
|
||||||
|
if (unlikely(!psock)) {
|
||||||
|
rcu_read_unlock();
|
||||||
|
if (sk->sk_prot->unhash)
|
||||||
|
sk->sk_prot->unhash(sk);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
unhash_fun = psock->save_unhash;
|
||||||
|
bpf_tcp_remove(sk, psock);
|
||||||
|
rcu_read_unlock();
|
||||||
|
unhash_fun(sk);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void bpf_tcp_close(struct sock *sk, long timeout)
|
||||||
|
{
|
||||||
|
void (*close_fun)(struct sock *sk, long timeout);
|
||||||
|
struct smap_psock *psock;
|
||||||
|
|
||||||
|
lock_sock(sk);
|
||||||
|
rcu_read_lock();
|
||||||
|
psock = smap_psock_sk(sk);
|
||||||
|
if (unlikely(!psock)) {
|
||||||
|
rcu_read_unlock();
|
||||||
|
release_sock(sk);
|
||||||
|
return sk->sk_prot->close(sk, timeout);
|
||||||
|
}
|
||||||
|
close_fun = psock->save_close;
|
||||||
|
bpf_tcp_remove(sk, psock);
|
||||||
rcu_read_unlock();
|
rcu_read_unlock();
|
||||||
release_sock(sk);
|
release_sock(sk);
|
||||||
close_fun(sk, timeout);
|
close_fun(sk, timeout);
|
||||||
@ -2097,8 +2119,12 @@ static int sock_map_update_elem(struct bpf_map *map,
|
|||||||
return -EINVAL;
|
return -EINVAL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* ULPs are currently supported only for TCP sockets in ESTABLISHED
|
||||||
|
* state.
|
||||||
|
*/
|
||||||
if (skops.sk->sk_type != SOCK_STREAM ||
|
if (skops.sk->sk_type != SOCK_STREAM ||
|
||||||
skops.sk->sk_protocol != IPPROTO_TCP) {
|
skops.sk->sk_protocol != IPPROTO_TCP ||
|
||||||
|
skops.sk->sk_state != TCP_ESTABLISHED) {
|
||||||
fput(socket->file);
|
fput(socket->file);
|
||||||
return -EOPNOTSUPP;
|
return -EOPNOTSUPP;
|
||||||
}
|
}
|
||||||
@ -2453,6 +2479,16 @@ static int sock_hash_update_elem(struct bpf_map *map,
|
|||||||
return -EINVAL;
|
return -EINVAL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* ULPs are currently supported only for TCP sockets in ESTABLISHED
|
||||||
|
* state.
|
||||||
|
*/
|
||||||
|
if (skops.sk->sk_type != SOCK_STREAM ||
|
||||||
|
skops.sk->sk_protocol != IPPROTO_TCP ||
|
||||||
|
skops.sk->sk_state != TCP_ESTABLISHED) {
|
||||||
|
fput(socket->file);
|
||||||
|
return -EOPNOTSUPP;
|
||||||
|
}
|
||||||
|
|
||||||
lock_sock(skops.sk);
|
lock_sock(skops.sk);
|
||||||
preempt_disable();
|
preempt_disable();
|
||||||
rcu_read_lock();
|
rcu_read_lock();
|
||||||
@ -2543,10 +2579,22 @@ const struct bpf_map_ops sock_hash_ops = {
|
|||||||
.map_check_btf = map_check_no_btf,
|
.map_check_btf = map_check_no_btf,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
static bool bpf_is_valid_sock_op(struct bpf_sock_ops_kern *ops)
|
||||||
|
{
|
||||||
|
return ops->op == BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB ||
|
||||||
|
ops->op == BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB;
|
||||||
|
}
|
||||||
BPF_CALL_4(bpf_sock_map_update, struct bpf_sock_ops_kern *, bpf_sock,
|
BPF_CALL_4(bpf_sock_map_update, struct bpf_sock_ops_kern *, bpf_sock,
|
||||||
struct bpf_map *, map, void *, key, u64, flags)
|
struct bpf_map *, map, void *, key, u64, flags)
|
||||||
{
|
{
|
||||||
WARN_ON_ONCE(!rcu_read_lock_held());
|
WARN_ON_ONCE(!rcu_read_lock_held());
|
||||||
|
|
||||||
|
/* ULPs are currently supported only for TCP sockets in ESTABLISHED
|
||||||
|
* state. This checks that the sock ops triggering the update is
|
||||||
|
* one indicating we are (or will be soon) in an ESTABLISHED state.
|
||||||
|
*/
|
||||||
|
if (!bpf_is_valid_sock_op(bpf_sock))
|
||||||
|
return -EOPNOTSUPP;
|
||||||
return sock_map_ctx_update_elem(bpf_sock, map, key, flags);
|
return sock_map_ctx_update_elem(bpf_sock, map, key, flags);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -2565,6 +2613,9 @@ BPF_CALL_4(bpf_sock_hash_update, struct bpf_sock_ops_kern *, bpf_sock,
|
|||||||
struct bpf_map *, map, void *, key, u64, flags)
|
struct bpf_map *, map, void *, key, u64, flags)
|
||||||
{
|
{
|
||||||
WARN_ON_ONCE(!rcu_read_lock_held());
|
WARN_ON_ONCE(!rcu_read_lock_held());
|
||||||
|
|
||||||
|
if (!bpf_is_valid_sock_op(bpf_sock))
|
||||||
|
return -EOPNOTSUPP;
|
||||||
return sock_hash_ctx_update_elem(bpf_sock, map, key, flags);
|
return sock_hash_ctx_update_elem(bpf_sock, map, key, flags);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -580,7 +580,11 @@ static void test_sockmap(int tasks, void *data)
|
|||||||
/* Test update without programs */
|
/* Test update without programs */
|
||||||
for (i = 0; i < 6; i++) {
|
for (i = 0; i < 6; i++) {
|
||||||
err = bpf_map_update_elem(fd, &i, &sfd[i], BPF_ANY);
|
err = bpf_map_update_elem(fd, &i, &sfd[i], BPF_ANY);
|
||||||
if (err) {
|
if (i < 2 && !err) {
|
||||||
|
printf("Allowed update sockmap '%i:%i' not in ESTABLISHED\n",
|
||||||
|
i, sfd[i]);
|
||||||
|
goto out_sockmap;
|
||||||
|
} else if (i >= 2 && err) {
|
||||||
printf("Failed noprog update sockmap '%i:%i'\n",
|
printf("Failed noprog update sockmap '%i:%i'\n",
|
||||||
i, sfd[i]);
|
i, sfd[i]);
|
||||||
goto out_sockmap;
|
goto out_sockmap;
|
||||||
@ -741,7 +745,7 @@ static void test_sockmap(int tasks, void *data)
|
|||||||
}
|
}
|
||||||
|
|
||||||
/* Test map update elem afterwards fd lives in fd and map_fd */
|
/* Test map update elem afterwards fd lives in fd and map_fd */
|
||||||
for (i = 0; i < 6; i++) {
|
for (i = 2; i < 6; i++) {
|
||||||
err = bpf_map_update_elem(map_fd_rx, &i, &sfd[i], BPF_ANY);
|
err = bpf_map_update_elem(map_fd_rx, &i, &sfd[i], BPF_ANY);
|
||||||
if (err) {
|
if (err) {
|
||||||
printf("Failed map_fd_rx update sockmap %i '%i:%i'\n",
|
printf("Failed map_fd_rx update sockmap %i '%i:%i'\n",
|
||||||
@ -845,7 +849,7 @@ static void test_sockmap(int tasks, void *data)
|
|||||||
}
|
}
|
||||||
|
|
||||||
/* Delete the elems without programs */
|
/* Delete the elems without programs */
|
||||||
for (i = 0; i < 6; i++) {
|
for (i = 2; i < 6; i++) {
|
||||||
err = bpf_map_delete_elem(fd, &i);
|
err = bpf_map_delete_elem(fd, &i);
|
||||||
if (err) {
|
if (err) {
|
||||||
printf("Failed delete sockmap %i '%i:%i'\n",
|
printf("Failed delete sockmap %i '%i:%i'\n",
|
||||||
|
Loading…
Reference in New Issue
Block a user