4f90d034bb
This patch verifies the 'first' struct member of struct mptcp_sock, which points to the first subflow of msk. Save 'sk' in mptcp_storage, and verify it with 'first' in verify_msk(). v5: - Use ASSERT_EQ() instead of a manual comparison + log (Andrii). Signed-off-by: Geliang Tang <geliang.tang@suse.com> Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com> Signed-off-by: Andrii Nakryiko <andrii@kernel.org> Acked-by: Matthieu Baerts <matthieu.baerts@tessares.net> Link: https://lore.kernel.org/bpf/20220519233016.105670-8-mathew.j.martineau@linux.intel.com
242 lines
6.6 KiB
C
242 lines
6.6 KiB
C
/* SPDX-License-Identifier: GPL-2.0 */
|
|
#ifndef __BPF_TCP_HELPERS_H
|
|
#define __BPF_TCP_HELPERS_H
|
|
|
|
#include <stdbool.h>
|
|
#include <linux/types.h>
|
|
#include <bpf/bpf_helpers.h>
|
|
#include <bpf/bpf_core_read.h>
|
|
#include <bpf/bpf_tracing.h>
|
|
|
|
#define BPF_STRUCT_OPS(name, args...) \
|
|
SEC("struct_ops/"#name) \
|
|
BPF_PROG(name, args)
|
|
|
|
#ifndef SOL_TCP
|
|
#define SOL_TCP 6
|
|
#endif
|
|
|
|
#ifndef TCP_CA_NAME_MAX
|
|
#define TCP_CA_NAME_MAX 16
|
|
#endif
|
|
|
|
#define tcp_jiffies32 ((__u32)bpf_jiffies64())
|
|
|
|
struct sock_common {
|
|
unsigned char skc_state;
|
|
__u16 skc_num;
|
|
} __attribute__((preserve_access_index));
|
|
|
|
enum sk_pacing {
|
|
SK_PACING_NONE = 0,
|
|
SK_PACING_NEEDED = 1,
|
|
SK_PACING_FQ = 2,
|
|
};
|
|
|
|
struct sock {
|
|
struct sock_common __sk_common;
|
|
#define sk_state __sk_common.skc_state
|
|
unsigned long sk_pacing_rate;
|
|
__u32 sk_pacing_status; /* see enum sk_pacing */
|
|
} __attribute__((preserve_access_index));
|
|
|
|
struct inet_sock {
|
|
struct sock sk;
|
|
} __attribute__((preserve_access_index));
|
|
|
|
struct inet_connection_sock {
|
|
struct inet_sock icsk_inet;
|
|
__u8 icsk_ca_state:6,
|
|
icsk_ca_setsockopt:1,
|
|
icsk_ca_dst_locked:1;
|
|
struct {
|
|
__u8 pending;
|
|
} icsk_ack;
|
|
__u64 icsk_ca_priv[104 / sizeof(__u64)];
|
|
} __attribute__((preserve_access_index));
|
|
|
|
struct request_sock {
|
|
struct sock_common __req_common;
|
|
} __attribute__((preserve_access_index));
|
|
|
|
struct tcp_sock {
|
|
struct inet_connection_sock inet_conn;
|
|
|
|
__u32 rcv_nxt;
|
|
__u32 snd_nxt;
|
|
__u32 snd_una;
|
|
__u32 window_clamp;
|
|
__u8 ecn_flags;
|
|
__u32 delivered;
|
|
__u32 delivered_ce;
|
|
__u32 snd_cwnd;
|
|
__u32 snd_cwnd_cnt;
|
|
__u32 snd_cwnd_clamp;
|
|
__u32 snd_ssthresh;
|
|
__u8 syn_data:1, /* SYN includes data */
|
|
syn_fastopen:1, /* SYN includes Fast Open option */
|
|
syn_fastopen_exp:1,/* SYN includes Fast Open exp. option */
|
|
syn_fastopen_ch:1, /* Active TFO re-enabling probe */
|
|
syn_data_acked:1,/* data in SYN is acked by SYN-ACK */
|
|
save_syn:1, /* Save headers of SYN packet */
|
|
is_cwnd_limited:1,/* forward progress limited by snd_cwnd? */
|
|
syn_smc:1; /* SYN includes SMC */
|
|
__u32 max_packets_out;
|
|
__u32 lsndtime;
|
|
__u32 prior_cwnd;
|
|
__u64 tcp_mstamp; /* most recent packet received/sent */
|
|
bool is_mptcp;
|
|
} __attribute__((preserve_access_index));
|
|
|
|
static __always_inline struct inet_connection_sock *inet_csk(const struct sock *sk)
|
|
{
|
|
return (struct inet_connection_sock *)sk;
|
|
}
|
|
|
|
static __always_inline void *inet_csk_ca(const struct sock *sk)
|
|
{
|
|
return (void *)inet_csk(sk)->icsk_ca_priv;
|
|
}
|
|
|
|
static __always_inline struct tcp_sock *tcp_sk(const struct sock *sk)
|
|
{
|
|
return (struct tcp_sock *)sk;
|
|
}
|
|
|
|
static __always_inline bool before(__u32 seq1, __u32 seq2)
|
|
{
|
|
return (__s32)(seq1-seq2) < 0;
|
|
}
|
|
#define after(seq2, seq1) before(seq1, seq2)
|
|
|
|
#define TCP_ECN_OK 1
|
|
#define TCP_ECN_QUEUE_CWR 2
|
|
#define TCP_ECN_DEMAND_CWR 4
|
|
#define TCP_ECN_SEEN 8
|
|
|
|
enum inet_csk_ack_state_t {
|
|
ICSK_ACK_SCHED = 1,
|
|
ICSK_ACK_TIMER = 2,
|
|
ICSK_ACK_PUSHED = 4,
|
|
ICSK_ACK_PUSHED2 = 8,
|
|
ICSK_ACK_NOW = 16 /* Send the next ACK immediately (once) */
|
|
};
|
|
|
|
enum tcp_ca_event {
|
|
CA_EVENT_TX_START = 0,
|
|
CA_EVENT_CWND_RESTART = 1,
|
|
CA_EVENT_COMPLETE_CWR = 2,
|
|
CA_EVENT_LOSS = 3,
|
|
CA_EVENT_ECN_NO_CE = 4,
|
|
CA_EVENT_ECN_IS_CE = 5,
|
|
};
|
|
|
|
struct ack_sample {
|
|
__u32 pkts_acked;
|
|
__s32 rtt_us;
|
|
__u32 in_flight;
|
|
} __attribute__((preserve_access_index));
|
|
|
|
struct rate_sample {
|
|
__u64 prior_mstamp; /* starting timestamp for interval */
|
|
__u32 prior_delivered; /* tp->delivered at "prior_mstamp" */
|
|
__s32 delivered; /* number of packets delivered over interval */
|
|
long interval_us; /* time for tp->delivered to incr "delivered" */
|
|
__u32 snd_interval_us; /* snd interval for delivered packets */
|
|
__u32 rcv_interval_us; /* rcv interval for delivered packets */
|
|
long rtt_us; /* RTT of last (S)ACKed packet (or -1) */
|
|
int losses; /* number of packets marked lost upon ACK */
|
|
__u32 acked_sacked; /* number of packets newly (S)ACKed upon ACK */
|
|
__u32 prior_in_flight; /* in flight before this ACK */
|
|
bool is_app_limited; /* is sample from packet with bubble in pipe? */
|
|
bool is_retrans; /* is sample from retransmission? */
|
|
bool is_ack_delayed; /* is this (likely) a delayed ACK? */
|
|
} __attribute__((preserve_access_index));
|
|
|
|
#define TCP_CA_NAME_MAX 16
|
|
#define TCP_CONG_NEEDS_ECN 0x2
|
|
|
|
struct tcp_congestion_ops {
|
|
char name[TCP_CA_NAME_MAX];
|
|
__u32 flags;
|
|
|
|
/* initialize private data (optional) */
|
|
void (*init)(struct sock *sk);
|
|
/* cleanup private data (optional) */
|
|
void (*release)(struct sock *sk);
|
|
|
|
/* return slow start threshold (required) */
|
|
__u32 (*ssthresh)(struct sock *sk);
|
|
/* do new cwnd calculation (required) */
|
|
void (*cong_avoid)(struct sock *sk, __u32 ack, __u32 acked);
|
|
/* call before changing ca_state (optional) */
|
|
void (*set_state)(struct sock *sk, __u8 new_state);
|
|
/* call when cwnd event occurs (optional) */
|
|
void (*cwnd_event)(struct sock *sk, enum tcp_ca_event ev);
|
|
/* call when ack arrives (optional) */
|
|
void (*in_ack_event)(struct sock *sk, __u32 flags);
|
|
/* new value of cwnd after loss (required) */
|
|
__u32 (*undo_cwnd)(struct sock *sk);
|
|
/* hook for packet ack accounting (optional) */
|
|
void (*pkts_acked)(struct sock *sk, const struct ack_sample *sample);
|
|
/* override sysctl_tcp_min_tso_segs */
|
|
__u32 (*min_tso_segs)(struct sock *sk);
|
|
/* returns the multiplier used in tcp_sndbuf_expand (optional) */
|
|
__u32 (*sndbuf_expand)(struct sock *sk);
|
|
/* call when packets are delivered to update cwnd and pacing rate,
|
|
* after all the ca_state processing. (optional)
|
|
*/
|
|
void (*cong_control)(struct sock *sk, const struct rate_sample *rs);
|
|
void *owner;
|
|
};
|
|
|
|
#define min(a, b) ((a) < (b) ? (a) : (b))
|
|
#define max(a, b) ((a) > (b) ? (a) : (b))
|
|
#define min_not_zero(x, y) ({ \
|
|
typeof(x) __x = (x); \
|
|
typeof(y) __y = (y); \
|
|
__x == 0 ? __y : ((__y == 0) ? __x : min(__x, __y)); })
|
|
|
|
static __always_inline bool tcp_in_slow_start(const struct tcp_sock *tp)
|
|
{
|
|
return tp->snd_cwnd < tp->snd_ssthresh;
|
|
}
|
|
|
|
static __always_inline bool tcp_is_cwnd_limited(const struct sock *sk)
|
|
{
|
|
const struct tcp_sock *tp = tcp_sk(sk);
|
|
|
|
/* If in slow start, ensure cwnd grows to twice what was ACKed. */
|
|
if (tcp_in_slow_start(tp))
|
|
return tp->snd_cwnd < 2 * tp->max_packets_out;
|
|
|
|
return !!BPF_CORE_READ_BITFIELD(tp, is_cwnd_limited);
|
|
}
|
|
|
|
static __always_inline bool tcp_cc_eq(const char *a, const char *b)
|
|
{
|
|
int i;
|
|
|
|
for (i = 0; i < TCP_CA_NAME_MAX; i++) {
|
|
if (a[i] != b[i])
|
|
return false;
|
|
if (!a[i])
|
|
break;
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
extern __u32 tcp_slow_start(struct tcp_sock *tp, __u32 acked) __ksym;
|
|
extern void tcp_cong_avoid_ai(struct tcp_sock *tp, __u32 w, __u32 acked) __ksym;
|
|
|
|
struct mptcp_sock {
|
|
struct inet_connection_sock sk;
|
|
|
|
__u32 token;
|
|
struct sock *first;
|
|
char ca_name[TCP_CA_NAME_MAX];
|
|
} __attribute__((preserve_access_index));
|
|
|
|
#endif
|