Merge branch 'bpf: Allow bpf tcp iter to do bpf_(get|set)sockopt'
Martin KaFai says: ==================== This set is to allow bpf tcp iter to call bpf_(get|set)sockopt. With bpf-tcp-cc, new algo rollout happens more often. Instead of restarting the applications to pick up the new tcp-cc, this set allows the bpf tcp iter to call bpf_(get|set)sockopt(TCP_CONGESTION). It is not limited to TCP_CONGESTION, the bpf tcp iter can call bpf_(get|set)sockopt() with other options. The bpf tcp iter can read into all the fields of a tcp_sock, so there is a lot of flexibility to select the desired sk to do setsockopt(), e.g. it can test for TCP_LISTEN only and leave the established connections untouched, or check the addr/port, or check the current tcp-cc name, ...etc. Patch 1-4 are some cleanup and prep work in the tcp and bpf seq_file. Patch 5 is to have the tcp seq_file iterate on the port+addr lhash2 instead of the port only listening_hash. Patch 6 is to have the bpf tcp iter doing batching which then allows lock_sock. lock_sock is needed for setsockopt. Patch 7 allows the bpf tcp iter to call bpf_(get|set)sockopt. v2: - Use __GFP_NOWARN in patch 6 - Add bpf_getsockopt() in patch 7 to give a symmetrical user experience. selftest in patch 8 is changed to also cover bpf_getsockopt(). - Remove CAP_NET_ADMIN check in patch 7. Tracing bpf prog has already required CAP_SYS_ADMIN or CAP_PERFMON. - Move some def macros to bpf_tracing_net.h in patch 8 ==================== Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
This commit is contained in:
commit
d9e8d14b12
@ -1442,6 +1442,9 @@ typedef void (*bpf_iter_show_fdinfo_t) (const struct bpf_iter_aux_info *aux,
|
||||
struct seq_file *seq);
|
||||
typedef int (*bpf_iter_fill_link_info_t)(const struct bpf_iter_aux_info *aux,
|
||||
struct bpf_link_info *info);
|
||||
typedef const struct bpf_func_proto *
|
||||
(*bpf_iter_get_func_proto_t)(enum bpf_func_id func_id,
|
||||
const struct bpf_prog *prog);
|
||||
|
||||
enum bpf_iter_feature {
|
||||
BPF_ITER_RESCHED = BIT(0),
|
||||
@ -1454,6 +1457,7 @@ struct bpf_iter_reg {
|
||||
bpf_iter_detach_target_t detach_target;
|
||||
bpf_iter_show_fdinfo_t show_fdinfo;
|
||||
bpf_iter_fill_link_info_t fill_link_info;
|
||||
bpf_iter_get_func_proto_t get_func_proto;
|
||||
u32 ctx_arg_info_size;
|
||||
u32 feature;
|
||||
struct bpf_ctx_arg_aux ctx_arg_info[BPF_ITER_CTX_ARG_MAX];
|
||||
@ -1476,6 +1480,8 @@ struct bpf_iter__bpf_map_elem {
|
||||
int bpf_iter_reg_target(const struct bpf_iter_reg *reg_info);
|
||||
void bpf_iter_unreg_target(const struct bpf_iter_reg *reg_info);
|
||||
bool bpf_iter_prog_supported(struct bpf_prog *prog);
|
||||
const struct bpf_func_proto *
|
||||
bpf_iter_get_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog);
|
||||
int bpf_iter_link_attach(const union bpf_attr *attr, bpfptr_t uattr, struct bpf_prog *prog);
|
||||
int bpf_iter_new_fd(struct bpf_link *link);
|
||||
bool bpf_link_is_iter(struct bpf_link *link);
|
||||
@ -2050,6 +2056,8 @@ extern const struct bpf_func_proto bpf_task_storage_get_proto;
|
||||
extern const struct bpf_func_proto bpf_task_storage_delete_proto;
|
||||
extern const struct bpf_func_proto bpf_for_each_map_elem_proto;
|
||||
extern const struct bpf_func_proto bpf_btf_find_by_name_kind_proto;
|
||||
extern const struct bpf_func_proto bpf_sk_setsockopt_proto;
|
||||
extern const struct bpf_func_proto bpf_sk_getsockopt_proto;
|
||||
|
||||
const struct bpf_func_proto *bpf_tracing_func_proto(
|
||||
enum bpf_func_id func_id, const struct bpf_prog *prog);
|
||||
|
@ -160,6 +160,12 @@ struct inet_hashinfo {
|
||||
____cacheline_aligned_in_smp;
|
||||
};
|
||||
|
||||
/* Resume walking a list after the entry currently held in __icsk; entries
 * are linked through their icsk_listen_portaddr_node member.
 */
#define inet_lhash2_for_each_icsk_continue(__icsk) \
|
||||
hlist_for_each_entry_continue(__icsk, icsk_listen_portaddr_node)
|
||||
|
||||
/* Walk every entry of "list" from its head, using __icsk as the cursor and
 * icsk_listen_portaddr_node as the link member.
 */
#define inet_lhash2_for_each_icsk(__icsk, list) \
|
||||
hlist_for_each_entry(__icsk, list, icsk_listen_portaddr_node)
|
||||
|
||||
/* Same walk as inet_lhash2_for_each_icsk() but built on
 * hlist_for_each_entry_rcu().
 */
#define inet_lhash2_for_each_icsk_rcu(__icsk, list) \
|
||||
hlist_for_each_entry_rcu(__icsk, list, icsk_listen_portaddr_node)
|
||||
|
||||
|
@ -1959,7 +1959,6 @@ struct tcp_iter_state {
|
||||
struct seq_net_private p;
|
||||
enum tcp_seq_states state;
|
||||
struct sock *syn_wait_sk;
|
||||
struct tcp_seq_afinfo *bpf_seq_afinfo;
|
||||
int bucket, offset, sbucket, num;
|
||||
loff_t last_pos;
|
||||
};
|
||||
|
@ -360,6 +360,28 @@ bool bpf_iter_prog_supported(struct bpf_prog *prog)
|
||||
return supported;
|
||||
}
|
||||
|
||||
const struct bpf_func_proto *
|
||||
bpf_iter_get_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
|
||||
{
|
||||
const struct bpf_iter_target_info *tinfo;
|
||||
const struct bpf_func_proto *fn = NULL;
|
||||
|
||||
mutex_lock(&targets_mutex);
|
||||
list_for_each_entry(tinfo, &targets, list) {
|
||||
if (tinfo->btf_id == prog->aux->attach_btf_id) {
|
||||
const struct bpf_iter_reg *reg_info;
|
||||
|
||||
reg_info = tinfo->reg_info;
|
||||
if (reg_info->get_func_proto)
|
||||
fn = reg_info->get_func_proto(func_id, prog);
|
||||
break;
|
||||
}
|
||||
}
|
||||
mutex_unlock(&targets_mutex);
|
||||
|
||||
return fn;
|
||||
}
|
||||
|
||||
static void bpf_iter_link_release(struct bpf_link *link)
|
||||
{
|
||||
struct bpf_iter_link *iter_link =
|
||||
|
@ -1461,6 +1461,8 @@ raw_tp_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
|
||||
const struct bpf_func_proto *
|
||||
tracing_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
|
||||
{
|
||||
const struct bpf_func_proto *fn;
|
||||
|
||||
switch (func_id) {
|
||||
#ifdef CONFIG_NET
|
||||
case BPF_FUNC_skb_output:
|
||||
@ -1501,7 +1503,10 @@ tracing_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
|
||||
case BPF_FUNC_d_path:
|
||||
return &bpf_d_path_proto;
|
||||
default:
|
||||
return raw_tp_prog_func_proto(func_id, prog);
|
||||
fn = raw_tp_prog_func_proto(func_id, prog);
|
||||
if (!fn && prog->expected_attach_type == BPF_TRACE_ITER)
|
||||
fn = bpf_iter_get_func_proto(func_id, prog);
|
||||
return fn;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -5016,6 +5016,40 @@ err_clear:
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
/* Helper body for bpf_sk_setsockopt: hands sk, level, optname, optval and
 * optlen unchanged to _bpf_setsockopt() and returns its result.
 */
BPF_CALL_5(bpf_sk_setsockopt, struct sock *, sk, int, level,
|
||||
int, optname, char *, optval, int, optlen)
|
||||
{
|
||||
return _bpf_setsockopt(sk, level, optname, optval, optlen);
|
||||
}
|
||||
|
||||
/* Prototype record for bpf_sk_setsockopt; argN_type describes the Nth
 * argument of the helper in declaration order
 * (sk, level, optname, optval, optlen).
 */
const struct bpf_func_proto bpf_sk_setsockopt_proto = {
|
||||
.func = bpf_sk_setsockopt,
|
||||
.gpl_only = false,
|
||||
.ret_type = RET_INTEGER,
|
||||
.arg1_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON, /* sk */
|
||||
.arg2_type = ARG_ANYTHING, /* level */
|
||||
.arg3_type = ARG_ANYTHING, /* optname */
|
||||
.arg4_type = ARG_PTR_TO_MEM, /* optval */
|
||||
.arg5_type = ARG_CONST_SIZE, /* optlen */
|
||||
};
|
||||
|
||||
/* Helper body for bpf_sk_getsockopt: hands sk, level, optname, optval and
 * optlen unchanged to _bpf_getsockopt() and returns its result.
 */
BPF_CALL_5(bpf_sk_getsockopt, struct sock *, sk, int, level,
|
||||
int, optname, char *, optval, int, optlen)
|
||||
{
|
||||
return _bpf_getsockopt(sk, level, optname, optval, optlen);
|
||||
}
|
||||
|
||||
/* Prototype record for bpf_sk_getsockopt; argN_type describes the Nth
 * argument of the helper in declaration order
 * (sk, level, optname, optval, optlen).  Unlike the setsockopt record,
 * optval is declared ARG_PTR_TO_UNINIT_MEM.
 */
const struct bpf_func_proto bpf_sk_getsockopt_proto = {
|
||||
.func = bpf_sk_getsockopt,
|
||||
.gpl_only = false,
|
||||
.ret_type = RET_INTEGER,
|
||||
.arg1_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON, /* sk */
|
||||
.arg2_type = ARG_ANYTHING, /* level */
|
||||
.arg3_type = ARG_ANYTHING, /* optname */
|
||||
.arg4_type = ARG_PTR_TO_UNINIT_MEM, /* optval */
|
||||
.arg5_type = ARG_CONST_SIZE, /* optlen */
|
||||
};
|
||||
|
||||
BPF_CALL_5(bpf_sock_addr_setsockopt, struct bpf_sock_addr_kern *, ctx,
|
||||
int, level, int, optname, char *, optval, int, optlen)
|
||||
{
|
||||
|
@ -2277,51 +2277,72 @@ EXPORT_SYMBOL(tcp_v4_destroy_sock);
|
||||
#ifdef CONFIG_PROC_FS
|
||||
/* Proc filesystem TCP sock list dumping. */
|
||||
|
||||
/*
|
||||
* Get next listener socket follow cur. If cur is NULL, get first socket
|
||||
* starting from bucket given in st->bucket; when st->bucket is zero the
|
||||
* very first socket in the hash table is returned.
|
||||
static unsigned short seq_file_family(const struct seq_file *seq);
|
||||
|
||||
static bool seq_sk_match(struct seq_file *seq, const struct sock *sk)
|
||||
{
|
||||
unsigned short family = seq_file_family(seq);
|
||||
|
||||
/* AF_UNSPEC is used as a match all */
|
||||
return ((family == AF_UNSPEC || family == sk->sk_family) &&
|
||||
net_eq(sock_net(sk), seq_file_net(seq)));
|
||||
}
|
||||
|
||||
/* Find a non empty bucket (starting from st->bucket)
|
||||
* and return the first sk from it.
|
||||
*/
|
||||
static void *listening_get_first(struct seq_file *seq)
|
||||
{
|
||||
struct tcp_iter_state *st = seq->private;
|
||||
|
||||
st->offset = 0;
|
||||
for (; st->bucket <= tcp_hashinfo.lhash2_mask; st->bucket++) {
|
||||
struct inet_listen_hashbucket *ilb2;
|
||||
struct inet_connection_sock *icsk;
|
||||
struct sock *sk;
|
||||
|
||||
ilb2 = &tcp_hashinfo.lhash2[st->bucket];
|
||||
if (hlist_empty(&ilb2->head))
|
||||
continue;
|
||||
|
||||
spin_lock(&ilb2->lock);
|
||||
inet_lhash2_for_each_icsk(icsk, &ilb2->head) {
|
||||
sk = (struct sock *)icsk;
|
||||
if (seq_sk_match(seq, sk))
|
||||
return sk;
|
||||
}
|
||||
spin_unlock(&ilb2->lock);
|
||||
}
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* Find the next sk of "cur" within the same bucket (i.e. st->bucket).
|
||||
* If "cur" is the last one in the st->bucket,
|
||||
* call listening_get_first() to return the first sk of the next
|
||||
* non empty bucket.
|
||||
*/
|
||||
static void *listening_get_next(struct seq_file *seq, void *cur)
|
||||
{
|
||||
struct tcp_seq_afinfo *afinfo;
|
||||
struct tcp_iter_state *st = seq->private;
|
||||
struct net *net = seq_file_net(seq);
|
||||
struct inet_listen_hashbucket *ilb;
|
||||
struct hlist_nulls_node *node;
|
||||
struct inet_listen_hashbucket *ilb2;
|
||||
struct inet_connection_sock *icsk;
|
||||
struct sock *sk = cur;
|
||||
|
||||
if (st->bpf_seq_afinfo)
|
||||
afinfo = st->bpf_seq_afinfo;
|
||||
else
|
||||
afinfo = PDE_DATA(file_inode(seq->file));
|
||||
|
||||
if (!sk) {
|
||||
get_head:
|
||||
ilb = &tcp_hashinfo.listening_hash[st->bucket];
|
||||
spin_lock(&ilb->lock);
|
||||
sk = sk_nulls_head(&ilb->nulls_head);
|
||||
st->offset = 0;
|
||||
goto get_sk;
|
||||
}
|
||||
ilb = &tcp_hashinfo.listening_hash[st->bucket];
|
||||
++st->num;
|
||||
++st->offset;
|
||||
|
||||
sk = sk_nulls_next(sk);
|
||||
get_sk:
|
||||
sk_nulls_for_each_from(sk, node) {
|
||||
if (!net_eq(sock_net(sk), net))
|
||||
continue;
|
||||
if (afinfo->family == AF_UNSPEC ||
|
||||
sk->sk_family == afinfo->family)
|
||||
icsk = inet_csk(sk);
|
||||
inet_lhash2_for_each_icsk_continue(icsk) {
|
||||
sk = (struct sock *)icsk;
|
||||
if (seq_sk_match(seq, sk))
|
||||
return sk;
|
||||
}
|
||||
spin_unlock(&ilb->lock);
|
||||
st->offset = 0;
|
||||
if (++st->bucket < INET_LHTABLE_SIZE)
|
||||
goto get_head;
|
||||
return NULL;
|
||||
|
||||
ilb2 = &tcp_hashinfo.lhash2[st->bucket];
|
||||
spin_unlock(&ilb2->lock);
|
||||
++st->bucket;
|
||||
return listening_get_first(seq);
|
||||
}
|
||||
|
||||
static void *listening_get_idx(struct seq_file *seq, loff_t *pos)
|
||||
@ -2331,7 +2352,7 @@ static void *listening_get_idx(struct seq_file *seq, loff_t *pos)
|
||||
|
||||
st->bucket = 0;
|
||||
st->offset = 0;
|
||||
rc = listening_get_next(seq, NULL);
|
||||
rc = listening_get_first(seq);
|
||||
|
||||
while (rc && *pos) {
|
||||
rc = listening_get_next(seq, rc);
|
||||
@ -2351,15 +2372,7 @@ static inline bool empty_bucket(const struct tcp_iter_state *st)
|
||||
*/
|
||||
static void *established_get_first(struct seq_file *seq)
|
||||
{
|
||||
struct tcp_seq_afinfo *afinfo;
|
||||
struct tcp_iter_state *st = seq->private;
|
||||
struct net *net = seq_file_net(seq);
|
||||
void *rc = NULL;
|
||||
|
||||
if (st->bpf_seq_afinfo)
|
||||
afinfo = st->bpf_seq_afinfo;
|
||||
else
|
||||
afinfo = PDE_DATA(file_inode(seq->file));
|
||||
|
||||
st->offset = 0;
|
||||
for (; st->bucket <= tcp_hashinfo.ehash_mask; ++st->bucket) {
|
||||
@ -2373,32 +2386,20 @@ static void *established_get_first(struct seq_file *seq)
|
||||
|
||||
spin_lock_bh(lock);
|
||||
sk_nulls_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) {
|
||||
if ((afinfo->family != AF_UNSPEC &&
|
||||
sk->sk_family != afinfo->family) ||
|
||||
!net_eq(sock_net(sk), net)) {
|
||||
continue;
|
||||
}
|
||||
rc = sk;
|
||||
goto out;
|
||||
if (seq_sk_match(seq, sk))
|
||||
return sk;
|
||||
}
|
||||
spin_unlock_bh(lock);
|
||||
}
|
||||
out:
|
||||
return rc;
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static void *established_get_next(struct seq_file *seq, void *cur)
|
||||
{
|
||||
struct tcp_seq_afinfo *afinfo;
|
||||
struct sock *sk = cur;
|
||||
struct hlist_nulls_node *node;
|
||||
struct tcp_iter_state *st = seq->private;
|
||||
struct net *net = seq_file_net(seq);
|
||||
|
||||
if (st->bpf_seq_afinfo)
|
||||
afinfo = st->bpf_seq_afinfo;
|
||||
else
|
||||
afinfo = PDE_DATA(file_inode(seq->file));
|
||||
|
||||
++st->num;
|
||||
++st->offset;
|
||||
@ -2406,9 +2407,7 @@ static void *established_get_next(struct seq_file *seq, void *cur)
|
||||
sk = sk_nulls_next(sk);
|
||||
|
||||
sk_nulls_for_each_from(sk, node) {
|
||||
if ((afinfo->family == AF_UNSPEC ||
|
||||
sk->sk_family == afinfo->family) &&
|
||||
net_eq(sock_net(sk), net))
|
||||
if (seq_sk_match(seq, sk))
|
||||
return sk;
|
||||
}
|
||||
|
||||
@ -2451,17 +2450,18 @@ static void *tcp_get_idx(struct seq_file *seq, loff_t pos)
|
||||
static void *tcp_seek_last_pos(struct seq_file *seq)
|
||||
{
|
||||
struct tcp_iter_state *st = seq->private;
|
||||
int bucket = st->bucket;
|
||||
int offset = st->offset;
|
||||
int orig_num = st->num;
|
||||
void *rc = NULL;
|
||||
|
||||
switch (st->state) {
|
||||
case TCP_SEQ_STATE_LISTENING:
|
||||
if (st->bucket >= INET_LHTABLE_SIZE)
|
||||
if (st->bucket > tcp_hashinfo.lhash2_mask)
|
||||
break;
|
||||
st->state = TCP_SEQ_STATE_LISTENING;
|
||||
rc = listening_get_next(seq, NULL);
|
||||
while (offset-- && rc)
|
||||
rc = listening_get_first(seq);
|
||||
while (offset-- && rc && bucket == st->bucket)
|
||||
rc = listening_get_next(seq, rc);
|
||||
if (rc)
|
||||
break;
|
||||
@ -2472,7 +2472,7 @@ static void *tcp_seek_last_pos(struct seq_file *seq)
|
||||
if (st->bucket > tcp_hashinfo.ehash_mask)
|
||||
break;
|
||||
rc = established_get_first(seq);
|
||||
while (offset-- && rc)
|
||||
while (offset-- && rc && bucket == st->bucket)
|
||||
rc = established_get_next(seq, rc);
|
||||
}
|
||||
|
||||
@ -2542,7 +2542,7 @@ void tcp_seq_stop(struct seq_file *seq, void *v)
|
||||
switch (st->state) {
|
||||
case TCP_SEQ_STATE_LISTENING:
|
||||
if (v != SEQ_START_TOKEN)
|
||||
spin_unlock(&tcp_hashinfo.listening_hash[st->bucket].lock);
|
||||
spin_unlock(&tcp_hashinfo.lhash2[st->bucket].lock);
|
||||
break;
|
||||
case TCP_SEQ_STATE_ESTABLISHED:
|
||||
if (v)
|
||||
@ -2687,6 +2687,15 @@ out:
|
||||
}
|
||||
|
||||
#ifdef CONFIG_BPF_SYSCALL
|
||||
/* Private seq_file state of the bpf tcp iterator: the regular
 * tcp_iter_state followed by the bookkeeping for a batch of held sockets.
 */
struct bpf_tcp_iter_state {
|
||||
struct tcp_iter_state state;
|
||||
unsigned int cur_sk; /* index in batch[] of the socket currently being shown */
|
||||
unsigned int end_sk; /* number of batch[] slots that hold a socket */
|
||||
unsigned int max_sk; /* capacity of batch[] */
|
||||
struct sock **batch; /* sockets collected from one bucket, each with a reference held */
|
||||
bool st_bucket_done; /* true when the batch captured every matching socket of the bucket */
|
||||
};
|
||||
|
||||
struct bpf_iter__tcp {
|
||||
__bpf_md_ptr(struct bpf_iter_meta *, meta);
|
||||
__bpf_md_ptr(struct sock_common *, sk_common);
|
||||
@ -2705,16 +2714,204 @@ static int tcp_prog_seq_show(struct bpf_prog *prog, struct bpf_iter_meta *meta,
|
||||
return bpf_iter_run_prog(prog, &ctx);
|
||||
}
|
||||
|
||||
static void bpf_iter_tcp_put_batch(struct bpf_tcp_iter_state *iter)
|
||||
{
|
||||
while (iter->cur_sk < iter->end_sk)
|
||||
sock_put(iter->batch[iter->cur_sk++]);
|
||||
}
|
||||
|
||||
static int bpf_iter_tcp_realloc_batch(struct bpf_tcp_iter_state *iter,
|
||||
unsigned int new_batch_sz)
|
||||
{
|
||||
struct sock **new_batch;
|
||||
|
||||
new_batch = kvmalloc(sizeof(*new_batch) * new_batch_sz,
|
||||
GFP_USER | __GFP_NOWARN);
|
||||
if (!new_batch)
|
||||
return -ENOMEM;
|
||||
|
||||
bpf_iter_tcp_put_batch(iter);
|
||||
kvfree(iter->batch);
|
||||
iter->batch = new_batch;
|
||||
iter->max_sk = new_batch_sz;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static unsigned int bpf_iter_tcp_listening_batch(struct seq_file *seq,
|
||||
struct sock *start_sk)
|
||||
{
|
||||
struct bpf_tcp_iter_state *iter = seq->private;
|
||||
struct tcp_iter_state *st = &iter->state;
|
||||
struct inet_connection_sock *icsk;
|
||||
unsigned int expected = 1;
|
||||
struct sock *sk;
|
||||
|
||||
sock_hold(start_sk);
|
||||
iter->batch[iter->end_sk++] = start_sk;
|
||||
|
||||
icsk = inet_csk(start_sk);
|
||||
inet_lhash2_for_each_icsk_continue(icsk) {
|
||||
sk = (struct sock *)icsk;
|
||||
if (seq_sk_match(seq, sk)) {
|
||||
if (iter->end_sk < iter->max_sk) {
|
||||
sock_hold(sk);
|
||||
iter->batch[iter->end_sk++] = sk;
|
||||
}
|
||||
expected++;
|
||||
}
|
||||
}
|
||||
spin_unlock(&tcp_hashinfo.lhash2[st->bucket].lock);
|
||||
|
||||
return expected;
|
||||
}
|
||||
|
||||
static unsigned int bpf_iter_tcp_established_batch(struct seq_file *seq,
|
||||
struct sock *start_sk)
|
||||
{
|
||||
struct bpf_tcp_iter_state *iter = seq->private;
|
||||
struct tcp_iter_state *st = &iter->state;
|
||||
struct hlist_nulls_node *node;
|
||||
unsigned int expected = 1;
|
||||
struct sock *sk;
|
||||
|
||||
sock_hold(start_sk);
|
||||
iter->batch[iter->end_sk++] = start_sk;
|
||||
|
||||
sk = sk_nulls_next(start_sk);
|
||||
sk_nulls_for_each_from(sk, node) {
|
||||
if (seq_sk_match(seq, sk)) {
|
||||
if (iter->end_sk < iter->max_sk) {
|
||||
sock_hold(sk);
|
||||
iter->batch[iter->end_sk++] = sk;
|
||||
}
|
||||
expected++;
|
||||
}
|
||||
}
|
||||
spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
|
||||
|
||||
return expected;
|
||||
}
|
||||
|
||||
static struct sock *bpf_iter_tcp_batch(struct seq_file *seq)
|
||||
{
|
||||
struct bpf_tcp_iter_state *iter = seq->private;
|
||||
struct tcp_iter_state *st = &iter->state;
|
||||
unsigned int expected;
|
||||
bool resized = false;
|
||||
struct sock *sk;
|
||||
|
||||
/* The st->bucket is done. Directly advance to the next
|
||||
* bucket instead of having the tcp_seek_last_pos() to skip
|
||||
* one by one in the current bucket and eventually find out
|
||||
* it has to advance to the next bucket.
|
||||
*/
|
||||
if (iter->st_bucket_done) {
|
||||
st->offset = 0;
|
||||
st->bucket++;
|
||||
if (st->state == TCP_SEQ_STATE_LISTENING &&
|
||||
st->bucket > tcp_hashinfo.lhash2_mask) {
|
||||
st->state = TCP_SEQ_STATE_ESTABLISHED;
|
||||
st->bucket = 0;
|
||||
}
|
||||
}
|
||||
|
||||
again:
|
||||
/* Get a new batch */
|
||||
iter->cur_sk = 0;
|
||||
iter->end_sk = 0;
|
||||
iter->st_bucket_done = false;
|
||||
|
||||
sk = tcp_seek_last_pos(seq);
|
||||
if (!sk)
|
||||
return NULL; /* Done */
|
||||
|
||||
if (st->state == TCP_SEQ_STATE_LISTENING)
|
||||
expected = bpf_iter_tcp_listening_batch(seq, sk);
|
||||
else
|
||||
expected = bpf_iter_tcp_established_batch(seq, sk);
|
||||
|
||||
if (iter->end_sk == expected) {
|
||||
iter->st_bucket_done = true;
|
||||
return sk;
|
||||
}
|
||||
|
||||
if (!resized && !bpf_iter_tcp_realloc_batch(iter, expected * 3 / 2)) {
|
||||
resized = true;
|
||||
goto again;
|
||||
}
|
||||
|
||||
return sk;
|
||||
}
|
||||
|
||||
/* seq_file ->start() for the bpf tcp iterator.
 *
 * Position 0 yields SEQ_START_TOKEN.  Any other position resumes by
 * fetching a new batch: bpf iter has no lseek support, so the walk always
 * picks up where the previous stop() left it.
 */
static void *bpf_iter_tcp_seq_start(struct seq_file *seq, loff_t *pos)
{
	if (!*pos)
		return SEQ_START_TOKEN;

	return bpf_iter_tcp_batch(seq);
}
|
||||
|
||||
/* seq_file ->next() for the bpf tcp iterator.
 *
 * Retires the socket that was just shown (if the batch still had one),
 * then returns the following socket of the batch, or the first socket of a
 * newly collected batch when the current one is exhausted.  *pos is always
 * advanced by one.
 */
static void *bpf_iter_tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct bpf_tcp_iter_state *iter = seq->private;
	struct tcp_iter_state *st = &iter->state;
	struct sock *next_sk;

	/* next() is only called once show() is finished with batch[cur_sk],
	 * so that entry can be released now.
	 */
	if (iter->cur_sk < iter->end_sk) {
		/* bpf_iter_tcp relies on meta.seq_num rather than st->num;
		 * st->num is bumped only to keep tcp_iter_state coherent.
		 */
		st->num++;
		/* st->offset tells tcp_seek_last_pos() where in st->bucket a
		 * later start() has to resume.
		 */
		st->offset++;
		sock_put(iter->batch[iter->cur_sk++]);
	}

	next_sk = iter->cur_sk < iter->end_sk ? iter->batch[iter->cur_sk] :
						bpf_iter_tcp_batch(seq);

	/* No lseek in bpf iter, hence st->last_pos simply mirrors *pos and
	 * is maintained only for tcp_iter_state coherence.
	 */
	st->last_pos = ++*pos;

	return next_sk;
}
|
||||
|
||||
static int bpf_iter_tcp_seq_show(struct seq_file *seq, void *v)
|
||||
{
|
||||
struct bpf_iter_meta meta;
|
||||
struct bpf_prog *prog;
|
||||
struct sock *sk = v;
|
||||
bool slow;
|
||||
uid_t uid;
|
||||
int ret;
|
||||
|
||||
if (v == SEQ_START_TOKEN)
|
||||
return 0;
|
||||
|
||||
if (sk_fullsock(sk))
|
||||
slow = lock_sock_fast(sk);
|
||||
|
||||
if (unlikely(sk_unhashed(sk))) {
|
||||
ret = SEQ_SKIP;
|
||||
goto unlock;
|
||||
}
|
||||
|
||||
if (sk->sk_state == TCP_TIME_WAIT) {
|
||||
uid = 0;
|
||||
} else if (sk->sk_state == TCP_NEW_SYN_RECV) {
|
||||
@ -2728,11 +2925,18 @@ static int bpf_iter_tcp_seq_show(struct seq_file *seq, void *v)
|
||||
|
||||
meta.seq = seq;
|
||||
prog = bpf_iter_get_info(&meta, false);
|
||||
return tcp_prog_seq_show(prog, &meta, v, uid);
|
||||
ret = tcp_prog_seq_show(prog, &meta, v, uid);
|
||||
|
||||
unlock:
|
||||
if (sk_fullsock(sk))
|
||||
unlock_sock_fast(sk, slow);
|
||||
return ret;
|
||||
|
||||
}
|
||||
|
||||
static void bpf_iter_tcp_seq_stop(struct seq_file *seq, void *v)
|
||||
{
|
||||
struct bpf_tcp_iter_state *iter = seq->private;
|
||||
struct bpf_iter_meta meta;
|
||||
struct bpf_prog *prog;
|
||||
|
||||
@ -2743,16 +2947,33 @@ static void bpf_iter_tcp_seq_stop(struct seq_file *seq, void *v)
|
||||
(void)tcp_prog_seq_show(prog, &meta, v, 0);
|
||||
}
|
||||
|
||||
tcp_seq_stop(seq, v);
|
||||
if (iter->cur_sk < iter->end_sk) {
|
||||
bpf_iter_tcp_put_batch(iter);
|
||||
iter->st_bucket_done = false;
|
||||
}
|
||||
}
|
||||
|
||||
static const struct seq_operations bpf_iter_tcp_seq_ops = {
|
||||
.show = bpf_iter_tcp_seq_show,
|
||||
.start = tcp_seq_start,
|
||||
.next = tcp_seq_next,
|
||||
.start = bpf_iter_tcp_seq_start,
|
||||
.next = bpf_iter_tcp_seq_next,
|
||||
.stop = bpf_iter_tcp_seq_stop,
|
||||
};
|
||||
#endif
|
||||
static unsigned short seq_file_family(const struct seq_file *seq)
|
||||
{
|
||||
const struct tcp_seq_afinfo *afinfo;
|
||||
|
||||
#ifdef CONFIG_BPF_SYSCALL
|
||||
/* Iterated from bpf_iter. Let the bpf prog to filter instead. */
|
||||
if (seq->op == &bpf_iter_tcp_seq_ops)
|
||||
return AF_UNSPEC;
|
||||
#endif
|
||||
|
||||
/* Iterated from proc fs */
|
||||
afinfo = PDE_DATA(file_inode(seq->file));
|
||||
return afinfo->family;
|
||||
}
|
||||
|
||||
static const struct seq_operations tcp4_seq_ops = {
|
||||
.show = tcp4_seq_show,
|
||||
@ -3003,39 +3224,55 @@ static struct pernet_operations __net_initdata tcp_sk_ops = {
|
||||
DEFINE_BPF_ITER_FUNC(tcp, struct bpf_iter_meta *meta,
|
||||
struct sock_common *sk_common, uid_t uid)
|
||||
|
||||
#define INIT_BATCH_SZ 16
|
||||
|
||||
static int bpf_iter_init_tcp(void *priv_data, struct bpf_iter_aux_info *aux)
|
||||
{
|
||||
struct tcp_iter_state *st = priv_data;
|
||||
struct tcp_seq_afinfo *afinfo;
|
||||
int ret;
|
||||
struct bpf_tcp_iter_state *iter = priv_data;
|
||||
int err;
|
||||
|
||||
afinfo = kmalloc(sizeof(*afinfo), GFP_USER | __GFP_NOWARN);
|
||||
if (!afinfo)
|
||||
return -ENOMEM;
|
||||
err = bpf_iter_init_seq_net(priv_data, aux);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
afinfo->family = AF_UNSPEC;
|
||||
st->bpf_seq_afinfo = afinfo;
|
||||
ret = bpf_iter_init_seq_net(priv_data, aux);
|
||||
if (ret)
|
||||
kfree(afinfo);
|
||||
return ret;
|
||||
err = bpf_iter_tcp_realloc_batch(iter, INIT_BATCH_SZ);
|
||||
if (err) {
|
||||
bpf_iter_fini_seq_net(priv_data);
|
||||
return err;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void bpf_iter_fini_tcp(void *priv_data)
|
||||
{
|
||||
struct tcp_iter_state *st = priv_data;
|
||||
struct bpf_tcp_iter_state *iter = priv_data;
|
||||
|
||||
kfree(st->bpf_seq_afinfo);
|
||||
bpf_iter_fini_seq_net(priv_data);
|
||||
kvfree(iter->batch);
|
||||
}
|
||||
|
||||
static const struct bpf_iter_seq_info tcp_seq_info = {
|
||||
.seq_ops = &bpf_iter_tcp_seq_ops,
|
||||
.init_seq_private = bpf_iter_init_tcp,
|
||||
.fini_seq_private = bpf_iter_fini_tcp,
|
||||
.seq_priv_size = sizeof(struct tcp_iter_state),
|
||||
.seq_priv_size = sizeof(struct bpf_tcp_iter_state),
|
||||
};
|
||||
|
||||
static const struct bpf_func_proto *
|
||||
bpf_iter_tcp_get_func_proto(enum bpf_func_id func_id,
|
||||
const struct bpf_prog *prog)
|
||||
{
|
||||
switch (func_id) {
|
||||
case BPF_FUNC_setsockopt:
|
||||
return &bpf_sk_setsockopt_proto;
|
||||
case BPF_FUNC_getsockopt:
|
||||
return &bpf_sk_getsockopt_proto;
|
||||
default:
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
|
||||
static struct bpf_iter_reg tcp_reg_info = {
|
||||
.target = "tcp",
|
||||
.ctx_arg_info_size = 1,
|
||||
@ -3043,6 +3280,7 @@ static struct bpf_iter_reg tcp_reg_info = {
|
||||
{ offsetof(struct bpf_iter__tcp, sk_common),
|
||||
PTR_TO_BTF_ID_OR_NULL },
|
||||
},
|
||||
.get_func_proto = bpf_iter_tcp_get_func_proto,
|
||||
.seq_info = &tcp_seq_info,
|
||||
};
|
||||
|
||||
|
@ -66,17 +66,13 @@ int settimeo(int fd, int timeout_ms)
|
||||
|
||||
#define save_errno_close(fd) ({ int __save = errno; close(fd); errno = __save; })
|
||||
|
||||
int start_server(int family, int type, const char *addr_str, __u16 port,
|
||||
int timeout_ms)
|
||||
static int __start_server(int type, const struct sockaddr *addr,
|
||||
socklen_t addrlen, int timeout_ms, bool reuseport)
|
||||
{
|
||||
struct sockaddr_storage addr = {};
|
||||
socklen_t len;
|
||||
int on = 1;
|
||||
int fd;
|
||||
|
||||
if (make_sockaddr(family, addr_str, port, &addr, &len))
|
||||
return -1;
|
||||
|
||||
fd = socket(family, type, 0);
|
||||
fd = socket(addr->sa_family, type, 0);
|
||||
if (fd < 0) {
|
||||
log_err("Failed to create server socket");
|
||||
return -1;
|
||||
@ -85,7 +81,13 @@ int start_server(int family, int type, const char *addr_str, __u16 port,
|
||||
if (settimeo(fd, timeout_ms))
|
||||
goto error_close;
|
||||
|
||||
if (bind(fd, (const struct sockaddr *)&addr, len) < 0) {
|
||||
if (reuseport &&
|
||||
setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &on, sizeof(on))) {
|
||||
log_err("Failed to set SO_REUSEPORT");
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (bind(fd, addr, addrlen) < 0) {
|
||||
log_err("Failed to bind socket");
|
||||
goto error_close;
|
||||
}
|
||||
@ -104,6 +106,69 @@ error_close:
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* Build a socket address from @family/@addr_str/@port and start a server
 * socket of @type on it through __start_server(), without SO_REUSEPORT.
 *
 * Returns -1 when the address cannot be built; otherwise whatever
 * __start_server() returns.
 */
int start_server(int family, int type, const char *addr_str, __u16 port,
		 int timeout_ms)
{
	struct sockaddr_storage ss;
	socklen_t ss_len;
	int err;

	err = make_sockaddr(family, addr_str, port, &ss, &ss_len);
	if (err)
		return -1;

	return __start_server(type, (struct sockaddr *)&ss, ss_len,
			      timeout_ms, false);
}
|
||||
|
||||
int *start_reuseport_server(int family, int type, const char *addr_str,
|
||||
__u16 port, int timeout_ms, unsigned int nr_listens)
|
||||
{
|
||||
struct sockaddr_storage addr;
|
||||
unsigned int nr_fds = 0;
|
||||
socklen_t addrlen;
|
||||
int *fds;
|
||||
|
||||
if (!nr_listens)
|
||||
return NULL;
|
||||
|
||||
if (make_sockaddr(family, addr_str, port, &addr, &addrlen))
|
||||
return NULL;
|
||||
|
||||
fds = malloc(sizeof(*fds) * nr_listens);
|
||||
if (!fds)
|
||||
return NULL;
|
||||
|
||||
fds[0] = __start_server(type, (struct sockaddr *)&addr, addrlen,
|
||||
timeout_ms, true);
|
||||
if (fds[0] == -1)
|
||||
goto close_fds;
|
||||
nr_fds = 1;
|
||||
|
||||
if (getsockname(fds[0], (struct sockaddr *)&addr, &addrlen))
|
||||
goto close_fds;
|
||||
|
||||
for (; nr_fds < nr_listens; nr_fds++) {
|
||||
fds[nr_fds] = __start_server(type, (struct sockaddr *)&addr,
|
||||
addrlen, timeout_ms, true);
|
||||
if (fds[nr_fds] == -1)
|
||||
goto close_fds;
|
||||
}
|
||||
|
||||
return fds;
|
||||
|
||||
close_fds:
|
||||
free_fds(fds, nr_fds);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* Close the first @nr_close_fds descriptors stored in @fds (highest index
 * first) and free the array itself.  A NULL @fds is ignored.
 */
void free_fds(int *fds, unsigned int nr_close_fds)
{
	unsigned int left;

	if (!fds)
		return;

	for (left = nr_close_fds; left > 0; left--)
		close(fds[left - 1]);

	free(fds);
}
|
||||
|
||||
int fastopen_connect(int server_fd, const char *data, unsigned int data_len,
|
||||
int timeout_ms)
|
||||
{
|
||||
@ -217,6 +282,7 @@ int make_sockaddr(int family, const char *addr_str, __u16 port,
|
||||
if (family == AF_INET) {
|
||||
struct sockaddr_in *sin = (void *)addr;
|
||||
|
||||
memset(addr, 0, sizeof(*sin));
|
||||
sin->sin_family = AF_INET;
|
||||
sin->sin_port = htons(port);
|
||||
if (addr_str &&
|
||||
@ -230,6 +296,7 @@ int make_sockaddr(int family, const char *addr_str, __u16 port,
|
||||
} else if (family == AF_INET6) {
|
||||
struct sockaddr_in6 *sin6 = (void *)addr;
|
||||
|
||||
memset(addr, 0, sizeof(*sin6));
|
||||
sin6->sin6_family = AF_INET6;
|
||||
sin6->sin6_port = htons(port);
|
||||
if (addr_str &&
|
||||
|
@ -36,6 +36,10 @@ extern struct ipv6_packet pkt_v6;
|
||||
int settimeo(int fd, int timeout_ms);
|
||||
int start_server(int family, int type, const char *addr, __u16 port,
|
||||
int timeout_ms);
|
||||
int *start_reuseport_server(int family, int type, const char *addr_str,
|
||||
__u16 port, int timeout_ms,
|
||||
unsigned int nr_listens);
|
||||
void free_fds(int *fds, unsigned int nr_close_fds);
|
||||
int connect_to_fd(int server_fd, int timeout_ms);
|
||||
int connect_fd_to_fd(int client_fd, int server_fd, int timeout_ms);
|
||||
int fastopen_connect(int server_fd, const char *data, unsigned int data_len,
|
||||
|
226
tools/testing/selftests/bpf/prog_tests/bpf_iter_setsockopt.c
Normal file
226
tools/testing/selftests/bpf/prog_tests/bpf_iter_setsockopt.c
Normal file
@ -0,0 +1,226 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
/* Copyright (c) 2021 Facebook */
|
||||
#define _GNU_SOURCE
|
||||
#include <sched.h>
|
||||
#include <test_progs.h>
|
||||
#include "network_helpers.h"
|
||||
#include "bpf_dctcp.skel.h"
|
||||
#include "bpf_cubic.skel.h"
|
||||
#include "bpf_iter_setsockopt.skel.h"
|
||||
|
||||
static int create_netns(void)
|
||||
{
|
||||
if (!ASSERT_OK(unshare(CLONE_NEWNET), "create netns"))
|
||||
return -1;
|
||||
|
||||
if (!ASSERT_OK(system("ip link set dev lo up"), "bring up lo"))
|
||||
return -1;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Switch the congestion control of fds[0..nr_fds-1] to "bpf_cubic", in
 * order, stopping at the first setsockopt() failure.
 *
 * Returns how many sockets were switched; a value smaller than @nr_fds is
 * the index of the socket that failed.
 */
static unsigned int set_bpf_cubic(int *fds, unsigned int nr_fds)
{
	unsigned int nr_set = 0;

	while (nr_set < nr_fds &&
	       !setsockopt(fds[nr_set], SOL_TCP, TCP_CONGESTION, "bpf_cubic",
			   sizeof("bpf_cubic")))
		nr_set++;

	return nr_set;
}
|
||||
|
||||
/* Verify that fds[0..nr_fds-1] all report "bpf_dctcp" as their congestion
 * control, checking them in order.
 *
 * Returns @nr_fds when every socket passes; otherwise the index of the
 * first socket whose getsockopt() failed or whose congestion control name
 * differs.
 */
static unsigned int check_bpf_dctcp(int *fds, unsigned int nr_fds)
{
	unsigned int i;

	for (i = 0; i < nr_fds; i++) {
		char tcp_cc[16] = "";
		/* optlen is a value-result argument: getsockopt() overwrites
		 * it with the returned length, so it has to be reset to the
		 * buffer size before every call.
		 */
		socklen_t optlen = sizeof(tcp_cc);

		if (getsockopt(fds[i], SOL_TCP, TCP_CONGESTION,
			       tcp_cc, &optlen))
			return i;

		/* Never let strcmp() read past the buffer */
		tcp_cc[sizeof(tcp_cc) - 1] = '\0';
		if (strcmp(tcp_cc, "bpf_dctcp"))
			return i;
	}

	return nr_fds;
}
|
||||
|
||||
/* Create @nr_est established connections to @listen_fd.
 *
 * For each connection the client side is connected, switched to bpf_cubic
 * via set_bpf_cubic(), and then accepted on @listen_fd.
 *
 * On success returns a malloc()ed array with the @nr_est client fds and
 * stores a second malloc()ed array with the accepted fds in
 * *@paccepted_fds; both are meant to be released with free_fds().
 * On any failure every fd opened so far is closed, both arrays are freed,
 * NULL is returned and *@paccepted_fds is left untouched.
 */
static int *make_established(int listen_fd, unsigned int nr_est,
			     int **paccepted_fds)
{
	int *client_fds, *server_fds;
	unsigned int i = 0;

	client_fds = malloc(sizeof(*client_fds) * nr_est);
	if (!client_fds)
		return NULL;

	server_fds = malloc(sizeof(*server_fds) * nr_est);
	if (!server_fds) {
		free(client_fds);
		return NULL;
	}

	while (i < nr_est) {
		client_fds[i] = connect_to_fd(listen_fd, 0);
		if (client_fds[i] == -1)
			break;

		if (set_bpf_cubic(&client_fds[i], 1) != 1) {
			/* slot i is not counted, so close it here */
			close(client_fds[i]);
			break;
		}

		server_fds[i] = accept(listen_fd, NULL, 0);
		if (server_fds[i] == -1) {
			close(client_fds[i]);
			break;
		}

		i++;
	}

	/* i is the number of fully set up pairs */
	if (!ASSERT_EQ(i, nr_est, "create established fds")) {
		free_fds(server_fds, i);
		free_fds(client_fds, i);
		return NULL;
	}

	*paccepted_fds = server_fds;
	return client_fds;
}
|
||||
|
||||
/*
 * Return the local port of @fd in host byte order, or 0 when
 * getsockname() fails.
 *
 * The address is read into a struct sockaddr_in6, so @fd is expected to be
 * an AF_INET6 socket.
 */
static unsigned short get_local_port(int fd)
{
	struct sockaddr_in6 addr;
	socklen_t addrlen = sizeof(addr);

	/*
	 * getsockname() is prototyped with a struct sockaddr *.  Passing a
	 * struct sockaddr_in6 * directly only compiles where libc declares
	 * the parameter as a transparent union (glibc with _GNU_SOURCE), so
	 * cast explicitly to stay portable.
	 */
	if (getsockname(fd, (struct sockaddr *)&addr, &addrlen))
		return 0;

	return ntohs(addr.sin6_port);
}
|
||||
|
||||
static void do_bpf_iter_setsockopt(struct bpf_iter_setsockopt *iter_skel,
|
||||
bool random_retry)
|
||||
{
|
||||
int *reuse_listen_fds = NULL, *accepted_fds = NULL, *est_fds = NULL;
|
||||
unsigned int nr_reuse_listens = 256, nr_est = 256;
|
||||
int err, iter_fd = -1, listen_fd = -1;
|
||||
char buf;
|
||||
|
||||
/* Prepare non-reuseport listen_fd */
|
||||
listen_fd = start_server(AF_INET6, SOCK_STREAM, "::1", 0, 0);
|
||||
if (!ASSERT_GE(listen_fd, 0, "start_server"))
|
||||
return;
|
||||
if (!ASSERT_EQ(set_bpf_cubic(&listen_fd, 1), 1,
|
||||
"set listen_fd to cubic"))
|
||||
goto done;
|
||||
iter_skel->bss->listen_hport = get_local_port(listen_fd);
|
||||
if (!ASSERT_NEQ(iter_skel->bss->listen_hport, 0,
|
||||
"get_local_port(listen_fd)"))
|
||||
goto done;
|
||||
|
||||
/* Connect to non-reuseport listen_fd */
|
||||
est_fds = make_established(listen_fd, nr_est, &accepted_fds);
|
||||
if (!ASSERT_OK_PTR(est_fds, "create established"))
|
||||
goto done;
|
||||
|
||||
/* Prepare reuseport listen fds */
|
||||
reuse_listen_fds = start_reuseport_server(AF_INET6, SOCK_STREAM,
|
||||
"::1", 0, 0,
|
||||
nr_reuse_listens);
|
||||
if (!ASSERT_OK_PTR(reuse_listen_fds, "start_reuseport_server"))
|
||||
goto done;
|
||||
if (!ASSERT_EQ(set_bpf_cubic(reuse_listen_fds, nr_reuse_listens),
|
||||
nr_reuse_listens, "set reuse_listen_fds to cubic"))
|
||||
goto done;
|
||||
iter_skel->bss->reuse_listen_hport = get_local_port(reuse_listen_fds[0]);
|
||||
if (!ASSERT_NEQ(iter_skel->bss->reuse_listen_hport, 0,
|
||||
"get_local_port(reuse_listen_fds[0])"))
|
||||
goto done;
|
||||
|
||||
/* Run bpf tcp iter to switch from bpf_cubic to bpf_dctcp */
|
||||
iter_skel->bss->random_retry = random_retry;
|
||||
iter_fd = bpf_iter_create(bpf_link__fd(iter_skel->links.change_tcp_cc));
|
||||
if (!ASSERT_GE(iter_fd, 0, "create iter_fd"))
|
||||
goto done;
|
||||
|
||||
while ((err = read(iter_fd, &buf, sizeof(buf))) == -1 &&
|
||||
errno == EAGAIN)
|
||||
;
|
||||
if (!ASSERT_OK(err, "read iter error"))
|
||||
goto done;
|
||||
|
||||
/* Check reuseport listen fds for dctcp */
|
||||
ASSERT_EQ(check_bpf_dctcp(reuse_listen_fds, nr_reuse_listens),
|
||||
nr_reuse_listens,
|
||||
"check reuse_listen_fds dctcp");
|
||||
|
||||
/* Check non reuseport listen fd for dctcp */
|
||||
ASSERT_EQ(check_bpf_dctcp(&listen_fd, 1), 1,
|
||||
"check listen_fd dctcp");
|
||||
|
||||
/* Check established fds for dctcp */
|
||||
ASSERT_EQ(check_bpf_dctcp(est_fds, nr_est), nr_est,
|
||||
"check est_fds dctcp");
|
||||
|
||||
/* Check accepted fds for dctcp */
|
||||
ASSERT_EQ(check_bpf_dctcp(accepted_fds, nr_est), nr_est,
|
||||
"check accepted_fds dctcp");
|
||||
|
||||
done:
|
||||
if (iter_fd != -1)
|
||||
close(iter_fd);
|
||||
if (listen_fd != -1)
|
||||
close(listen_fd);
|
||||
free_fds(reuse_listen_fds, nr_reuse_listens);
|
||||
free_fds(accepted_fds, nr_est);
|
||||
free_fds(est_fds, nr_est);
|
||||
}
|
||||
|
||||
void test_bpf_iter_setsockopt(void)
|
||||
{
|
||||
struct bpf_iter_setsockopt *iter_skel = NULL;
|
||||
struct bpf_cubic *cubic_skel = NULL;
|
||||
struct bpf_dctcp *dctcp_skel = NULL;
|
||||
struct bpf_link *cubic_link = NULL;
|
||||
struct bpf_link *dctcp_link = NULL;
|
||||
|
||||
if (create_netns())
|
||||
return;
|
||||
|
||||
/* Load iter_skel */
|
||||
iter_skel = bpf_iter_setsockopt__open_and_load();
|
||||
if (!ASSERT_OK_PTR(iter_skel, "iter_skel"))
|
||||
return;
|
||||
iter_skel->links.change_tcp_cc = bpf_program__attach_iter(iter_skel->progs.change_tcp_cc, NULL);
|
||||
if (!ASSERT_OK_PTR(iter_skel->links.change_tcp_cc, "attach iter"))
|
||||
goto done;
|
||||
|
||||
/* Load bpf_cubic */
|
||||
cubic_skel = bpf_cubic__open_and_load();
|
||||
if (!ASSERT_OK_PTR(cubic_skel, "cubic_skel"))
|
||||
goto done;
|
||||
cubic_link = bpf_map__attach_struct_ops(cubic_skel->maps.cubic);
|
||||
if (!ASSERT_OK_PTR(cubic_link, "cubic_link"))
|
||||
goto done;
|
||||
|
||||
/* Load bpf_dctcp */
|
||||
dctcp_skel = bpf_dctcp__open_and_load();
|
||||
if (!ASSERT_OK_PTR(dctcp_skel, "dctcp_skel"))
|
||||
goto done;
|
||||
dctcp_link = bpf_map__attach_struct_ops(dctcp_skel->maps.dctcp);
|
||||
if (!ASSERT_OK_PTR(dctcp_link, "dctcp_link"))
|
||||
goto done;
|
||||
|
||||
do_bpf_iter_setsockopt(iter_skel, true);
|
||||
do_bpf_iter_setsockopt(iter_skel, false);
|
||||
|
||||
done:
|
||||
bpf_link__destroy(cubic_link);
|
||||
bpf_link__destroy(dctcp_link);
|
||||
bpf_cubic__destroy(cubic_skel);
|
||||
bpf_dctcp__destroy(dctcp_skel);
|
||||
bpf_iter_setsockopt__destroy(iter_skel);
|
||||
}
|
72
tools/testing/selftests/bpf/progs/bpf_iter_setsockopt.c
Normal file
72
tools/testing/selftests/bpf/progs/bpf_iter_setsockopt.c
Normal file
@ -0,0 +1,72 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
/* Copyright (c) 2021 Facebook */
|
||||
#include "bpf_iter.h"
|
||||
#include "bpf_tracing_net.h"
|
||||
#include <bpf/bpf_helpers.h>
|
||||
#include <bpf/bpf_endian.h>
|
||||
|
||||
/*
 * bpf_tcp_sk(skc) - turn a struct sock_common pointer into a tcp_sock.
 *
 * NOTE: besides yielding a value, this macro assigns to two variables that
 * must already be declared in the caller's scope:
 *   tp - receives bpf_skc_to_tcp_sock(skc), or NULL when skc is NULL
 *   sk - receives the same pointer cast to struct sock *, or NULL
 *
 * The expression evaluates to tp, so it can be tested directly in an if ().
 */
#define bpf_tcp_sk(skc)	({				\
	struct sock_common *common_ = skc;		\
							\
	tp = NULL;					\
	sk = NULL;					\
	if (common_ != NULL) {				\
		tp = bpf_skc_to_tcp_sock(common_);	\
		sk = (struct sock *)tp;			\
	}						\
	tp;						\
})
|
||||
|
||||
unsigned short reuse_listen_hport = 0;
|
||||
unsigned short listen_hport = 0;
|
||||
char cubic_cc[TCP_CA_NAME_MAX] = "bpf_cubic";
|
||||
char dctcp_cc[TCP_CA_NAME_MAX] = "bpf_dctcp";
|
||||
bool random_retry = false;
|
||||
|
||||
static bool tcp_cc_eq(const char *a, const char *b)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < TCP_CA_NAME_MAX; i++) {
|
||||
if (a[i] != b[i])
|
||||
return false;
|
||||
if (!a[i])
|
||||
break;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/*
 * TCP iterator program: switch selected sockets from bpf_cubic to bpf_dctcp.
 *
 * A socket is acted on only when all of the following hold:
 *   - it resolves to a tcp_sock (see bpf_tcp_sk(), which also sets the
 *     local variables tp and sk),
 *   - it is AF_INET6 and in TCP_LISTEN or TCP_ESTABLISHED state,
 *   - its local port is reuse_listen_hport or listen_hport, or its
 *     destination port is listen_hport,
 *   - its current congestion control is cubic_cc.
 *
 * Returns 0 for every socket that is skipped or switched.  Returns 1,
 * without switching, on roughly one in four eligible sockets when
 * random_retry is set.
 * NOTE(review): returning 1 presumably makes the iterator retry the
 * socket; confirm against the bpf_iter return-value contract.
 */
SEC("iter/tcp")
int change_tcp_cc(struct bpf_iter__tcp *ctx)
{
	char cur_cc[TCP_CA_NAME_MAX];
	struct tcp_sock *tp;
	struct sock *sk;

	if (!bpf_tcp_sk(ctx->sk_common))
		return 0;

	/* sk_num is compared as-is; sk_dport needs bpf_ntohs() first */
	if (sk->sk_family != AF_INET6 ||
	    (sk->sk_state != TCP_LISTEN &&
	     sk->sk_state != TCP_ESTABLISHED) ||
	    (sk->sk_num != reuse_listen_hport &&
	     sk->sk_num != listen_hport &&
	     bpf_ntohs(sk->sk_dport) != listen_hport))
		return 0;

	if (bpf_getsockopt(tp, SOL_TCP, TCP_CONGESTION,
			   cur_cc, sizeof(cur_cc)))
		return 0;

	/* Leave sockets alone unless they are currently on cubic_cc. */
	if (!tcp_cc_eq(cur_cc, cubic_cc))
		return 0;

	if (random_retry && bpf_get_prandom_u32() % 4 == 1)
		return 1;

	/* The result is deliberately not checked here. */
	bpf_setsockopt(tp, SOL_TCP, TCP_CONGESTION, dctcp_cc, sizeof(dctcp_cc));
	return 0;
}
|
||||
|
||||
char _license[] SEC("license") = "GPL";
|
@ -5,6 +5,10 @@
|
||||
#define AF_INET 2
|
||||
#define AF_INET6 10
|
||||
|
||||
#define SOL_TCP 6
|
||||
#define TCP_CONGESTION 13
|
||||
#define TCP_CA_NAME_MAX 16
|
||||
|
||||
#define ICSK_TIME_RETRANS 1
|
||||
#define ICSK_TIME_PROBE0 3
|
||||
#define ICSK_TIME_LOSS_PROBE 5
|
||||
@ -32,6 +36,8 @@
|
||||
#define ir_v6_rmt_addr req.__req_common.skc_v6_daddr
|
||||
#define ir_v6_loc_addr req.__req_common.skc_v6_rcv_saddr
|
||||
|
||||
#define sk_num __sk_common.skc_num
|
||||
#define sk_dport __sk_common.skc_dport
|
||||
#define sk_family __sk_common.skc_family
|
||||
#define sk_rmem_alloc sk_backlog.rmem_alloc
|
||||
#define sk_refcnt __sk_common.skc_refcnt
|
||||
|
Loading…
x
Reference in New Issue
Block a user