Alexei Starovoitov says:

====================
pull-request: bpf 2021-11-09

We've added 7 non-merge commits during the last 3 day(s) which contain
a total of 10 files changed, 174 insertions(+), 48 deletions(-).

The main changes are:

1) Various sockmap fixes, from John and Jussi.

2) Fix out-of-bound issue with bpf_pseudo_func, from Martin.

* https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf:
  bpf, sockmap: sk_skb data_end access incorrect when src_reg = dst_reg
  bpf: sockmap, strparser, and tls are reusing qdisc_skb_cb and colliding
  bpf, sockmap: Fix race in ingress receive verdict with redirect to self
  bpf, sockmap: Remove unhash handler for BPF sockmap usage
  bpf, sockmap: Use stricter sk state checks in sk_lookup_assign
  bpf: selftest: Trigger a DCE on the whole subprog
  bpf: Stop caching subprog index in the bpf_pseudo_func insn
====================

Link: https://lore.kernel.org/r/20211109215702.38350-1-alexei.starovoitov@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
commit fceb07950a
@@ -484,6 +484,12 @@ bpf_ctx_record_field_size(struct bpf_insn_access_aux *aux, u32 size)
 	aux->ctx_field_size = size;
 }
 
+static inline bool bpf_pseudo_func(const struct bpf_insn *insn)
+{
+	return insn->code == (BPF_LD | BPF_IMM | BPF_DW) &&
+	       insn->src_reg == BPF_PSEUDO_FUNC;
+}
+
 struct bpf_prog_ops {
 	int (*test_run)(struct bpf_prog *prog, const union bpf_attr *kattr,
 			union bpf_attr __user *uattr);
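For context on the helper added above: a BPF_PSEUDO_FUNC load is the two-slot ld_imm64 that callback-taking helpers (bpf_for_each_map_elem() and friends) use to pass a subprog address. A rough sketch of its shape, with a made-up offset and only uapi definitions assumed:

#include <linux/bpf.h>

/* Sketch only: a BPF_PSEUDO_FUNC ld_imm64 occupies two instruction slots.
 * Before JIT, insn[0].imm holds the relative distance to the callback
 * subprog (target index = this insn's index + imm + 1); after JIT, the
 * pair is patched with the low/high 32 bits of the jited function address.
 */
struct bpf_insn ld_callback[2] = {
	{ .code    = BPF_LD | BPF_IMM | BPF_DW,
	  .dst_reg = BPF_REG_2,
	  .src_reg = BPF_PSEUDO_FUNC,
	  .imm     = 5 },	/* hypothetical delta to the subprog */
	{ 0 },			/* second half of the 64-bit immediate */
};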
@@ -507,6 +507,18 @@ static inline bool sk_psock_strp_enabled(struct sk_psock *psock)
 	return !!psock->saved_data_ready;
 }
 
+static inline bool sk_is_tcp(const struct sock *sk)
+{
+	return sk->sk_type == SOCK_STREAM &&
+	       sk->sk_protocol == IPPROTO_TCP;
+}
+
+static inline bool sk_is_udp(const struct sock *sk)
+{
+	return sk->sk_type == SOCK_DGRAM &&
+	       sk->sk_protocol == IPPROTO_UDP;
+}
+
 #if IS_ENABLED(CONFIG_NET_SOCK_MSG)
 
 #define BPF_F_STRPARSER	(1UL << 1)
@@ -54,10 +54,28 @@ struct strp_msg {
 	int offset;
 };
 
+struct _strp_msg {
+	/* Internal cb structure. struct strp_msg must be first for passing
+	 * to upper layer.
+	 */
+	struct strp_msg strp;
+	int accum_len;
+};
+
+struct sk_skb_cb {
+#define SK_SKB_CB_PRIV_LEN 20
+	unsigned char data[SK_SKB_CB_PRIV_LEN];
+	struct _strp_msg strp;
+	/* temp_reg is a temporary register used for bpf_convert_data_end_access
+	 * when dst_reg == src_reg.
+	 */
+	u64 temp_reg;
+};
+
 static inline struct strp_msg *strp_msg(struct sk_buff *skb)
 {
 	return (struct strp_msg *)((void *)skb->cb +
-		offsetof(struct qdisc_skb_cb, data));
+		offsetof(struct sk_skb_cb, strp));
 }
 
 /* Structure for an attached lower socket */
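The point of the new struct sk_skb_cb above is that the BPF-visible cb[] bytes, the private strparser state and the temp_reg scratch slot get a layout of their own instead of borrowing qdisc_skb_cb, which TLS and strparser were colliding with. A stand-alone sketch, with field sizes assumed to mirror the hunk above and skb->cb assumed to be 48 bytes as in mainline struct sk_buff:

#include <assert.h>
#include <stdio.h>

/* Userspace mock-up of the layout introduced above (not the kernel types). */
struct strp_msg { int full_len; int offset; };
struct _strp_msg { struct strp_msg strp; int accum_len; };

struct sk_skb_cb {
	unsigned char data[20];		/* SK_SKB_CB_PRIV_LEN */
	struct _strp_msg strp;
	unsigned long long temp_reg;	/* scratch for the data_end rewrite */
};

int main(void)
{
	/* skb->cb is assumed to be 48 bytes, as in struct sk_buff. */
	static_assert(sizeof(struct sk_skb_cb) <= 48, "must fit in skb->cb");
	printf("sk_skb_cb occupies %zu of the 48 cb bytes\n",
	       sizeof(struct sk_skb_cb));
	return 0;
}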
@@ -390,6 +390,13 @@ static int bpf_adj_branches(struct bpf_prog *prog, u32 pos, s32 end_old,
 			i = end_new;
 			insn = prog->insnsi + end_old;
 		}
+		if (bpf_pseudo_func(insn)) {
+			ret = bpf_adj_delta_to_imm(insn, pos, end_old,
+						   end_new, i, probe_pass);
+			if (ret)
+				return ret;
+			continue;
+		}
 		code = insn->code;
 		if ((BPF_CLASS(code) != BPF_JMP &&
 		     BPF_CLASS(code) != BPF_JMP32) ||
@@ -240,12 +240,6 @@ static bool bpf_pseudo_kfunc_call(const struct bpf_insn *insn)
 	       insn->src_reg == BPF_PSEUDO_KFUNC_CALL;
 }
 
-static bool bpf_pseudo_func(const struct bpf_insn *insn)
-{
-	return insn->code == (BPF_LD | BPF_IMM | BPF_DW) &&
-	       insn->src_reg == BPF_PSEUDO_FUNC;
-}
-
 struct bpf_call_arg_meta {
 	struct bpf_map *map_ptr;
 	bool raw_mode;
@@ -1960,16 +1954,10 @@ static int add_subprog_and_kfunc(struct bpf_verifier_env *env)
 			return -EPERM;
 		}
 
-		if (bpf_pseudo_func(insn)) {
+		if (bpf_pseudo_func(insn) || bpf_pseudo_call(insn))
 			ret = add_subprog(env, i + insn->imm + 1);
-			if (ret >= 0)
-				/* remember subprog */
-				insn[1].imm = ret;
-		} else if (bpf_pseudo_call(insn)) {
-			ret = add_subprog(env, i + insn->imm + 1);
-		} else {
+		else
 			ret = add_kfunc_call(env, insn->imm, insn->off);
-		}
 
 		if (ret < 0)
 			return ret;
@@ -9387,7 +9375,8 @@ static int check_ld_imm(struct bpf_verifier_env *env, struct bpf_insn *insn)
 
 	if (insn->src_reg == BPF_PSEUDO_FUNC) {
 		struct bpf_prog_aux *aux = env->prog->aux;
-		u32 subprogno = insn[1].imm;
+		u32 subprogno = find_subprog(env,
+					     env->insn_idx + insn->imm + 1);
 
 		if (!aux->func_info) {
 			verbose(env, "missing btf func_info\n");
@@ -12557,14 +12546,9 @@ static int jit_subprogs(struct bpf_verifier_env *env)
 		return 0;
 
 	for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) {
-		if (bpf_pseudo_func(insn)) {
-			env->insn_aux_data[i].call_imm = insn->imm;
-			/* subprog is encoded in insn[1].imm */
+		if (!bpf_pseudo_func(insn) && !bpf_pseudo_call(insn))
 			continue;
-		}
 
-		if (!bpf_pseudo_call(insn))
-			continue;
 		/* Upon error here we cannot fall back to interpreter but
 		 * need a hard reject of the program. Thus -EFAULT is
 		 * propagated in any case.
@@ -12585,6 +12569,12 @@ static int jit_subprogs(struct bpf_verifier_env *env)
 		env->insn_aux_data[i].call_imm = insn->imm;
 		/* point imm to __bpf_call_base+1 from JITs point of view */
 		insn->imm = 1;
+		if (bpf_pseudo_func(insn))
+			/* jit (e.g. x86_64) may emit fewer instructions
+			 * if it learns a u32 imm is the same as a u64 imm.
+			 * Force a non zero here.
+			 */
+			insn[1].imm = 1;
 	}
 
 	err = bpf_prog_alloc_jited_linfo(prog);
@@ -12669,7 +12659,7 @@ static int jit_subprogs(struct bpf_verifier_env *env)
 		insn = func[i]->insnsi;
 		for (j = 0; j < func[i]->len; j++, insn++) {
 			if (bpf_pseudo_func(insn)) {
-				subprog = insn[1].imm;
+				subprog = insn->off;
 				insn[0].imm = (u32)(long)func[subprog]->bpf_func;
 				insn[1].imm = ((u64)(long)func[subprog]->bpf_func) >> 32;
 				continue;
@@ -12720,7 +12710,8 @@ static int jit_subprogs(struct bpf_verifier_env *env)
 	for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) {
 		if (bpf_pseudo_func(insn)) {
 			insn[0].imm = env->insn_aux_data[i].call_imm;
-			insn[1].imm = find_subprog(env, i + insn[0].imm + 1);
+			insn[1].imm = insn->off;
+			insn->off = 0;
 			continue;
 		}
 		if (!bpf_pseudo_call(insn))
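The common thread in the verifier hunks above: instead of trusting a subprog index cached in insn[1].imm, which dead code elimination can leave stale and out of range, the subprog is re-derived from the relative immediate, the same way bpf_pseudo_call insns are handled. A minimal sketch of that derivation (the helper name here is invented for illustration):

#include <linux/bpf.h>

/* Hypothetical helper mirroring add_subprog(env, i + insn->imm + 1) and
 * find_subprog(env, env->insn_idx + insn->imm + 1) in the hunks above:
 * for a BPF_PSEUDO_FUNC ld_imm64 at index insn_idx, the callback subprog
 * starts at instruction insn_idx + imm + 1, and bpf_adj_branches() keeps
 * imm correct whenever instructions are inserted or removed.
 */
static __u32 pseudo_func_target(__u32 insn_idx, const struct bpf_insn *insn)
{
	return insn_idx + insn->imm + 1;
}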
@@ -9756,22 +9756,46 @@ static u32 sock_ops_convert_ctx_access(enum bpf_access_type type,
 static struct bpf_insn *bpf_convert_data_end_access(const struct bpf_insn *si,
 						    struct bpf_insn *insn)
 {
-	/* si->dst_reg = skb->data */
+	int reg;
+	int temp_reg_off = offsetof(struct sk_buff, cb) +
+			   offsetof(struct sk_skb_cb, temp_reg);
+
+	if (si->src_reg == si->dst_reg) {
+		/* We need an extra register, choose and save a register. */
+		reg = BPF_REG_9;
+		if (si->src_reg == reg || si->dst_reg == reg)
+			reg--;
+		if (si->src_reg == reg || si->dst_reg == reg)
+			reg--;
+		*insn++ = BPF_STX_MEM(BPF_DW, si->src_reg, reg, temp_reg_off);
+	} else {
+		reg = si->dst_reg;
+	}
+
+	/* reg = skb->data */
 	*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, data),
-			      si->dst_reg, si->src_reg,
+			      reg, si->src_reg,
 			      offsetof(struct sk_buff, data));
 	/* AX = skb->len */
 	*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, len),
 			      BPF_REG_AX, si->src_reg,
 			      offsetof(struct sk_buff, len));
-	/* si->dst_reg = skb->data + skb->len */
-	*insn++ = BPF_ALU64_REG(BPF_ADD, si->dst_reg, BPF_REG_AX);
+	/* reg = skb->data + skb->len */
+	*insn++ = BPF_ALU64_REG(BPF_ADD, reg, BPF_REG_AX);
 	/* AX = skb->data_len */
 	*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, data_len),
 			      BPF_REG_AX, si->src_reg,
 			      offsetof(struct sk_buff, data_len));
-	/* si->dst_reg = skb->data + skb->len - skb->data_len */
-	*insn++ = BPF_ALU64_REG(BPF_SUB, si->dst_reg, BPF_REG_AX);
+
+	/* reg = skb->data + skb->len - skb->data_len */
+	*insn++ = BPF_ALU64_REG(BPF_SUB, reg, BPF_REG_AX);
+
+	if (si->src_reg == si->dst_reg) {
+		/* Restore the saved register */
+		*insn++ = BPF_MOV64_REG(BPF_REG_AX, si->src_reg);
+		*insn++ = BPF_MOV64_REG(si->dst_reg, reg);
+		*insn++ = BPF_LDX_MEM(BPF_DW, reg, BPF_REG_AX, temp_reg_off);
+	}
 
 	return insn;
 }
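Regardless of which scratch register the rewrite above ends up using, the emitted instructions compute the same value. A plain-C sketch of that arithmetic (illustrative only; the real thing is the BPF insn sequence above):

/* data_end = skb->data + skb->len - skb->data_len, i.e. the end of the
 * linear area; paged bytes (data_len) stay out of direct reach of the
 * sk_skb program.
 */
static inline unsigned char *sk_skb_data_end(unsigned char *data,
					     unsigned int len,
					     unsigned int data_len)
{
	return data + len - data_len;
}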
@@ -9782,11 +9806,33 @@ static u32 sk_skb_convert_ctx_access(enum bpf_access_type type,
 				     struct bpf_prog *prog, u32 *target_size)
 {
 	struct bpf_insn *insn = insn_buf;
+	int off;
 
 	switch (si->off) {
 	case offsetof(struct __sk_buff, data_end):
 		insn = bpf_convert_data_end_access(si, insn);
 		break;
+	case offsetof(struct __sk_buff, cb[0]) ...
+	     offsetofend(struct __sk_buff, cb[4]) - 1:
+		BUILD_BUG_ON(sizeof_field(struct sk_skb_cb, data) < 20);
+		BUILD_BUG_ON((offsetof(struct sk_buff, cb) +
+			      offsetof(struct sk_skb_cb, data)) %
+			     sizeof(__u64));
+
+		prog->cb_access = 1;
+		off = si->off;
+		off -= offsetof(struct __sk_buff, cb[0]);
+		off += offsetof(struct sk_buff, cb);
+		off += offsetof(struct sk_skb_cb, data);
+		if (type == BPF_WRITE)
+			*insn++ = BPF_STX_MEM(BPF_SIZE(si->code), si->dst_reg,
+					      si->src_reg, off);
+		else
+			*insn++ = BPF_LDX_MEM(BPF_SIZE(si->code), si->dst_reg,
+					      si->src_reg, off);
+		break;
+
+
 	default:
 		return bpf_convert_ctx_access(type, si, insn_buf, prog,
 					      target_size);
@@ -10423,8 +10469,10 @@ BPF_CALL_3(bpf_sk_lookup_assign, struct bpf_sk_lookup_kern *, ctx,
 		return -EINVAL;
 	if (unlikely(sk && sk_is_refcounted(sk)))
 		return -ESOCKTNOSUPPORT; /* reject non-RCU freed sockets */
-	if (unlikely(sk && sk->sk_state == TCP_ESTABLISHED))
-		return -ESOCKTNOSUPPORT; /* reject connected sockets */
+	if (unlikely(sk && sk_is_tcp(sk) && sk->sk_state != TCP_LISTEN))
+		return -ESOCKTNOSUPPORT; /* only accept TCP socket in LISTEN */
+	if (unlikely(sk && sk_is_udp(sk) && sk->sk_state != TCP_CLOSE))
+		return -ESOCKTNOSUPPORT; /* only accept UDP socket in CLOSE */
 
 	/* Check if socket is suitable for packet L3/L4 protocol */
 	if (sk && sk->sk_protocol != ctx->protocol)
@@ -511,12 +511,6 @@ static bool sock_map_op_okay(const struct bpf_sock_ops_kern *ops)
 	       ops->op == BPF_SOCK_OPS_TCP_LISTEN_CB;
 }
 
-static bool sk_is_tcp(const struct sock *sk)
-{
-	return sk->sk_type == SOCK_STREAM &&
-	       sk->sk_protocol == IPPROTO_TCP;
-}
-
 static bool sock_map_redirect_allowed(const struct sock *sk)
 {
 	if (sk_is_tcp(sk))
@@ -172,6 +172,41 @@ static int tcp_msg_wait_data(struct sock *sk, struct sk_psock *psock,
 	return ret;
 }
 
+static int tcp_bpf_recvmsg_parser(struct sock *sk,
+				  struct msghdr *msg,
+				  size_t len,
+				  int nonblock,
+				  int flags,
+				  int *addr_len)
+{
+	struct sk_psock *psock;
+	int copied;
+
+	if (unlikely(flags & MSG_ERRQUEUE))
+		return inet_recv_error(sk, msg, len, addr_len);
+
+	psock = sk_psock_get(sk);
+	if (unlikely(!psock))
+		return tcp_recvmsg(sk, msg, len, nonblock, flags, addr_len);
+
+	lock_sock(sk);
+msg_bytes_ready:
+	copied = sk_msg_recvmsg(sk, psock, msg, len, flags);
+	if (!copied) {
+		long timeo;
+		int data;
+
+		timeo = sock_rcvtimeo(sk, nonblock);
+		data = tcp_msg_wait_data(sk, psock, timeo);
+		if (data && !sk_psock_queue_empty(psock))
+			goto msg_bytes_ready;
+		copied = -EAGAIN;
+	}
+	release_sock(sk);
+	sk_psock_put(sk, psock);
+	return copied;
+}
+
 static int tcp_bpf_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
 			   int nonblock, int flags, int *addr_len)
 {
@@ -464,6 +499,8 @@ enum {
 enum {
 	TCP_BPF_BASE,
 	TCP_BPF_TX,
+	TCP_BPF_RX,
+	TCP_BPF_TXRX,
 	TCP_BPF_NUM_CFGS,
 };
 
@@ -475,7 +512,6 @@ static void tcp_bpf_rebuild_protos(struct proto prot[TCP_BPF_NUM_CFGS],
 				   struct proto *base)
 {
 	prot[TCP_BPF_BASE] = *base;
-	prot[TCP_BPF_BASE].unhash = sock_map_unhash;
 	prot[TCP_BPF_BASE].close = sock_map_close;
 	prot[TCP_BPF_BASE].recvmsg = tcp_bpf_recvmsg;
 	prot[TCP_BPF_BASE].sock_is_readable = sk_msg_is_readable;
@@ -483,6 +519,12 @@ static void tcp_bpf_rebuild_protos(struct proto prot[TCP_BPF_NUM_CFGS],
 	prot[TCP_BPF_TX] = prot[TCP_BPF_BASE];
 	prot[TCP_BPF_TX].sendmsg = tcp_bpf_sendmsg;
 	prot[TCP_BPF_TX].sendpage = tcp_bpf_sendpage;
+
+	prot[TCP_BPF_RX] = prot[TCP_BPF_BASE];
+	prot[TCP_BPF_RX].recvmsg = tcp_bpf_recvmsg_parser;
+
+	prot[TCP_BPF_TXRX] = prot[TCP_BPF_TX];
+	prot[TCP_BPF_TXRX].recvmsg = tcp_bpf_recvmsg_parser;
 }
 
 static void tcp_bpf_check_v6_needs_rebuild(struct proto *ops)
@@ -520,6 +562,10 @@ int tcp_bpf_update_proto(struct sock *sk, struct sk_psock *psock, bool restore)
 	int family = sk->sk_family == AF_INET6 ? TCP_BPF_IPV6 : TCP_BPF_IPV4;
 	int config = psock->progs.msg_parser ? TCP_BPF_TX : TCP_BPF_BASE;
 
+	if (psock->progs.stream_verdict || psock->progs.skb_verdict) {
+		config = (config == TCP_BPF_TX) ? TCP_BPF_TXRX : TCP_BPF_RX;
+	}
+
 	if (restore) {
 		if (inet_csk_has_ulp(sk)) {
 			/* TLS does not have an unhash proto in SW cases,
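Condensing the two tcp_bpf hunks above into one view (a summary sketch, not text from the patch): the proto slot picked by tcp_bpf_update_proto() now depends on both the msg parser and any verdict program, and the verdict cases read through the new parser-aware recvmsg.

/* Illustrative mapping only, using the names from the hunks above:
 *
 *   msg parser   verdict prog   slot           ->recvmsg
 *   ----------   ------------   ------------   ----------------------
 *   no           no             TCP_BPF_BASE   tcp_bpf_recvmsg
 *   yes          no             TCP_BPF_TX     tcp_bpf_recvmsg
 *   no           yes            TCP_BPF_RX     tcp_bpf_recvmsg_parser
 *   yes          yes            TCP_BPF_TXRX   tcp_bpf_recvmsg_parser
 *
 * tcp_bpf_recvmsg_parser() holds the socket lock while draining the psock
 * queue, which is how the ingress redirect-to-self race is closed.
 */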
@@ -27,18 +27,10 @@
 
 static struct workqueue_struct *strp_wq;
 
-struct _strp_msg {
-	/* Internal cb structure. struct strp_msg must be first for passing
-	 * to upper layer.
-	 */
-	struct strp_msg strp;
-	int accum_len;
-};
-
 static inline struct _strp_msg *_strp_msg(struct sk_buff *skb)
 {
 	return (struct _strp_msg *)((void *)skb->cb +
-		offsetof(struct qdisc_skb_cb, data));
+		offsetof(struct sk_skb_cb, strp));
 }
 
 /* Lower lock held */
@@ -23,6 +23,16 @@ struct callback_ctx {
 	int output;
 };
 
+const volatile int bypass_unused = 1;
+
+static __u64
+unused_subprog(struct bpf_map *map, __u32 *key, __u64 *val,
+	       struct callback_ctx *data)
+{
+	data->output = 0;
+	return 1;
+}
+
 static __u64
 check_array_elem(struct bpf_map *map, __u32 *key, __u64 *val,
 		 struct callback_ctx *data)
@@ -54,6 +64,8 @@ int test_pkt_access(struct __sk_buff *skb)
 
 	data.output = 0;
 	bpf_for_each_map_elem(&arraymap, check_array_elem, &data, 0);
+	if (!bypass_unused)
+		bpf_for_each_map_elem(&arraymap, unused_subprog, &data, 0);
 	arraymap_output = data.output;
 
 	bpf_for_each_map_elem(&percpu_map, check_percpu_elem, (void *)0, 0);
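Why the constant matters in the selftest hunks above: bypass_unused lives in read-only data, so its value (1) is known at verification time, the !bypass_unused branch is provably dead, and the whole unused_subprog is dead-code-eliminated -- the case where a cached subprog index could go stale. A hedged userspace sketch of driving it through a libbpf skeleton (the skeleton header and type names below are assumptions based on the selftest's usual naming):

#include <stdio.h>
#include "for_each_array_map_elem.skel.h"	/* assumed skeleton name */

int main(void)
{
	struct for_each_array_map_elem *skel;

	skel = for_each_array_map_elem__open();
	if (!skel)
		return 1;
	/* Leave rodata bypass_unused at 1: the verifier then proves the
	 * "if (!bypass_unused)" branch dead and eliminates unused_subprog
	 * entirely, which is the DCE case this selftest wants to trigger.
	 * Setting skel->rodata->bypass_unused = 0 here would keep it alive.
	 */
	if (for_each_array_map_elem__load(skel)) {
		fprintf(stderr, "load failed\n");
		for_each_array_map_elem__destroy(skel);
		return 1;
	}
	for_each_array_map_elem__destroy(skel);
	return 0;
}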