Merge branch 'Replace mono_delivery_time with tstamp_type'
Abhishek Chauhan says: ==================== Patch 1: Renames the mono delivery timestamp to tstamp_type with no change in functionality of the existing kernel code. It also starts assigning tstamp_type as either mono or real and introduces a new enum in skbuff.h; again, there is no change in functionality of the existing kernel code, this only makes the code scalable. Patch 2: Adds an additional bit to support the TAI timestamp type, to avoid tstamp drops in the forwarding path when testing TC-ETF. The patch also updates bpf filter.c and the bpf header files, introducing BPF_SKB_CLOCK_TAI and documenting the deprecation of BPF_SKB_TSTAMP_UNSPEC and BPF_SKB_TSTAMP_DELIVERY_MONO. Patch 3: Handles forwarding of UDP packets with the TAI clock id tstamp_type, with supporting changes to tc_redirect/tc_redirect_dtime to handle forwarding of UDP packets with the TAI tstamp_type. ==================== Signed-off-by: Martin KaFai Lau <martin.lau@kernel.org>
This commit is contained in:
commit
ecec1887e2
@ -706,6 +706,13 @@ typedef unsigned int sk_buff_data_t;
|
||||
typedef unsigned char *sk_buff_data_t;
|
||||
#endif
|
||||
|
||||
enum skb_tstamp_type {
|
||||
SKB_CLOCK_REALTIME,
|
||||
SKB_CLOCK_MONOTONIC,
|
||||
SKB_CLOCK_TAI,
|
||||
__SKB_CLOCK_MAX = SKB_CLOCK_TAI,
|
||||
};
|
||||
|
||||
/**
|
||||
* DOC: Basic sk_buff geometry
|
||||
*
|
||||
@ -823,10 +830,8 @@ typedef unsigned char *sk_buff_data_t;
|
||||
* @dst_pending_confirm: need to confirm neighbour
|
||||
* @decrypted: Decrypted SKB
|
||||
* @slow_gro: state present at GRO time, slower prepare step required
|
||||
* @mono_delivery_time: When set, skb->tstamp has the
|
||||
* delivery_time in mono clock base (i.e. EDT). Otherwise, the
|
||||
* skb->tstamp has the (rcv) timestamp at ingress and
|
||||
* delivery_time at egress.
|
||||
* @tstamp_type: Clock base of the time stored in skb->tstamp
|
||||
* (see enum skb_tstamp_type).
|
||||
* @napi_id: id of the NAPI struct this skb came from
|
||||
* @sender_cpu: (aka @napi_id) source CPU in XPS
|
||||
* @alloc_cpu: CPU which did the skb allocation.
|
||||
@ -954,7 +959,7 @@ struct sk_buff {
|
||||
/* private: */
|
||||
__u8 __mono_tc_offset[0];
|
||||
/* public: */
|
||||
__u8 mono_delivery_time:1; /* See SKB_MONO_DELIVERY_TIME_MASK */
|
||||
__u8 tstamp_type:2; /* See skb_tstamp_type */
|
||||
#ifdef CONFIG_NET_XGRESS
|
||||
__u8 tc_at_ingress:1; /* See TC_AT_INGRESS_MASK */
|
||||
__u8 tc_skip_classify:1;
|
||||
@ -1084,15 +1089,16 @@ struct sk_buff {
|
||||
#endif
|
||||
#define PKT_TYPE_OFFSET offsetof(struct sk_buff, __pkt_type_offset)
|
||||
|
||||
/* if you move tc_at_ingress or mono_delivery_time
|
||||
/* if you move tc_at_ingress or tstamp_type
|
||||
* around, you also must adapt these constants.
|
||||
*/
|
||||
#ifdef __BIG_ENDIAN_BITFIELD
|
||||
#define SKB_MONO_DELIVERY_TIME_MASK (1 << 7)
|
||||
#define TC_AT_INGRESS_MASK (1 << 6)
|
||||
#define SKB_TSTAMP_TYPE_MASK (3 << 6)
|
||||
#define SKB_TSTAMP_TYPE_RSHIFT (6)
|
||||
#define TC_AT_INGRESS_MASK (1 << 5)
|
||||
#else
|
||||
#define SKB_MONO_DELIVERY_TIME_MASK (1 << 0)
|
||||
#define TC_AT_INGRESS_MASK (1 << 1)
|
||||
#define SKB_TSTAMP_TYPE_MASK (3)
|
||||
#define TC_AT_INGRESS_MASK (1 << 2)
|
||||
#endif
|
||||
#define SKB_BF_MONO_TC_OFFSET offsetof(struct sk_buff, __mono_tc_offset)
|
||||
|
||||
@ -4183,7 +4189,7 @@ static inline void skb_get_new_timestampns(const struct sk_buff *skb,
|
||||
static inline void __net_timestamp(struct sk_buff *skb)
|
||||
{
|
||||
skb->tstamp = ktime_get_real();
|
||||
skb->mono_delivery_time = 0;
|
||||
skb->tstamp_type = SKB_CLOCK_REALTIME;
|
||||
}
|
||||
|
||||
static inline ktime_t net_timedelta(ktime_t t)
|
||||
@ -4192,10 +4198,36 @@ static inline ktime_t net_timedelta(ktime_t t)
|
||||
}
|
||||
|
||||
static inline void skb_set_delivery_time(struct sk_buff *skb, ktime_t kt,
|
||||
bool mono)
|
||||
u8 tstamp_type)
|
||||
{
|
||||
skb->tstamp = kt;
|
||||
skb->mono_delivery_time = kt && mono;
|
||||
|
||||
if (kt)
|
||||
skb->tstamp_type = tstamp_type;
|
||||
else
|
||||
skb->tstamp_type = SKB_CLOCK_REALTIME;
|
||||
}
|
||||
|
||||
static inline void skb_set_delivery_type_by_clockid(struct sk_buff *skb,
|
||||
ktime_t kt, clockid_t clockid)
|
||||
{
|
||||
u8 tstamp_type = SKB_CLOCK_REALTIME;
|
||||
|
||||
switch (clockid) {
|
||||
case CLOCK_REALTIME:
|
||||
break;
|
||||
case CLOCK_MONOTONIC:
|
||||
tstamp_type = SKB_CLOCK_MONOTONIC;
|
||||
break;
|
||||
case CLOCK_TAI:
|
||||
tstamp_type = SKB_CLOCK_TAI;
|
||||
break;
|
||||
default:
|
||||
WARN_ON_ONCE(1);
|
||||
kt = 0;
|
||||
}
|
||||
|
||||
skb_set_delivery_time(skb, kt, tstamp_type);
|
||||
}
|
||||
|
||||
DECLARE_STATIC_KEY_FALSE(netstamp_needed_key);
|
||||
@ -4205,8 +4237,8 @@ DECLARE_STATIC_KEY_FALSE(netstamp_needed_key);
|
||||
*/
|
||||
static inline void skb_clear_delivery_time(struct sk_buff *skb)
|
||||
{
|
||||
if (skb->mono_delivery_time) {
|
||||
skb->mono_delivery_time = 0;
|
||||
if (skb->tstamp_type) {
|
||||
skb->tstamp_type = SKB_CLOCK_REALTIME;
|
||||
if (static_branch_unlikely(&netstamp_needed_key))
|
||||
skb->tstamp = ktime_get_real();
|
||||
else
|
||||
@ -4216,7 +4248,7 @@ static inline void skb_clear_delivery_time(struct sk_buff *skb)
|
||||
|
||||
static inline void skb_clear_tstamp(struct sk_buff *skb)
|
||||
{
|
||||
if (skb->mono_delivery_time)
|
||||
if (skb->tstamp_type)
|
||||
return;
|
||||
|
||||
skb->tstamp = 0;
|
||||
@ -4224,7 +4256,7 @@ static inline void skb_clear_tstamp(struct sk_buff *skb)
|
||||
|
||||
static inline ktime_t skb_tstamp(const struct sk_buff *skb)
|
||||
{
|
||||
if (skb->mono_delivery_time)
|
||||
if (skb->tstamp_type)
|
||||
return 0;
|
||||
|
||||
return skb->tstamp;
|
||||
@ -4232,7 +4264,7 @@ static inline ktime_t skb_tstamp(const struct sk_buff *skb)
|
||||
|
||||
static inline ktime_t skb_tstamp_cond(const struct sk_buff *skb, bool cond)
|
||||
{
|
||||
if (!skb->mono_delivery_time && skb->tstamp)
|
||||
if (skb->tstamp_type != SKB_CLOCK_MONOTONIC && skb->tstamp)
|
||||
return skb->tstamp;
|
||||
|
||||
if (static_branch_unlikely(&netstamp_needed_key) || cond)
|
||||
|
@ -76,7 +76,7 @@ struct frag_v6_compare_key {
|
||||
* @stamp: timestamp of the last received fragment
|
||||
* @len: total length of the original datagram
|
||||
* @meat: length of received fragments so far
|
||||
* @mono_delivery_time: stamp has a mono delivery time (EDT)
|
||||
* @tstamp_type: clock type of stamp, copied from skb->tstamp_type
|
||||
* @flags: fragment queue flags
|
||||
* @max_size: maximum received fragment size
|
||||
* @fqdir: pointer to struct fqdir
|
||||
@ -97,7 +97,7 @@ struct inet_frag_queue {
|
||||
ktime_t stamp;
|
||||
int len;
|
||||
int meat;
|
||||
u8 mono_delivery_time;
|
||||
u8 tstamp_type;
|
||||
__u8 flags;
|
||||
u16 max_size;
|
||||
struct fqdir *fqdir;
|
||||
|
@ -6207,12 +6207,17 @@ union { \
|
||||
__u64 :64; \
|
||||
} __attribute__((aligned(8)))
|
||||
|
||||
/* The enum used in skb->tstamp_type. It specifies the clock type
|
||||
* of the time stored in the skb->tstamp.
|
||||
*/
|
||||
enum {
|
||||
BPF_SKB_TSTAMP_UNSPEC,
|
||||
BPF_SKB_TSTAMP_DELIVERY_MONO, /* tstamp has mono delivery time */
|
||||
/* For any BPF_SKB_TSTAMP_* that the bpf prog cannot handle,
|
||||
* the bpf prog should handle it like BPF_SKB_TSTAMP_UNSPEC
|
||||
* and try to deduce it by ingress, egress or skb->sk->sk_clockid.
|
||||
BPF_SKB_TSTAMP_UNSPEC = 0, /* DEPRECATED */
|
||||
BPF_SKB_TSTAMP_DELIVERY_MONO = 1, /* DEPRECATED */
|
||||
BPF_SKB_CLOCK_REALTIME = 0,
|
||||
BPF_SKB_CLOCK_MONOTONIC = 1,
|
||||
BPF_SKB_CLOCK_TAI = 2,
|
||||
/* For any future BPF_SKB_CLOCK_* that the bpf prog cannot handle,
|
||||
* the bpf prog can try to deduce it by ingress/egress/skb->sk->sk_clockid.
|
||||
*/
|
||||
};
|
||||
|
||||
|
@ -32,7 +32,7 @@ static int nf_br_ip_fragment(struct net *net, struct sock *sk,
|
||||
struct sk_buff *))
|
||||
{
|
||||
int frag_max_size = BR_INPUT_SKB_CB(skb)->frag_max_size;
|
||||
bool mono_delivery_time = skb->mono_delivery_time;
|
||||
u8 tstamp_type = skb->tstamp_type;
|
||||
unsigned int hlen, ll_rs, mtu;
|
||||
ktime_t tstamp = skb->tstamp;
|
||||
struct ip_frag_state state;
|
||||
@ -82,7 +82,7 @@ static int nf_br_ip_fragment(struct net *net, struct sock *sk,
|
||||
if (iter.frag)
|
||||
ip_fraglist_prepare(skb, &iter);
|
||||
|
||||
skb_set_delivery_time(skb, tstamp, mono_delivery_time);
|
||||
skb_set_delivery_time(skb, tstamp, tstamp_type);
|
||||
err = output(net, sk, data, skb);
|
||||
if (err || !iter.frag)
|
||||
break;
|
||||
@ -113,7 +113,7 @@ slow_path:
|
||||
goto blackhole;
|
||||
}
|
||||
|
||||
skb_set_delivery_time(skb2, tstamp, mono_delivery_time);
|
||||
skb_set_delivery_time(skb2, tstamp, tstamp_type);
|
||||
err = output(net, sk, data, skb2);
|
||||
if (err)
|
||||
goto blackhole;
|
||||
|
@ -2160,7 +2160,7 @@ EXPORT_SYMBOL(net_disable_timestamp);
|
||||
static inline void net_timestamp_set(struct sk_buff *skb)
|
||||
{
|
||||
skb->tstamp = 0;
|
||||
skb->mono_delivery_time = 0;
|
||||
skb->tstamp_type = SKB_CLOCK_REALTIME;
|
||||
if (static_branch_unlikely(&netstamp_needed_key))
|
||||
skb->tstamp = ktime_get_real();
|
||||
}
|
||||
|
@ -7726,17 +7726,21 @@ BPF_CALL_3(bpf_skb_set_tstamp, struct sk_buff *, skb,
|
||||
return -EOPNOTSUPP;
|
||||
|
||||
switch (tstamp_type) {
|
||||
case BPF_SKB_TSTAMP_DELIVERY_MONO:
|
||||
case BPF_SKB_CLOCK_REALTIME:
|
||||
skb->tstamp = tstamp;
|
||||
skb->tstamp_type = SKB_CLOCK_REALTIME;
|
||||
break;
|
||||
case BPF_SKB_CLOCK_MONOTONIC:
|
||||
if (!tstamp)
|
||||
return -EINVAL;
|
||||
skb->tstamp = tstamp;
|
||||
skb->mono_delivery_time = 1;
|
||||
skb->tstamp_type = SKB_CLOCK_MONOTONIC;
|
||||
break;
|
||||
case BPF_SKB_TSTAMP_UNSPEC:
|
||||
if (tstamp)
|
||||
case BPF_SKB_CLOCK_TAI:
|
||||
if (!tstamp)
|
||||
return -EINVAL;
|
||||
skb->tstamp = 0;
|
||||
skb->mono_delivery_time = 0;
|
||||
skb->tstamp = tstamp;
|
||||
skb->tstamp_type = SKB_CLOCK_TAI;
|
||||
break;
|
||||
default:
|
||||
return -EINVAL;
|
||||
@ -9387,16 +9391,17 @@ static struct bpf_insn *bpf_convert_tstamp_type_read(const struct bpf_insn *si,
|
||||
{
|
||||
__u8 value_reg = si->dst_reg;
|
||||
__u8 skb_reg = si->src_reg;
|
||||
/* AX is needed because src_reg and dst_reg could be the same */
|
||||
__u8 tmp_reg = BPF_REG_AX;
|
||||
|
||||
*insn++ = BPF_LDX_MEM(BPF_B, tmp_reg, skb_reg,
|
||||
SKB_BF_MONO_TC_OFFSET);
|
||||
*insn++ = BPF_JMP32_IMM(BPF_JSET, tmp_reg,
|
||||
SKB_MONO_DELIVERY_TIME_MASK, 2);
|
||||
*insn++ = BPF_MOV32_IMM(value_reg, BPF_SKB_TSTAMP_UNSPEC);
|
||||
*insn++ = BPF_JMP_A(1);
|
||||
*insn++ = BPF_MOV32_IMM(value_reg, BPF_SKB_TSTAMP_DELIVERY_MONO);
|
||||
BUILD_BUG_ON(__SKB_CLOCK_MAX != (int)BPF_SKB_CLOCK_TAI);
|
||||
BUILD_BUG_ON(SKB_CLOCK_REALTIME != (int)BPF_SKB_CLOCK_REALTIME);
|
||||
BUILD_BUG_ON(SKB_CLOCK_MONOTONIC != (int)BPF_SKB_CLOCK_MONOTONIC);
|
||||
BUILD_BUG_ON(SKB_CLOCK_TAI != (int)BPF_SKB_CLOCK_TAI);
|
||||
*insn++ = BPF_LDX_MEM(BPF_B, value_reg, skb_reg, SKB_BF_MONO_TC_OFFSET);
|
||||
*insn++ = BPF_ALU32_IMM(BPF_AND, value_reg, SKB_TSTAMP_TYPE_MASK);
|
||||
#ifdef __BIG_ENDIAN_BITFIELD
|
||||
*insn++ = BPF_ALU32_IMM(BPF_RSH, value_reg, SKB_TSTAMP_TYPE_RSHIFT);
|
||||
#else
|
||||
BUILD_BUG_ON(!(SKB_TSTAMP_TYPE_MASK & 0x1));
|
||||
#endif
|
||||
|
||||
return insn;
|
||||
}
|
||||
@ -9439,11 +9444,12 @@ static struct bpf_insn *bpf_convert_tstamp_read(const struct bpf_prog *prog,
|
||||
__u8 tmp_reg = BPF_REG_AX;
|
||||
|
||||
*insn++ = BPF_LDX_MEM(BPF_B, tmp_reg, skb_reg, SKB_BF_MONO_TC_OFFSET);
|
||||
*insn++ = BPF_ALU32_IMM(BPF_AND, tmp_reg,
|
||||
TC_AT_INGRESS_MASK | SKB_MONO_DELIVERY_TIME_MASK);
|
||||
*insn++ = BPF_JMP32_IMM(BPF_JNE, tmp_reg,
|
||||
TC_AT_INGRESS_MASK | SKB_MONO_DELIVERY_TIME_MASK, 2);
|
||||
/* skb->tc_at_ingress && skb->mono_delivery_time,
|
||||
/* check if the ingress mask bit is set */
|
||||
*insn++ = BPF_JMP32_IMM(BPF_JSET, tmp_reg, TC_AT_INGRESS_MASK, 1);
|
||||
*insn++ = BPF_JMP_A(4);
|
||||
*insn++ = BPF_JMP32_IMM(BPF_JSET, tmp_reg, SKB_TSTAMP_TYPE_MASK, 1);
|
||||
*insn++ = BPF_JMP_A(2);
|
||||
/* skb->tc_at_ingress && skb->tstamp_type,
|
||||
* read 0 as the (rcv) timestamp.
|
||||
*/
|
||||
*insn++ = BPF_MOV64_IMM(value_reg, 0);
|
||||
@ -9468,7 +9474,7 @@ static struct bpf_insn *bpf_convert_tstamp_write(const struct bpf_prog *prog,
|
||||
* the bpf prog is aware the tstamp could have delivery time.
|
||||
* Thus, write skb->tstamp as is if tstamp_type_access is true.
|
||||
* Otherwise, writing at ingress will have to clear the
|
||||
* mono_delivery_time bit also.
|
||||
* skb->tstamp_type bit also.
|
||||
*/
|
||||
if (!prog->tstamp_type_access) {
|
||||
__u8 tmp_reg = BPF_REG_AX;
|
||||
@ -9478,8 +9484,8 @@ static struct bpf_insn *bpf_convert_tstamp_write(const struct bpf_prog *prog,
|
||||
*insn++ = BPF_JMP32_IMM(BPF_JSET, tmp_reg, TC_AT_INGRESS_MASK, 1);
|
||||
/* goto <store> */
|
||||
*insn++ = BPF_JMP_A(2);
|
||||
/* <clear>: mono_delivery_time */
|
||||
*insn++ = BPF_ALU32_IMM(BPF_AND, tmp_reg, ~SKB_MONO_DELIVERY_TIME_MASK);
|
||||
/* <clear>: skb->tstamp_type */
|
||||
*insn++ = BPF_ALU32_IMM(BPF_AND, tmp_reg, ~SKB_TSTAMP_TYPE_MASK);
|
||||
*insn++ = BPF_STX_MEM(BPF_B, skb_reg, tmp_reg, SKB_BF_MONO_TC_OFFSET);
|
||||
}
|
||||
#endif
|
||||
|
@ -130,7 +130,7 @@ static int lowpan_frag_queue(struct lowpan_frag_queue *fq,
|
||||
goto err;
|
||||
|
||||
fq->q.stamp = skb->tstamp;
|
||||
fq->q.mono_delivery_time = skb->mono_delivery_time;
|
||||
fq->q.tstamp_type = skb->tstamp_type;
|
||||
if (frag_type == LOWPAN_DISPATCH_FRAG1)
|
||||
fq->q.flags |= INET_FRAG_FIRST_IN;
|
||||
|
||||
|
@ -619,7 +619,7 @@ void inet_frag_reasm_finish(struct inet_frag_queue *q, struct sk_buff *head,
|
||||
skb_mark_not_on_list(head);
|
||||
head->prev = NULL;
|
||||
head->tstamp = q->stamp;
|
||||
head->mono_delivery_time = q->mono_delivery_time;
|
||||
head->tstamp_type = q->tstamp_type;
|
||||
|
||||
if (sk)
|
||||
refcount_add(sum_truesize - head_truesize, &sk->sk_wmem_alloc);
|
||||
|
@ -355,7 +355,7 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
|
||||
qp->iif = dev->ifindex;
|
||||
|
||||
qp->q.stamp = skb->tstamp;
|
||||
qp->q.mono_delivery_time = skb->mono_delivery_time;
|
||||
qp->q.tstamp_type = skb->tstamp_type;
|
||||
qp->q.meat += skb->len;
|
||||
qp->ecn |= ecn;
|
||||
add_frag_mem_limit(qp->q.fqdir, skb->truesize);
|
||||
|
@ -764,7 +764,7 @@ int ip_do_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
|
||||
{
|
||||
struct iphdr *iph;
|
||||
struct sk_buff *skb2;
|
||||
bool mono_delivery_time = skb->mono_delivery_time;
|
||||
u8 tstamp_type = skb->tstamp_type;
|
||||
struct rtable *rt = skb_rtable(skb);
|
||||
unsigned int mtu, hlen, ll_rs;
|
||||
struct ip_fraglist_iter iter;
|
||||
@ -856,7 +856,7 @@ int ip_do_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
|
||||
}
|
||||
}
|
||||
|
||||
skb_set_delivery_time(skb, tstamp, mono_delivery_time);
|
||||
skb_set_delivery_time(skb, tstamp, tstamp_type);
|
||||
err = output(net, sk, skb);
|
||||
|
||||
if (!err)
|
||||
@ -912,7 +912,7 @@ slow_path:
|
||||
/*
|
||||
* Put this fragment into the sending queue.
|
||||
*/
|
||||
skb_set_delivery_time(skb2, tstamp, mono_delivery_time);
|
||||
skb_set_delivery_time(skb2, tstamp, tstamp_type);
|
||||
err = output(net, sk, skb2);
|
||||
if (err)
|
||||
goto fail;
|
||||
@ -1457,7 +1457,10 @@ struct sk_buff *__ip_make_skb(struct sock *sk,
|
||||
|
||||
skb->priority = (cork->tos != -1) ? cork->priority: READ_ONCE(sk->sk_priority);
|
||||
skb->mark = cork->mark;
|
||||
skb->tstamp = cork->transmit_time;
|
||||
if (sk_is_tcp(sk))
|
||||
skb_set_delivery_time(skb, cork->transmit_time, SKB_CLOCK_MONOTONIC);
|
||||
else
|
||||
skb_set_delivery_type_by_clockid(skb, cork->transmit_time, sk->sk_clockid);
|
||||
/*
|
||||
* Steal rt from cork.dst to avoid a pair of atomic_inc/atomic_dec
|
||||
* on dst refcount
|
||||
@ -1649,7 +1652,8 @@ void ip_send_unicast_reply(struct sock *sk, struct sk_buff *skb,
|
||||
arg->csumoffset) = csum_fold(csum_add(nskb->csum,
|
||||
arg->csum));
|
||||
nskb->ip_summed = CHECKSUM_NONE;
|
||||
nskb->mono_delivery_time = !!transmit_time;
|
||||
if (transmit_time)
|
||||
nskb->tstamp_type = SKB_CLOCK_MONOTONIC;
|
||||
if (txhash)
|
||||
skb_set_hash(nskb, txhash, PKT_HASH_TYPE_L4);
|
||||
ip_push_pending_frames(sk, &fl4);
|
||||
|
@ -360,7 +360,7 @@ static int raw_send_hdrinc(struct sock *sk, struct flowi4 *fl4,
|
||||
skb->protocol = htons(ETH_P_IP);
|
||||
skb->priority = READ_ONCE(sk->sk_priority);
|
||||
skb->mark = sockc->mark;
|
||||
skb->tstamp = sockc->transmit_time;
|
||||
skb_set_delivery_type_by_clockid(skb, sockc->transmit_time, sk->sk_clockid);
|
||||
skb_dst_set(skb, &rt->dst);
|
||||
*rtp = NULL;
|
||||
|
||||
|
@ -3625,6 +3625,8 @@ void __init tcp_v4_init(void)
|
||||
*/
|
||||
inet_sk(sk)->pmtudisc = IP_PMTUDISC_DO;
|
||||
|
||||
sk->sk_clockid = CLOCK_MONOTONIC;
|
||||
|
||||
per_cpu(ipv4_tcp_sk, cpu) = sk;
|
||||
}
|
||||
if (register_pernet_subsys(&tcp_sk_ops))
|
||||
|
@ -1301,7 +1301,7 @@ static int __tcp_transmit_skb(struct sock *sk, struct sk_buff *skb,
|
||||
tp = tcp_sk(sk);
|
||||
prior_wstamp = tp->tcp_wstamp_ns;
|
||||
tp->tcp_wstamp_ns = max(tp->tcp_wstamp_ns, tp->tcp_clock_cache);
|
||||
skb_set_delivery_time(skb, tp->tcp_wstamp_ns, true);
|
||||
skb_set_delivery_time(skb, tp->tcp_wstamp_ns, SKB_CLOCK_MONOTONIC);
|
||||
if (clone_it) {
|
||||
oskb = skb;
|
||||
|
||||
@ -1655,7 +1655,7 @@ int tcp_fragment(struct sock *sk, enum tcp_queue tcp_queue,
|
||||
|
||||
skb_split(skb, buff, len);
|
||||
|
||||
skb_set_delivery_time(buff, skb->tstamp, true);
|
||||
skb_set_delivery_time(buff, skb->tstamp, SKB_CLOCK_MONOTONIC);
|
||||
tcp_fragment_tstamp(skb, buff);
|
||||
|
||||
old_factor = tcp_skb_pcount(skb);
|
||||
@ -2764,7 +2764,7 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
|
||||
if (unlikely(tp->repair) && tp->repair_queue == TCP_SEND_QUEUE) {
|
||||
/* "skb_mstamp_ns" is used as a start point for the retransmit timer */
|
||||
tp->tcp_wstamp_ns = tp->tcp_clock_cache;
|
||||
skb_set_delivery_time(skb, tp->tcp_wstamp_ns, true);
|
||||
skb_set_delivery_time(skb, tp->tcp_wstamp_ns, SKB_CLOCK_MONOTONIC);
|
||||
list_move_tail(&skb->tcp_tsorted_anchor, &tp->tsorted_sent_queue);
|
||||
tcp_init_tso_segs(skb, mss_now);
|
||||
goto repair; /* Skip network transmission */
|
||||
@ -3752,11 +3752,11 @@ struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst,
|
||||
#ifdef CONFIG_SYN_COOKIES
|
||||
if (unlikely(synack_type == TCP_SYNACK_COOKIE && ireq->tstamp_ok))
|
||||
skb_set_delivery_time(skb, cookie_init_timestamp(req, now),
|
||||
true);
|
||||
SKB_CLOCK_MONOTONIC);
|
||||
else
|
||||
#endif
|
||||
{
|
||||
skb_set_delivery_time(skb, now, true);
|
||||
skb_set_delivery_time(skb, now, SKB_CLOCK_MONOTONIC);
|
||||
if (!tcp_rsk(req)->snt_synack) /* Timestamp first SYNACK */
|
||||
tcp_rsk(req)->snt_synack = tcp_skb_timestamp_us(skb);
|
||||
}
|
||||
@ -3843,7 +3843,7 @@ struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst,
|
||||
bpf_skops_write_hdr_opt((struct sock *)sk, skb, req, syn_skb,
|
||||
synack_type, &opts);
|
||||
|
||||
skb_set_delivery_time(skb, now, true);
|
||||
skb_set_delivery_time(skb, now, SKB_CLOCK_MONOTONIC);
|
||||
tcp_add_tx_delay(skb, tp);
|
||||
|
||||
return skb;
|
||||
@ -4027,7 +4027,7 @@ static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn)
|
||||
|
||||
err = tcp_transmit_skb(sk, syn_data, 1, sk->sk_allocation);
|
||||
|
||||
skb_set_delivery_time(syn, syn_data->skb_mstamp_ns, true);
|
||||
skb_set_delivery_time(syn, syn_data->skb_mstamp_ns, SKB_CLOCK_MONOTONIC);
|
||||
|
||||
/* Now full SYN+DATA was cloned and sent (or not),
|
||||
* remove the SYN from the original skb (syn_data)
|
||||
|
@ -859,7 +859,7 @@ int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
|
||||
struct rt6_info *rt = dst_rt6_info(skb_dst(skb));
|
||||
struct ipv6_pinfo *np = skb->sk && !dev_recursion_level() ?
|
||||
inet6_sk(skb->sk) : NULL;
|
||||
bool mono_delivery_time = skb->mono_delivery_time;
|
||||
u8 tstamp_type = skb->tstamp_type;
|
||||
struct ip6_frag_state state;
|
||||
unsigned int mtu, hlen, nexthdr_offset;
|
||||
ktime_t tstamp = skb->tstamp;
|
||||
@ -955,7 +955,7 @@ int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
|
||||
if (iter.frag)
|
||||
ip6_fraglist_prepare(skb, &iter);
|
||||
|
||||
skb_set_delivery_time(skb, tstamp, mono_delivery_time);
|
||||
skb_set_delivery_time(skb, tstamp, tstamp_type);
|
||||
err = output(net, sk, skb);
|
||||
if (!err)
|
||||
IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
|
||||
@ -1016,7 +1016,7 @@ slow_path:
|
||||
/*
|
||||
* Put this fragment into the sending queue.
|
||||
*/
|
||||
skb_set_delivery_time(frag, tstamp, mono_delivery_time);
|
||||
skb_set_delivery_time(frag, tstamp, tstamp_type);
|
||||
err = output(net, sk, frag);
|
||||
if (err)
|
||||
goto fail;
|
||||
@ -1924,7 +1924,10 @@ struct sk_buff *__ip6_make_skb(struct sock *sk,
|
||||
|
||||
skb->priority = READ_ONCE(sk->sk_priority);
|
||||
skb->mark = cork->base.mark;
|
||||
skb->tstamp = cork->base.transmit_time;
|
||||
if (sk_is_tcp(sk))
|
||||
skb_set_delivery_time(skb, cork->base.transmit_time, SKB_CLOCK_MONOTONIC);
|
||||
else
|
||||
skb_set_delivery_type_by_clockid(skb, cork->base.transmit_time, sk->sk_clockid);
|
||||
|
||||
ip6_cork_steal_dst(skb, cork);
|
||||
IP6_INC_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUTREQUESTS);
|
||||
|
@ -126,7 +126,7 @@ int br_ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
|
||||
struct sk_buff *))
|
||||
{
|
||||
int frag_max_size = BR_INPUT_SKB_CB(skb)->frag_max_size;
|
||||
bool mono_delivery_time = skb->mono_delivery_time;
|
||||
u8 tstamp_type = skb->tstamp_type;
|
||||
ktime_t tstamp = skb->tstamp;
|
||||
struct ip6_frag_state state;
|
||||
u8 *prevhdr, nexthdr = 0;
|
||||
@ -192,7 +192,7 @@ int br_ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
|
||||
if (iter.frag)
|
||||
ip6_fraglist_prepare(skb, &iter);
|
||||
|
||||
skb_set_delivery_time(skb, tstamp, mono_delivery_time);
|
||||
skb_set_delivery_time(skb, tstamp, tstamp_type);
|
||||
err = output(net, sk, data, skb);
|
||||
if (err || !iter.frag)
|
||||
break;
|
||||
@ -225,7 +225,7 @@ slow_path:
|
||||
goto blackhole;
|
||||
}
|
||||
|
||||
skb_set_delivery_time(skb2, tstamp, mono_delivery_time);
|
||||
skb_set_delivery_time(skb2, tstamp, tstamp_type);
|
||||
err = output(net, sk, data, skb2);
|
||||
if (err)
|
||||
goto blackhole;
|
||||
|
@ -263,7 +263,7 @@ static int nf_ct_frag6_queue(struct frag_queue *fq, struct sk_buff *skb,
|
||||
fq->iif = dev->ifindex;
|
||||
|
||||
fq->q.stamp = skb->tstamp;
|
||||
fq->q.mono_delivery_time = skb->mono_delivery_time;
|
||||
fq->q.tstamp_type = skb->tstamp_type;
|
||||
fq->q.meat += skb->len;
|
||||
fq->ecn |= ecn;
|
||||
if (payload_len > fq->q.max_size)
|
||||
|
@ -621,7 +621,7 @@ static int rawv6_send_hdrinc(struct sock *sk, struct msghdr *msg, int length,
|
||||
skb->protocol = htons(ETH_P_IPV6);
|
||||
skb->priority = READ_ONCE(sk->sk_priority);
|
||||
skb->mark = sockc->mark;
|
||||
skb->tstamp = sockc->transmit_time;
|
||||
skb_set_delivery_type_by_clockid(skb, sockc->transmit_time, sk->sk_clockid);
|
||||
|
||||
skb_put(skb, length);
|
||||
skb_reset_network_header(skb);
|
||||
|
@ -198,7 +198,7 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb,
|
||||
fq->iif = dev->ifindex;
|
||||
|
||||
fq->q.stamp = skb->tstamp;
|
||||
fq->q.mono_delivery_time = skb->mono_delivery_time;
|
||||
fq->q.tstamp_type = skb->tstamp_type;
|
||||
fq->q.meat += skb->len;
|
||||
fq->ecn |= ecn;
|
||||
add_frag_mem_limit(fq->q.fqdir, skb->truesize);
|
||||
|
@ -975,7 +975,7 @@ static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32
|
||||
mark = inet_twsk(sk)->tw_mark;
|
||||
else
|
||||
mark = READ_ONCE(sk->sk_mark);
|
||||
skb_set_delivery_time(buff, tcp_transmit_time(sk), true);
|
||||
skb_set_delivery_time(buff, tcp_transmit_time(sk), SKB_CLOCK_MONOTONIC);
|
||||
}
|
||||
if (txhash) {
|
||||
/* autoflowlabel/skb_get_hash_flowi6 rely on buff->hash */
|
||||
@ -2387,8 +2387,14 @@ static struct inet_protosw tcpv6_protosw = {
|
||||
|
||||
static int __net_init tcpv6_net_init(struct net *net)
|
||||
{
|
||||
return inet_ctl_sock_create(&net->ipv6.tcp_sk, PF_INET6,
|
||||
SOCK_RAW, IPPROTO_TCP, net);
|
||||
int res;
|
||||
|
||||
res = inet_ctl_sock_create(&net->ipv6.tcp_sk, PF_INET6,
|
||||
SOCK_RAW, IPPROTO_TCP, net);
|
||||
if (!res)
|
||||
net->ipv6.tcp_sk->sk_clockid = CLOCK_MONOTONIC;
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
static void __net_exit tcpv6_net_exit(struct net *net)
|
||||
|
@ -2056,8 +2056,7 @@ retry:
|
||||
skb->dev = dev;
|
||||
skb->priority = READ_ONCE(sk->sk_priority);
|
||||
skb->mark = READ_ONCE(sk->sk_mark);
|
||||
skb->tstamp = sockc.transmit_time;
|
||||
|
||||
skb_set_delivery_type_by_clockid(skb, sockc.transmit_time, sk->sk_clockid);
|
||||
skb_setup_tx_timestamp(skb, sockc.tsflags);
|
||||
|
||||
if (unlikely(extra_len == 4))
|
||||
@ -2584,7 +2583,7 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb,
|
||||
skb->dev = dev;
|
||||
skb->priority = READ_ONCE(po->sk.sk_priority);
|
||||
skb->mark = READ_ONCE(po->sk.sk_mark);
|
||||
skb->tstamp = sockc->transmit_time;
|
||||
skb_set_delivery_type_by_clockid(skb, sockc->transmit_time, po->sk.sk_clockid);
|
||||
skb_setup_tx_timestamp(skb, sockc->tsflags);
|
||||
skb_zcopy_set_nouarg(skb, ph.raw);
|
||||
|
||||
@ -3062,7 +3061,7 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len)
|
||||
skb->dev = dev;
|
||||
skb->priority = READ_ONCE(sk->sk_priority);
|
||||
skb->mark = sockc.mark;
|
||||
skb->tstamp = sockc.transmit_time;
|
||||
skb_set_delivery_type_by_clockid(skb, sockc.transmit_time, sk->sk_clockid);
|
||||
|
||||
if (unlikely(extra_len == 4))
|
||||
skb->no_fcs = 1;
|
||||
|
@ -54,8 +54,8 @@ TC_INDIRECT_SCOPE int tcf_bpf_act(struct sk_buff *skb,
|
||||
bpf_compute_data_pointers(skb);
|
||||
filter_res = bpf_prog_run(filter, skb);
|
||||
}
|
||||
if (unlikely(!skb->tstamp && skb->mono_delivery_time))
|
||||
skb->mono_delivery_time = 0;
|
||||
if (unlikely(!skb->tstamp && skb->tstamp_type))
|
||||
skb->tstamp_type = SKB_CLOCK_REALTIME;
|
||||
if (skb_sk_is_prefetched(skb) && filter_res != TC_ACT_OK)
|
||||
skb_orphan(skb);
|
||||
|
||||
|
@ -104,8 +104,8 @@ TC_INDIRECT_SCOPE int cls_bpf_classify(struct sk_buff *skb,
|
||||
bpf_compute_data_pointers(skb);
|
||||
filter_res = bpf_prog_run(prog->filter, skb);
|
||||
}
|
||||
if (unlikely(!skb->tstamp && skb->mono_delivery_time))
|
||||
skb->mono_delivery_time = 0;
|
||||
if (unlikely(!skb->tstamp && skb->tstamp_type))
|
||||
skb->tstamp_type = SKB_CLOCK_REALTIME;
|
||||
|
||||
if (prog->exts_integrated) {
|
||||
res->class = 0;
|
||||
|
@ -6207,12 +6207,17 @@ union { \
|
||||
__u64 :64; \
|
||||
} __attribute__((aligned(8)))
|
||||
|
||||
/* The enum used in skb->tstamp_type. It specifies the clock type
|
||||
* of the time stored in the skb->tstamp.
|
||||
*/
|
||||
enum {
|
||||
BPF_SKB_TSTAMP_UNSPEC,
|
||||
BPF_SKB_TSTAMP_DELIVERY_MONO, /* tstamp has mono delivery time */
|
||||
/* For any BPF_SKB_TSTAMP_* that the bpf prog cannot handle,
|
||||
* the bpf prog should handle it like BPF_SKB_TSTAMP_UNSPEC
|
||||
* and try to deduce it by ingress, egress or skb->sk->sk_clockid.
|
||||
BPF_SKB_TSTAMP_UNSPEC = 0, /* DEPRECATED */
|
||||
BPF_SKB_TSTAMP_DELIVERY_MONO = 1, /* DEPRECATED */
|
||||
BPF_SKB_CLOCK_REALTIME = 0,
|
||||
BPF_SKB_CLOCK_MONOTONIC = 1,
|
||||
BPF_SKB_CLOCK_TAI = 2,
|
||||
/* For any future BPF_SKB_CLOCK_* that the bpf prog cannot handle,
|
||||
* the bpf prog can try to deduce it by ingress/egress/skb->sk->sk_clockid.
|
||||
*/
|
||||
};
|
||||
|
||||
|
@ -69,15 +69,17 @@ static struct test_case test_cases[] = {
|
||||
{
|
||||
N(SCHED_CLS, struct __sk_buff, tstamp),
|
||||
.read = "r11 = *(u8 *)($ctx + sk_buff::__mono_tc_offset);"
|
||||
"w11 &= 3;"
|
||||
"if w11 != 0x3 goto pc+2;"
|
||||
"if w11 & 0x4 goto pc+1;"
|
||||
"goto pc+4;"
|
||||
"if w11 & 0x3 goto pc+1;"
|
||||
"goto pc+2;"
|
||||
"$dst = 0;"
|
||||
"goto pc+1;"
|
||||
"$dst = *(u64 *)($ctx + sk_buff::tstamp);",
|
||||
.write = "r11 = *(u8 *)($ctx + sk_buff::__mono_tc_offset);"
|
||||
"if w11 & 0x2 goto pc+1;"
|
||||
"if w11 & 0x4 goto pc+1;"
|
||||
"goto pc+2;"
|
||||
"w11 &= -2;"
|
||||
"w11 &= -4;"
|
||||
"*(u8 *)($ctx + sk_buff::__mono_tc_offset) = r11;"
|
||||
"*(u64 *)($ctx + sk_buff::tstamp) = $src;",
|
||||
},
|
||||
|
@ -890,9 +890,6 @@ static void test_udp_dtime(struct test_tc_dtime *skel, int family, bool bpf_fwd)
|
||||
|
||||
ASSERT_EQ(dtimes[INGRESS_FWDNS_P100], 0,
|
||||
dtime_cnt_str(t, INGRESS_FWDNS_P100));
|
||||
/* non mono delivery time is not forwarded */
|
||||
ASSERT_EQ(dtimes[INGRESS_FWDNS_P101], 0,
|
||||
dtime_cnt_str(t, INGRESS_FWDNS_P101));
|
||||
for (i = EGRESS_FWDNS_P100; i < SET_DTIME; i++)
|
||||
ASSERT_GT(dtimes[i], 0, dtime_cnt_str(t, i));
|
||||
|
||||
|
@ -222,16 +222,20 @@ int egress_host(struct __sk_buff *skb)
|
||||
return TC_ACT_OK;
|
||||
|
||||
if (skb_proto(skb_type) == IPPROTO_TCP) {
|
||||
if (skb->tstamp_type == BPF_SKB_TSTAMP_DELIVERY_MONO &&
|
||||
if (skb->tstamp_type == BPF_SKB_CLOCK_MONOTONIC &&
|
||||
skb->tstamp)
|
||||
inc_dtimes(EGRESS_ENDHOST);
|
||||
else
|
||||
inc_errs(EGRESS_ENDHOST);
|
||||
} else if (skb_proto(skb_type) == IPPROTO_UDP) {
|
||||
if (skb->tstamp_type == BPF_SKB_CLOCK_TAI &&
|
||||
skb->tstamp)
|
||||
inc_dtimes(EGRESS_ENDHOST);
|
||||
else
|
||||
inc_errs(EGRESS_ENDHOST);
|
||||
} else {
|
||||
if (skb->tstamp_type == BPF_SKB_TSTAMP_UNSPEC &&
|
||||
if (skb->tstamp_type == BPF_SKB_CLOCK_REALTIME &&
|
||||
skb->tstamp)
|
||||
inc_dtimes(EGRESS_ENDHOST);
|
||||
else
|
||||
inc_errs(EGRESS_ENDHOST);
|
||||
}
|
||||
|
||||
@ -252,7 +256,7 @@ int ingress_host(struct __sk_buff *skb)
|
||||
if (!skb_type)
|
||||
return TC_ACT_OK;
|
||||
|
||||
if (skb->tstamp_type == BPF_SKB_TSTAMP_DELIVERY_MONO &&
|
||||
if (skb->tstamp_type == BPF_SKB_CLOCK_MONOTONIC &&
|
||||
skb->tstamp == EGRESS_FWDNS_MAGIC)
|
||||
inc_dtimes(INGRESS_ENDHOST);
|
||||
else
|
||||
@ -315,7 +319,6 @@ int egress_fwdns_prio100(struct __sk_buff *skb)
|
||||
SEC("tc")
|
||||
int ingress_fwdns_prio101(struct __sk_buff *skb)
|
||||
{
|
||||
__u64 expected_dtime = EGRESS_ENDHOST_MAGIC;
|
||||
int skb_type;
|
||||
|
||||
skb_type = skb_get_type(skb);
|
||||
@ -323,29 +326,24 @@ int ingress_fwdns_prio101(struct __sk_buff *skb)
|
||||
/* Should have handled in prio100 */
|
||||
return TC_ACT_SHOT;
|
||||
|
||||
if (skb_proto(skb_type) == IPPROTO_UDP)
|
||||
expected_dtime = 0;
|
||||
|
||||
if (skb->tstamp_type) {
|
||||
if (fwdns_clear_dtime() ||
|
||||
skb->tstamp_type != BPF_SKB_TSTAMP_DELIVERY_MONO ||
|
||||
skb->tstamp != expected_dtime)
|
||||
(skb->tstamp_type != BPF_SKB_CLOCK_MONOTONIC &&
|
||||
skb->tstamp_type != BPF_SKB_CLOCK_TAI) ||
|
||||
skb->tstamp != EGRESS_ENDHOST_MAGIC)
|
||||
inc_errs(INGRESS_FWDNS_P101);
|
||||
else
|
||||
inc_dtimes(INGRESS_FWDNS_P101);
|
||||
} else {
|
||||
if (!fwdns_clear_dtime() && expected_dtime)
|
||||
if (!fwdns_clear_dtime())
|
||||
inc_errs(INGRESS_FWDNS_P101);
|
||||
}
|
||||
|
||||
if (skb->tstamp_type == BPF_SKB_TSTAMP_DELIVERY_MONO) {
|
||||
if (skb->tstamp_type == BPF_SKB_CLOCK_MONOTONIC) {
|
||||
skb->tstamp = INGRESS_FWDNS_MAGIC;
|
||||
} else {
|
||||
if (bpf_skb_set_tstamp(skb, INGRESS_FWDNS_MAGIC,
|
||||
BPF_SKB_TSTAMP_DELIVERY_MONO))
|
||||
inc_errs(SET_DTIME);
|
||||
if (!bpf_skb_set_tstamp(skb, INGRESS_FWDNS_MAGIC,
|
||||
BPF_SKB_TSTAMP_UNSPEC))
|
||||
BPF_SKB_CLOCK_MONOTONIC))
|
||||
inc_errs(SET_DTIME);
|
||||
}
|
||||
|
||||
@ -370,7 +368,7 @@ int egress_fwdns_prio101(struct __sk_buff *skb)
|
||||
|
||||
if (skb->tstamp_type) {
|
||||
if (fwdns_clear_dtime() ||
|
||||
skb->tstamp_type != BPF_SKB_TSTAMP_DELIVERY_MONO ||
|
||||
skb->tstamp_type != BPF_SKB_CLOCK_MONOTONIC ||
|
||||
skb->tstamp != INGRESS_FWDNS_MAGIC)
|
||||
inc_errs(EGRESS_FWDNS_P101);
|
||||
else
|
||||
@ -380,14 +378,11 @@ int egress_fwdns_prio101(struct __sk_buff *skb)
|
||||
inc_errs(EGRESS_FWDNS_P101);
|
||||
}
|
||||
|
||||
if (skb->tstamp_type == BPF_SKB_TSTAMP_DELIVERY_MONO) {
|
||||
if (skb->tstamp_type == BPF_SKB_CLOCK_MONOTONIC) {
|
||||
skb->tstamp = EGRESS_FWDNS_MAGIC;
|
||||
} else {
|
||||
if (bpf_skb_set_tstamp(skb, EGRESS_FWDNS_MAGIC,
|
||||
BPF_SKB_TSTAMP_DELIVERY_MONO))
|
||||
inc_errs(SET_DTIME);
|
||||
if (!bpf_skb_set_tstamp(skb, INGRESS_FWDNS_MAGIC,
|
||||
BPF_SKB_TSTAMP_UNSPEC))
|
||||
BPF_SKB_CLOCK_MONOTONIC))
|
||||
inc_errs(SET_DTIME);
|
||||
}
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user