From 3ded97bc41a1e76e1e72eeb192331c01ceacc4bc Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 27 Oct 2021 13:19:17 -0700 Subject: [PATCH 1/7] tcp: remove dead code from tcp_sendmsg_locked() TCP sendmsg() no longer puts payload in skb head, we can remove dead code. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/tcp.c | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index d0b848ff5c0f..4053ace0cd76 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1309,14 +1309,7 @@ new_segment: if (copy > msg_data_left(msg)) copy = msg_data_left(msg); - /* Where to copy to? */ - if (skb_availroom(skb) > 0 && !zc) { - /* We have some space in skb head. Superb! */ - copy = min_t(int, copy, skb_availroom(skb)); - err = skb_add_data_nocache(sk, skb, &msg->msg_iter, copy); - if (err) - goto do_fault; - } else if (!zc) { + if (!zc) { bool merge = true; int i = skb_shinfo(skb)->nr_frags; struct page_frag *pfrag = sk_page_frag(sk); @@ -1416,7 +1409,6 @@ out_nopush: do_error: skb = tcp_write_queue_tail(sk); -do_fault: tcp_remove_empty_skb(sk, skb); if (copied + copied_syn) From 27728ba80f1eb279b209bbd5922fdeebe52d9e30 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 27 Oct 2021 13:19:18 -0700 Subject: [PATCH 2/7] tcp: cleanup tcp_remove_empty_skb() use All tcp_remove_empty_skb() callers now use tcp_write_queue_tail() for the skb argument, we can therefore factorize code. Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller --- include/net/tcp.h | 2 +- net/ipv4/tcp.c | 9 +++++---- net/mptcp/protocol.c | 4 ++-- 3 files changed, 8 insertions(+), 7 deletions(-) diff --git a/include/net/tcp.h b/include/net/tcp.h index 701587af6852..8e8c5922a7b0 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -311,7 +311,7 @@ void tcp_shutdown(struct sock *sk, int how); int tcp_v4_early_demux(struct sk_buff *skb); int tcp_v4_rcv(struct sk_buff *skb); -void tcp_remove_empty_skb(struct sock *sk, struct sk_buff *skb); +void tcp_remove_empty_skb(struct sock *sk); int tcp_v4_tw_remember_stamp(struct inet_timewait_sock *tw); int tcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size); int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size); diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 4053ace0cd76..68b946cfd433 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -933,8 +933,10 @@ int tcp_send_mss(struct sock *sk, int *size_goal, int flags) * importantly be able to generate EPOLLOUT for Edge Trigger epoll() * users. 
*/ -void tcp_remove_empty_skb(struct sock *sk, struct sk_buff *skb) +void tcp_remove_empty_skb(struct sock *sk) { + struct sk_buff *skb = tcp_write_queue_tail(sk); + if (skb && TCP_SKB_CB(skb)->seq == TCP_SKB_CB(skb)->end_seq) { tcp_unlink_write_queue(skb, sk); if (tcp_write_queue_empty(sk)) @@ -1087,7 +1089,7 @@ out: return copied; do_error: - tcp_remove_empty_skb(sk, tcp_write_queue_tail(sk)); + tcp_remove_empty_skb(sk); if (copied) goto out; out_err: @@ -1408,8 +1410,7 @@ out_nopush: return copied + copied_syn; do_error: - skb = tcp_write_queue_tail(sk); - tcp_remove_empty_skb(sk, skb); + tcp_remove_empty_skb(sk); if (copied + copied_syn) goto out; diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index eb316bd578bb..10b336d21865 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -1291,7 +1291,7 @@ alloc_skb: u64 snd_una = READ_ONCE(msk->snd_una); if (snd_una != msk->snd_nxt) { - tcp_remove_empty_skb(ssk, tcp_write_queue_tail(ssk)); + tcp_remove_empty_skb(ssk); return 0; } @@ -1307,7 +1307,7 @@ alloc_skb: copy = min_t(size_t, copy, info->limit - info->sent); if (!sk_wmem_schedule(ssk, copy)) { - tcp_remove_empty_skb(ssk, tcp_write_queue_tail(ssk)); + tcp_remove_empty_skb(ssk); return -ENOMEM; } From bd446314717176507e629b6b5511c107b99c1c25 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 27 Oct 2021 13:19:19 -0700 Subject: [PATCH 3/7] tcp: remove dead code from tcp_collapse_retrans() TCP sendmsg() no longer puts payload in skb->head, remove some dead code from tcp_collapse_retrans(). Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller --- net/ipv4/tcp_output.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index c0c55a8be8f7..e1dcc93d5b6d 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -3045,13 +3045,9 @@ static bool tcp_collapse_retrans(struct sock *sk, struct sk_buff *skb) BUG_ON(tcp_skb_pcount(skb) != 1 || tcp_skb_pcount(next_skb) != 1); - if (next_skb_size) { - if (next_skb_size <= skb_availroom(skb)) - skb_copy_bits(next_skb, 0, skb_put(skb, next_skb_size), - next_skb_size); - else if (!tcp_skb_shift(skb, next_skb, 1, next_skb_size)) - return false; - } + if (next_skb_size && !tcp_skb_shift(skb, next_skb, 1, next_skb_size)) + return false; + tcp_highest_sack_replace(sk, next_skb, skb); /* Update sequence range on original skb. */ From f401da475f98c1840d48c9e00a6eb228237357c0 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 27 Oct 2021 13:19:20 -0700 Subject: [PATCH 4/7] tcp: no longer set skb->reserved_tailroom TCP/MPTCP sendmsg() no longer puts payload in skb->head, so we can remove code that is no longer needed. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/tcp.c | 5 ----- net/mptcp/protocol.c | 1 - 2 files changed, 6 deletions(-) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 68b946cfd433..66ed0d79f414 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -876,11 +876,6 @@ struct sk_buff *tcp_stream_alloc_skb(struct sock *sk, int size, gfp_t gfp, } if (likely(mem_scheduled)) { skb_reserve(skb, MAX_TCP_HEADER); - /* - * Make sure that we have exactly size bytes - * available to the caller, no more, no less. 
- */ - skb->reserved_tailroom = skb->end - skb->tail - size; INIT_LIST_HEAD(&skb->tcp_tsorted_anchor); return skb; } diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 10b336d21865..aa96d311bea0 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -1174,7 +1174,6 @@ static struct sk_buff *__mptcp_do_alloc_tx_skb(struct sock *sk, gfp_t gfp) if (likely(skb)) { if (likely(__mptcp_add_ext(skb, gfp))) { skb_reserve(skb, MAX_TCP_HEADER); - skb->reserved_tailroom = skb->end - skb->tail; INIT_LIST_HEAD(&skb->tcp_tsorted_anchor); return skb; } From a52fe46ef160b4101b8d14209729f49a71388b52 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 27 Oct 2021 13:19:21 -0700 Subject: [PATCH 5/7] tcp: factorize ip_summed setting Setting skb->ip_summed to CHECKSUM_PARTIAL can be centralized in tcp_stream_alloc_skb() and __mptcp_do_alloc_tx_skb() instead of being done multiple times. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/tcp.c | 3 +-- net/ipv4/tcp_output.c | 6 ------ net/mptcp/protocol.c | 2 +- 3 files changed, 2 insertions(+), 9 deletions(-) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 66ed0d79f414..c58d448b45a0 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -876,6 +876,7 @@ struct sk_buff *tcp_stream_alloc_skb(struct sock *sk, int size, gfp_t gfp, } if (likely(mem_scheduled)) { skb_reserve(skb, MAX_TCP_HEADER); + skb->ip_summed = CHECKSUM_PARTIAL; INIT_LIST_HEAD(&skb->tcp_tsorted_anchor); return skb; } @@ -993,7 +994,6 @@ new_segment: skb->truesize += copy; sk_wmem_queued_add(sk, copy); sk_mem_charge(sk, copy); - skb->ip_summed = CHECKSUM_PARTIAL; WRITE_ONCE(tp->write_seq, tp->write_seq + copy); TCP_SKB_CB(skb)->end_seq += copy; tcp_skb_pcount_set(skb, 0); @@ -1289,7 +1289,6 @@ new_segment: goto wait_for_space; process_backlog++; - skb->ip_summed = CHECKSUM_PARTIAL; tcp_skb_entail(sk, skb); copy = size_goal; diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index e1dcc93d5b6d..7ecf35d0f847 100644 --- 
a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -1590,8 +1590,6 @@ int tcp_fragment(struct sock *sk, enum tcp_queue tcp_queue, skb_split(skb, buff, len); - buff->ip_summed = CHECKSUM_PARTIAL; - buff->tstamp = skb->tstamp; tcp_fragment_tstamp(skb, buff); @@ -1676,7 +1674,6 @@ int tcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len) delta_truesize = __pskb_trim_head(skb, len); TCP_SKB_CB(skb)->seq += len; - skb->ip_summed = CHECKSUM_PARTIAL; if (delta_truesize) { skb->truesize -= delta_truesize; @@ -2147,7 +2144,6 @@ static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len, tcp_skb_fragment_eor(skb, buff); - buff->ip_summed = CHECKSUM_PARTIAL; skb_split(skb, buff, len); tcp_fragment_tstamp(skb, buff); @@ -2403,7 +2399,6 @@ static int tcp_mtu_probe(struct sock *sk) TCP_SKB_CB(nskb)->tcp_flags = TCPHDR_ACK; TCP_SKB_CB(nskb)->sacked = 0; nskb->csum = 0; - nskb->ip_summed = CHECKSUM_PARTIAL; tcp_insert_write_queue_before(nskb, skb, sk); tcp_highest_sack_replace(sk, skb, nskb); @@ -3753,7 +3748,6 @@ static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn) syn_data = tcp_stream_alloc_skb(sk, space, sk->sk_allocation, false); if (!syn_data) goto fallback; - syn_data->ip_summed = CHECKSUM_PARTIAL; memcpy(syn_data->cb, syn->cb, sizeof(syn->cb)); if (space) { int copied = copy_from_iter(skb_put(syn_data, space), space, diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index aa96d311bea0..b7e32e316738 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -1174,6 +1174,7 @@ static struct sk_buff *__mptcp_do_alloc_tx_skb(struct sock *sk, gfp_t gfp) if (likely(skb)) { if (likely(__mptcp_add_ext(skb, gfp))) { skb_reserve(skb, MAX_TCP_HEADER); + skb->ip_summed = CHECKSUM_PARTIAL; INIT_LIST_HEAD(&skb->tcp_tsorted_anchor); return skb; } @@ -1322,7 +1323,6 @@ alloc_skb: skb->truesize += copy; sk_wmem_queued_add(ssk, copy); sk_mem_charge(ssk, copy); - skb->ip_summed = CHECKSUM_PARTIAL; WRITE_ONCE(tcp_sk(ssk)->write_seq, 
tcp_sk(ssk)->write_seq + copy); TCP_SKB_CB(skb)->end_seq += copy; tcp_skb_pcount_set(skb, 0); From 4f2266748eabc42f107ecb6a3cc5b34614b29a12 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 27 Oct 2021 13:19:22 -0700 Subject: [PATCH 6/7] tcp: do not clear skb->csum if already zero Freshly allocated skbs have their csum field cleared already. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/tcp.c | 1 - net/ipv4/tcp_output.c | 1 - net/ipv6/tcp_ipv6.c | 1 - 3 files changed, 3 deletions(-) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index c58d448b45a0..88475b640344 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -658,7 +658,6 @@ void tcp_skb_entail(struct sock *sk, struct sk_buff *skb) struct tcp_sock *tp = tcp_sk(sk); struct tcp_skb_cb *tcb = TCP_SKB_CB(skb); - skb->csum = 0; tcb->seq = tcb->end_seq = tp->write_seq; tcb->tcp_flags = TCPHDR_ACK; tcb->sacked = 0; diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 7ecf35d0f847..5664355b0d09 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -2398,7 +2398,6 @@ static int tcp_mtu_probe(struct sock *sk) TCP_SKB_CB(nskb)->end_seq = TCP_SKB_CB(skb)->seq + probe_size; TCP_SKB_CB(nskb)->tcp_flags = TCPHDR_ACK; TCP_SKB_CB(nskb)->sacked = 0; - nskb->csum = 0; tcp_insert_write_queue_before(nskb, skb, sk); tcp_highest_sack_replace(sk, skb, nskb); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index c678e778c1fb..2cc9b0e53ad1 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -969,7 +969,6 @@ static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 fl6.flowlabel = label; buff->ip_summed = CHECKSUM_PARTIAL; - buff->csum = 0; __tcp_v6_send_check(buff, &fl6.saddr, &fl6.daddr); From 8b7d8c2bdb7653605aaa1770b882e79b25ba4002 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 27 Oct 2021 13:19:23 -0700 Subject: [PATCH 7/7] tcp: do not clear TCP_SKB_CB(skb)->sacked if already zero Freshly allocated skbs have zero in skb->cb[] 
already. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/tcp.c | 1 - net/ipv4/tcp_output.c | 5 ----- 2 files changed, 6 deletions(-) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 88475b640344..7a7b9aa8f19a 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -660,7 +660,6 @@ void tcp_skb_entail(struct sock *sk, struct sk_buff *skb) tcb->seq = tcb->end_seq = tp->write_seq; tcb->tcp_flags = TCPHDR_ACK; - tcb->sacked = 0; __skb_header_release(skb); tcp_add_write_queue_tail(sk, skb); sk_wmem_queued_add(sk, skb->truesize); diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 5664355b0d09..6867e5db3e35 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -394,7 +394,6 @@ static void tcp_init_nondata_skb(struct sk_buff *skb, u32 seq, u8 flags) skb->ip_summed = CHECKSUM_PARTIAL; TCP_SKB_CB(skb)->tcp_flags = flags; - TCP_SKB_CB(skb)->sacked = 0; tcp_skb_pcount_set(skb, 1); @@ -2139,9 +2138,6 @@ static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len, TCP_SKB_CB(skb)->tcp_flags = flags & ~(TCPHDR_FIN | TCPHDR_PSH); TCP_SKB_CB(buff)->tcp_flags = flags; - /* This packet was never sent out yet, so no SACK bits. */ - TCP_SKB_CB(buff)->sacked = 0; - tcp_skb_fragment_eor(skb, buff); skb_split(skb, buff, len); @@ -2397,7 +2393,6 @@ static int tcp_mtu_probe(struct sock *sk) TCP_SKB_CB(nskb)->seq = TCP_SKB_CB(skb)->seq; TCP_SKB_CB(nskb)->end_seq = TCP_SKB_CB(skb)->seq + probe_size; TCP_SKB_CB(nskb)->tcp_flags = TCPHDR_ACK; - TCP_SKB_CB(nskb)->sacked = 0; tcp_insert_write_queue_before(nskb, skb, sk); tcp_highest_sack_replace(sk, skb, nskb);