From 7dee5d7747a69aa2be41f04c6a7ecfe3ac8cdf18 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Mon, 11 Jul 2022 17:15:19 -0700 Subject: [PATCH 01/15] sysctl: Fix data-races in proc_dou8vec_minmax(). A sysctl variable is accessed concurrently, and there is always a chance of data-race. So, all readers and writers need some basic protection to avoid load/store-tearing. This patch changes proc_dou8vec_minmax() to use READ_ONCE() and WRITE_ONCE() internally to fix data-races on the sysctl side. For now, proc_dou8vec_minmax() itself is tolerant to a data-race, but we still need to add annotations on the other subsystem's side. Fixes: cb9444130662 ("sysctl: add proc_dou8vec_minmax()") Signed-off-by: Kuniyuki Iwashima Signed-off-by: David S. Miller --- kernel/sysctl.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/sysctl.c b/kernel/sysctl.c index bf9383d17e1b..b016d68da08a 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -1007,13 +1007,13 @@ int proc_dou8vec_minmax(struct ctl_table *table, int write, tmp.maxlen = sizeof(val); tmp.data = &val; - val = *data; + val = READ_ONCE(*data); res = do_proc_douintvec(&tmp, write, buffer, lenp, ppos, do_proc_douintvec_minmax_conv, ¶m); if (res) return res; if (write) - *data = val; + WRITE_ONCE(*data, val); return 0; } EXPORT_SYMBOL_GPL(proc_dou8vec_minmax); From 7d1025e559782b58824b36cb8ad547a69f2e4b31 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Mon, 11 Jul 2022 17:15:20 -0700 Subject: [PATCH 02/15] sysctl: Fix data-races in proc_dointvec_ms_jiffies(). A sysctl variable is accessed concurrently, and there is always a chance of data-race. So, all readers and writers need some basic protection to avoid load/store-tearing. This patch changes proc_dointvec_ms_jiffies() to use READ_ONCE() and WRITE_ONCE() internally to fix data-races on the sysctl side. For now, proc_dointvec_ms_jiffies() itself is tolerant to a data-race, but we still need to add annotations on the other subsystem's side. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Kuniyuki Iwashima Signed-off-by: David S. Miller --- kernel/sysctl.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/kernel/sysctl.c b/kernel/sysctl.c index b016d68da08a..d99bc3945445 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -1224,9 +1224,9 @@ static int do_proc_dointvec_ms_jiffies_conv(bool *negp, unsigned long *lvalp, if (jif > INT_MAX) return 1; - *valp = (int)jif; + WRITE_ONCE(*valp, (int)jif); } else { - int val = *valp; + int val = READ_ONCE(*valp); unsigned long lval; if (val < 0) { *negp = true; @@ -1294,8 +1294,8 @@ int proc_dointvec_userhz_jiffies(struct ctl_table *table, int write, * @ppos: the current position in the file * * Reads/writes up to table->maxlen/sizeof(unsigned int) integer - * values from/to the user buffer, treated as an ASCII string. - * The values read are assumed to be in 1/1000 seconds, and + * values from/to the user buffer, treated as an ASCII string. + * The values read are assumed to be in 1/1000 seconds, and * are converted into jiffies. * * Returns 0 on success. From 6f605b57f3782114e330e108ce1903ede22ec675 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Mon, 11 Jul 2022 17:15:21 -0700 Subject: [PATCH 03/15] tcp: Fix a data-race around sysctl_max_tw_buckets. While reading sysctl_max_tw_buckets, it can be changed concurrently. Thus, we need to add READ_ONCE() to its reader. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Kuniyuki Iwashima Signed-off-by: David S. Miller --- net/ipv4/inet_timewait_sock.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c index 0ec501845cb3..47ccc343c9fb 100644 --- a/net/ipv4/inet_timewait_sock.c +++ b/net/ipv4/inet_timewait_sock.c @@ -156,7 +156,8 @@ struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk, { struct inet_timewait_sock *tw; - if (refcount_read(&dr->tw_refcount) - 1 >= dr->sysctl_max_tw_buckets) + if (refcount_read(&dr->tw_refcount) - 1 >= + READ_ONCE(dr->sysctl_max_tw_buckets)) return NULL; tw = kmem_cache_alloc(sk->sk_prot_creator->twsk_prot->twsk_slab, From bb7bb35a63b4812da8e3aff587773678e31d23e3 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Mon, 11 Jul 2022 17:15:22 -0700 Subject: [PATCH 04/15] icmp: Fix a data-race around sysctl_icmp_echo_ignore_all. While reading sysctl_icmp_echo_ignore_all, it can be changed concurrently. Thus, we need to add READ_ONCE() to its reader. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Kuniyuki Iwashima Signed-off-by: David S. Miller --- net/ipv4/icmp.c | 2 +- net/ipv4/sysctl_net_ipv4.c | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 0f9e61d29f73..1df4aae310e0 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -993,7 +993,7 @@ static enum skb_drop_reason icmp_echo(struct sk_buff *skb) net = dev_net(skb_dst(skb)->dev); /* should there be an ICMP stat for ignored echos? */ - if (net->ipv4.sysctl_icmp_echo_ignore_all) + if (READ_ONCE(net->ipv4.sysctl_icmp_echo_ignore_all)) return SKB_NOT_DROPPED_YET; icmp_param.data.icmph = *icmp_hdr(skb); diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index cd448cdd3b38..64682a01914a 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -599,6 +599,8 @@ static struct ctl_table ipv4_net_table[] = { .maxlen = sizeof(u8), .mode = 0644, .proc_handler = proc_dou8vec_minmax, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE }, { .procname = "icmp_echo_enable_probe", From 4a2f7083cc6cb72dade9a63699ca352fad26d1cd Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Mon, 11 Jul 2022 17:15:23 -0700 Subject: [PATCH 05/15] icmp: Fix data-races around sysctl_icmp_echo_enable_probe. While reading sysctl_icmp_echo_enable_probe, it can be changed concurrently. Thus, we need to add READ_ONCE() to its readers. Fixes: d329ea5bd884 ("icmp: add response to RFC 8335 PROBE messages") Fixes: 1fd07f33c3ea ("ipv6: ICMPV6: add response to ICMPV6 RFC 8335 PROBE messages") Signed-off-by: Kuniyuki Iwashima Signed-off-by: David S. Miller --- net/ipv4/icmp.c | 2 +- net/ipv6/icmp.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 1df4aae310e0..c406e324c594 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -1028,7 +1028,7 @@ bool icmp_build_probe(struct sk_buff *skb, struct icmphdr *icmphdr) u16 ident_len; u8 status; - if (!net->ipv4.sysctl_icmp_echo_enable_probe) + if (!READ_ONCE(net->ipv4.sysctl_icmp_echo_enable_probe)) return false; /* We currently only support probing interfaces on the proxy node diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index 61770220774e..9d92d51c4757 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -925,7 +925,7 @@ static int icmpv6_rcv(struct sk_buff *skb) break; case ICMPV6_EXT_ECHO_REQUEST: if (!net->ipv6.sysctl.icmpv6_echo_ignore_all && - net->ipv4.sysctl_icmp_echo_enable_probe) + READ_ONCE(net->ipv4.sysctl_icmp_echo_enable_probe)) icmpv6_echo_reply(skb); break; From 66484bb98ed2dfa1dda37a32411483d8311ac269 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Mon, 11 Jul 2022 17:15:24 -0700 Subject: [PATCH 06/15] icmp: Fix a data-race around sysctl_icmp_echo_ignore_broadcasts. While reading sysctl_icmp_echo_ignore_broadcasts, it can be changed concurrently. Thus, we need to add READ_ONCE() to its reader. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Kuniyuki Iwashima Signed-off-by: David S. Miller --- net/ipv4/icmp.c | 2 +- net/ipv4/sysctl_net_ipv4.c | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index c406e324c594..4b1d84d3feaa 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -1249,7 +1249,7 @@ int icmp_rcv(struct sk_buff *skb) */ if ((icmph->type == ICMP_ECHO || icmph->type == ICMP_TIMESTAMP) && - net->ipv4.sysctl_icmp_echo_ignore_broadcasts) { + READ_ONCE(net->ipv4.sysctl_icmp_echo_ignore_broadcasts)) { reason = SKB_DROP_REASON_INVALID_PROTO; goto error; } diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 64682a01914a..cb53476a292e 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -617,6 +617,8 @@ static struct ctl_table ipv4_net_table[] = { .maxlen = sizeof(u8), .mode = 0644, .proc_handler = proc_dou8vec_minmax, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE }, { .procname = "icmp_ignore_bogus_error_responses", From b04f9b7e85c7d7aecbada620e8759a662af068d3 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Mon, 11 Jul 2022 17:15:25 -0700 Subject: [PATCH 07/15] icmp: Fix a data-race around sysctl_icmp_ignore_bogus_error_responses. While reading sysctl_icmp_ignore_bogus_error_responses, it can be changed concurrently. Thus, we need to add READ_ONCE() to its reader. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Kuniyuki Iwashima Signed-off-by: David S. Miller --- net/ipv4/icmp.c | 2 +- net/ipv4/sysctl_net_ipv4.c | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 4b1d84d3feaa..0c01696c5584 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -933,7 +933,7 @@ static enum skb_drop_reason icmp_unreach(struct sk_buff *skb) * get the other vendor to fix their kit. */ - if (!net->ipv4.sysctl_icmp_ignore_bogus_error_responses && + if (!READ_ONCE(net->ipv4.sysctl_icmp_ignore_bogus_error_responses) && inet_addr_type_dev_table(net, skb->dev, iph->daddr) == RTN_BROADCAST) { net_warn_ratelimited("%pI4 sent an invalid ICMP type %u, code %u error to a broadcast: %pI4 on %s\n", &ip_hdr(skb)->saddr, diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index cb53476a292e..3122e652f3c3 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -626,6 +626,8 @@ static struct ctl_table ipv4_net_table[] = { .maxlen = sizeof(u8), .mode = 0644, .proc_handler = proc_dou8vec_minmax, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE }, { .procname = "icmp_errors_use_inbound_ifaddr", From d2efabce81db7eed1c98fa1a3f203f0edd738ac3 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Mon, 11 Jul 2022 17:15:26 -0700 Subject: [PATCH 08/15] icmp: Fix a data-race around sysctl_icmp_errors_use_inbound_ifaddr. While reading sysctl_icmp_errors_use_inbound_ifaddr, it can be changed concurrently. Thus, we need to add READ_ONCE() to its reader. Fixes: 1c2fb7f93cb2 ("[IPV4]: Sysctl configurable icmp error source address.") Signed-off-by: Kuniyuki Iwashima Signed-off-by: David S. Miller --- net/ipv4/icmp.c | 2 +- net/ipv4/sysctl_net_ipv4.c | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 0c01696c5584..a49b68d6b969 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -693,7 +693,7 @@ void __icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info, rcu_read_lock(); if (rt_is_input_route(rt) && - net->ipv4.sysctl_icmp_errors_use_inbound_ifaddr) + READ_ONCE(net->ipv4.sysctl_icmp_errors_use_inbound_ifaddr)) dev = dev_get_by_index_rcu(net, inet_iif(skb_in)); if (dev) diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 3122e652f3c3..66159d49a575 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -635,6 +635,8 @@ static struct ctl_table ipv4_net_table[] = { .maxlen = sizeof(u8), .mode = 0644, .proc_handler = proc_dou8vec_minmax, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE }, { .procname = "icmp_ratelimit", From 2a4eb714841f288cf51c7d942d98af6a8c6e4b01 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Mon, 11 Jul 2022 17:15:27 -0700 Subject: [PATCH 09/15] icmp: Fix a data-race around sysctl_icmp_ratelimit. While reading sysctl_icmp_ratelimit, it can be changed concurrently. Thus, we need to add READ_ONCE() to its reader. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Kuniyuki Iwashima Signed-off-by: David S. Miller --- net/ipv4/icmp.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index a49b68d6b969..1d454cc41713 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -320,7 +320,8 @@ static bool icmpv4_xrlim_allow(struct net *net, struct rtable *rt, vif = l3mdev_master_ifindex(dst->dev); peer = inet_getpeer_v4(net->ipv4.peers, fl4->daddr, vif, 1); - rc = inet_peer_xrlim_allow(peer, net->ipv4.sysctl_icmp_ratelimit); + rc = inet_peer_xrlim_allow(peer, + READ_ONCE(net->ipv4.sysctl_icmp_ratelimit)); if (peer) inet_putpeer(peer); out: From 1ebcb25ad6fc3d50fca87350acf451b9a66dd31e Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Mon, 11 Jul 2022 17:15:28 -0700 Subject: [PATCH 10/15] icmp: Fix a data-race around sysctl_icmp_ratemask. While reading sysctl_icmp_ratemask, it can be changed concurrently. Thus, we need to add READ_ONCE() to its reader. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Kuniyuki Iwashima Signed-off-by: David S. Miller --- net/ipv4/icmp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 1d454cc41713..57c4f0d87a7a 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -282,7 +282,7 @@ static bool icmpv4_mask_allow(struct net *net, int type, int code) return true; /* Limit if icmp type is enabled in ratemask. */ - if (!((1 << type) & net->ipv4.sysctl_icmp_ratemask)) + if (!((1 << type) & READ_ONCE(net->ipv4.sysctl_icmp_ratemask))) return true; return false; From 1dace014928e6e385363032d359a04dee9158af0 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Mon, 11 Jul 2022 17:15:29 -0700 Subject: [PATCH 11/15] raw: Fix a data-race around sysctl_raw_l3mdev_accept. While reading sysctl_raw_l3mdev_accept, it can be changed concurrently. Thus, we need to add READ_ONCE() to its reader. Fixes: 6897445fb194 ("net: provide a sysctl raw_l3mdev_accept for raw socket lookup with VRFs") Signed-off-by: Kuniyuki Iwashima Signed-off-by: David S. Miller --- include/net/raw.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/net/raw.h b/include/net/raw.h index 8ad8df594853..c51a635671a7 100644 --- a/include/net/raw.h +++ b/include/net/raw.h @@ -75,7 +75,7 @@ static inline bool raw_sk_bound_dev_eq(struct net *net, int bound_dev_if, int dif, int sdif) { #if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV) - return inet_bound_dev_eq(!!net->ipv4.sysctl_raw_l3mdev_accept, + return inet_bound_dev_eq(READ_ONCE(net->ipv4.sysctl_raw_l3mdev_accept), bound_dev_if, dif, sdif); #else return inet_bound_dev_eq(true, bound_dev_if, dif, sdif); From 4785a66702f086cf2ea84bdbe6ec921f274bd9f2 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Mon, 11 Jul 2022 17:15:30 -0700 Subject: [PATCH 12/15] tcp: Fix data-races around sysctl_tcp_ecn. While reading sysctl_tcp_ecn, it can be changed concurrently. Thus, we need to add READ_ONCE() to its readers. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Kuniyuki Iwashima Signed-off-by: David S. Miller --- drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c | 2 +- net/ipv4/syncookies.c | 2 +- net/ipv4/sysctl_net_ipv4.c | 2 ++ net/ipv4/tcp_input.c | 2 +- net/ipv4/tcp_output.c | 2 +- 5 files changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c index 4af5561cbfc5..7c760aa65540 100644 --- a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c +++ b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c @@ -1392,7 +1392,7 @@ static void chtls_pass_accept_request(struct sock *sk, th_ecn = tcph->ece && tcph->cwr; if (th_ecn) { ect = !INET_ECN_is_not_ect(ip_dsfield); - ecn_ok = sock_net(sk)->ipv4.sysctl_tcp_ecn; + ecn_ok = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_ecn); if ((!ect && ecn_ok) || tcp_ca_needs_ecn(sk)) inet_rsk(oreq)->ecn_ok = 1; } diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index f33c31dd7366..b387c4835155 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -273,7 +273,7 @@ bool cookie_ecn_ok(const struct tcp_options_received *tcp_opt, if (!ecn_ok) return false; - if (net->ipv4.sysctl_tcp_ecn) + if (READ_ONCE(net->ipv4.sysctl_tcp_ecn)) return true; return dst_feature(dst, RTAX_FEATURE_ECN); diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 66159d49a575..5e308c1715af 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -676,6 +676,8 @@ static struct ctl_table ipv4_net_table[] = { .maxlen = sizeof(u8), .mode = 0644, .proc_handler = proc_dou8vec_minmax, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_TWO, }, { .procname = "tcp_ecn_fallback", diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 2e2a9ece9af2..3ec4edc37313 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -6729,7 +6729,7 @@ static void tcp_ecn_create_request(struct request_sock *req, ect = !INET_ECN_is_not_ect(TCP_SKB_CB(skb)->ip_dsfield); ecn_ok_dst = dst_feature(dst, DST_FEATURE_ECN_MASK); - ecn_ok = net->ipv4.sysctl_tcp_ecn || ecn_ok_dst; + ecn_ok = READ_ONCE(net->ipv4.sysctl_tcp_ecn) || ecn_ok_dst; if (((!ect || th->res1) && ecn_ok) || tcp_ca_needs_ecn(listen_sk) || (ecn_ok_dst & DST_FEATURE_ECN_CA) || diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 1c054431e358..3dc17551ce25 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -324,7 +324,7 @@ static void tcp_ecn_send_syn(struct sock *sk, struct sk_buff *skb) { struct tcp_sock *tp = tcp_sk(sk); bool bpf_needs_ecn = tcp_bpf_ca_needs_ecn(sk); - bool use_ecn = sock_net(sk)->ipv4.sysctl_tcp_ecn == 1 || + bool use_ecn = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_ecn) == 1 || tcp_ca_needs_ecn(sk) || bpf_needs_ecn; if (!use_ecn) { From 12b8d9ca7e678abc48195294494f1815b555d658 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Mon, 11 Jul 2022 17:15:31 -0700 Subject: [PATCH 13/15] tcp: Fix a data-race around sysctl_tcp_ecn_fallback. While reading sysctl_tcp_ecn_fallback, it can be changed concurrently. Thus, we need to add READ_ONCE() to its reader. Fixes: 492135557dc0 ("tcp: add rfc3168, section 6.1.1.1. fallback") Signed-off-by: Kuniyuki Iwashima Signed-off-by: David S. Miller --- net/ipv4/sysctl_net_ipv4.c | 2 ++ net/ipv4/tcp_output.c | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 5e308c1715af..108fd86f2718 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -685,6 +685,8 @@ static struct ctl_table ipv4_net_table[] = { .maxlen = sizeof(u8), .mode = 0644, .proc_handler = proc_dou8vec_minmax, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, }, { .procname = "ip_dynaddr", diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 3dc17551ce25..11aa0ab10bba 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -346,7 +346,7 @@ static void tcp_ecn_send_syn(struct sock *sk, struct sk_buff *skb) static void tcp_ecn_clear_syn(struct sock *sk, struct sk_buff *skb) { - if (sock_net(sk)->ipv4.sysctl_tcp_ecn_fallback) + if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_ecn_fallback)) /* tp->ecn_flags are cleared at a later point in time when * SYN ACK is ultimatively being received. */ From e49e4aff7ec19b2d0d0957ee30e93dade57dab9e Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Mon, 11 Jul 2022 17:15:32 -0700 Subject: [PATCH 14/15] ipv4: Fix data-races around sysctl_ip_dynaddr. While reading sysctl_ip_dynaddr, it can be changed concurrently. Thus, we need to add READ_ONCE() to its readers. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Kuniyuki Iwashima Signed-off-by: David S. Miller --- Documentation/networking/ip-sysctl.rst | 2 +- net/ipv4/af_inet.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Documentation/networking/ip-sysctl.rst b/Documentation/networking/ip-sysctl.rst index 0e58001f8580..b3a534ed0e7c 100644 --- a/Documentation/networking/ip-sysctl.rst +++ b/Documentation/networking/ip-sysctl.rst @@ -1179,7 +1179,7 @@ ip_autobind_reuse - BOOLEAN option should only be set by experts. Default: 0 -ip_dynaddr - BOOLEAN +ip_dynaddr - INTEGER If set non-zero, enables support for dynamic addresses. If set to a non-zero value larger than 1, a kernel log message will be printed when dynamic address rewriting diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 93da9f783bec..ac67f6b4ec70 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1246,7 +1246,7 @@ static int inet_sk_reselect_saddr(struct sock *sk) if (new_saddr == old_saddr) return 0; - if (sock_net(sk)->ipv4.sysctl_ip_dynaddr > 1) { + if (READ_ONCE(sock_net(sk)->ipv4.sysctl_ip_dynaddr) > 1) { pr_info("%s(): shifting inet->saddr from %pI4 to %pI4\n", __func__, &old_saddr, &new_saddr); } @@ -1301,7 +1301,7 @@ int inet_sk_rebuild_header(struct sock *sk) * Other protocols have to map its equivalent state to TCP_SYN_SENT. * DCCP maps its DCCP_REQUESTING state to TCP_SYN_SENT. -acme */ - if (!sock_net(sk)->ipv4.sysctl_ip_dynaddr || + if (!READ_ONCE(sock_net(sk)->ipv4.sysctl_ip_dynaddr) || sk->sk_state != TCP_SYN_SENT || (sk->sk_userlocks & SOCK_BINDADDR_LOCK) || (err = inet_sk_reselect_saddr(sk)) != 0) From bdf00bf24bef9be1ca641a6390fd5487873e0d2e Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Mon, 11 Jul 2022 17:15:33 -0700 Subject: [PATCH 15/15] nexthop: Fix data-races around nexthop_compat_mode. While reading nexthop_compat_mode, it can be changed concurrently. Thus, we need to add READ_ONCE() to its readers. Fixes: 4f80116d3df3 ("net: ipv4: add sysctl for nexthop api compatibility mode") Signed-off-by: Kuniyuki Iwashima Signed-off-by: David S. Miller --- net/ipv4/fib_semantics.c | 2 +- net/ipv4/nexthop.c | 5 +++-- net/ipv6/route.c | 2 +- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index a57ba23571c9..16dbd5075284 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -1811,7 +1811,7 @@ int fib_dump_info(struct sk_buff *skb, u32 portid, u32 seq, int event, goto nla_put_failure; if (nexthop_is_blackhole(fi->nh)) rtm->rtm_type = RTN_BLACKHOLE; - if (!fi->fib_net->ipv4.sysctl_nexthop_compat_mode) + if (!READ_ONCE(fi->fib_net->ipv4.sysctl_nexthop_compat_mode)) goto offload; } diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c index e459a391e607..853a75a8fbaf 100644 --- a/net/ipv4/nexthop.c +++ b/net/ipv4/nexthop.c @@ -1858,7 +1858,7 @@ static void __remove_nexthop_fib(struct net *net, struct nexthop *nh) /* __ip6_del_rt does a release, so do a hold here */ fib6_info_hold(f6i); ipv6_stub->ip6_del_rt(net, f6i, - !net->ipv4.sysctl_nexthop_compat_mode); + !READ_ONCE(net->ipv4.sysctl_nexthop_compat_mode)); } } @@ -2361,7 +2361,8 @@ out: if (!rc) { nh_base_seq_inc(net); nexthop_notify(RTM_NEWNEXTHOP, new_nh, &cfg->nlinfo); - if (replace_notify && net->ipv4.sysctl_nexthop_compat_mode) + if (replace_notify && + READ_ONCE(net->ipv4.sysctl_nexthop_compat_mode)) nexthop_replace_notify(net, new_nh, &cfg->nlinfo); } diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 828355710c57..916417944ec8 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -5741,7 +5741,7 @@ static int rt6_fill_node(struct net *net, struct sk_buff *skb, if (nexthop_is_blackhole(rt->nh)) rtm->rtm_type = RTN_BLACKHOLE; - if (net->ipv4.sysctl_nexthop_compat_mode && + if (READ_ONCE(net->ipv4.sysctl_nexthop_compat_mode) && rt6_fill_node_nexthop(skb, rt->nh, &nh_flags) < 0) goto nla_put_failure;