From 98fb9b9680c9f3895ced02d6a73e27f5d7b5892b Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 6 Dec 2023 22:37:57 +0100 Subject: [PATCH 01/69] wifi: ieee80211: don't require protected vendor action frames For vendor action frames, whether a protected one should be used or not is clearly up to the individual vendor and frame, so even though a protected dual is defined, it may not get used. Thus, don't require protection for vendor action frames when they're used in a connection. Since we obviously don't process frames unknown to the kernel in the kernel, it may make sense to invert this list to have all the ones the kernel processes and knows to be requiring protection, but that'd be a different change. Fixes: 91535613b609 ("wifi: mac80211: don't drop all unprotected public action frames") Reported-by: Jouni Malinen Link: https://msgid.link/20231206223801.f6a2cf4e67ec.Ifa6acc774bd67801d3dafb405278f297683187aa@changeid Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index c2ac9e9e7ee9..2b5e500bf093 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -4447,7 +4447,8 @@ ieee80211_is_protected_dual_of_public_action(struct sk_buff *skb) action != WLAN_PUB_ACTION_LOC_TRACK_NOTI && action != WLAN_PUB_ACTION_FTM_REQUEST && action != WLAN_PUB_ACTION_FTM_RESPONSE && - action != WLAN_PUB_ACTION_FILS_DISCOVERY; + action != WLAN_PUB_ACTION_FILS_DISCOVERY && + action != WLAN_PUB_ACTION_VENDOR_SPECIFIC; } /** From a4754182dc936b97ec7e9f6b08cdf7ed97ef9069 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 8 Dec 2023 18:32:02 +0200 Subject: [PATCH 02/69] wifi: iwlwifi: pcie: add another missing bh-disable for rxq->lock Evidently I had only looked at all the ones in rx.c, and missed this. Add bh-disable to this use of the rxq->lock as well. 
Fixes: 25edc8f259c7 ("iwlwifi: pcie: properly implement NAPI") Reported-by: Brian Norris Signed-off-by: Johannes Berg Signed-off-by: Miri Korenblit Link: https://msgid.link/20231208183100.e79ad3dae649.I8f19713c4383707f8be7fc20ff5cc1ecf12429bb@changeid Signed-off-by: Johannes Berg --- drivers/net/wireless/intel/iwlwifi/pcie/trans.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c index a468e5efeecd..92253260f568 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c @@ -3106,7 +3106,7 @@ static u32 iwl_trans_pcie_dump_rbs(struct iwl_trans *trans, struct iwl_rxq *rxq = &trans_pcie->rxq[0]; u32 i, r, j, rb_len = 0; - spin_lock(&rxq->lock); + spin_lock_bh(&rxq->lock); r = iwl_get_closed_rb_stts(trans, rxq); @@ -3130,7 +3130,7 @@ static u32 iwl_trans_pcie_dump_rbs(struct iwl_trans *trans, *data = iwl_fw_error_next_data(*data); } - spin_unlock(&rxq->lock); + spin_unlock_bh(&rxq->lock); return rb_len; } From fb768d3b13ffa325b7e84480d488ac799c9d2cd7 Mon Sep 17 00:00:00 2001 From: Chen-Yu Tsai Date: Thu, 7 Dec 2023 21:20:50 +0800 Subject: [PATCH 03/69] wifi: cfg80211: Add my certificate As announced [1][2], I have taken over maintainership of the wireless-regdb project. Add my certificate so that newer releases are valid to the kernel. Seth's certificate should be kept around for awhile, at least until a few new releases by me happen. This should also be applied to stable trees so that stable kernels can utilize newly released database binaries. 
[1] https://lore.kernel.org/linux-wireless/CAGb2v657baNMPKU3QADijx7hZa=GUcSv2LEDdn6N=QQaFX8r-g@mail.gmail.com/ [2] https://lore.kernel.org/linux-wireless/ZWmRR5ul7EDfxCan@wens.tw/ Cc: stable@vger.kernel.org Signed-off-by: Chen-Yu Tsai Acked-by: Seth Forshee Link: https://msgid.link/ZXHGsqs34qZyzZng@wens.tw Signed-off-by: Johannes Berg --- net/wireless/certs/wens.hex | 87 +++++++++++++++++++++++++++++++++++++ 1 file changed, 87 insertions(+) create mode 100644 net/wireless/certs/wens.hex diff --git a/net/wireless/certs/wens.hex b/net/wireless/certs/wens.hex new file mode 100644 index 000000000000..ccd5b5dc3360 --- /dev/null +++ b/net/wireless/certs/wens.hex @@ -0,0 +1,87 @@ +/* Chen-Yu Tsai's regdb certificate */ +0x30, 0x82, 0x02, 0xa7, 0x30, 0x82, 0x01, 0x8f, +0x02, 0x14, 0x61, 0xc0, 0x38, 0x65, 0x1a, 0xab, +0xdc, 0xf9, 0x4b, 0xd0, 0xac, 0x7f, 0xf0, 0x6c, +0x72, 0x48, 0xdb, 0x18, 0xc6, 0x00, 0x30, 0x0d, +0x06, 0x09, 0x2a, 0x86, 0x48, 0x86, 0xf7, 0x0d, +0x01, 0x01, 0x0b, 0x05, 0x00, 0x30, 0x0f, 0x31, +0x0d, 0x30, 0x0b, 0x06, 0x03, 0x55, 0x04, 0x03, +0x0c, 0x04, 0x77, 0x65, 0x6e, 0x73, 0x30, 0x20, +0x17, 0x0d, 0x32, 0x33, 0x31, 0x32, 0x30, 0x31, +0x30, 0x37, 0x34, 0x31, 0x31, 0x34, 0x5a, 0x18, +0x0f, 0x32, 0x31, 0x32, 0x33, 0x31, 0x31, 0x30, +0x37, 0x30, 0x37, 0x34, 0x31, 0x31, 0x34, 0x5a, +0x30, 0x0f, 0x31, 0x0d, 0x30, 0x0b, 0x06, 0x03, +0x55, 0x04, 0x03, 0x0c, 0x04, 0x77, 0x65, 0x6e, +0x73, 0x30, 0x82, 0x01, 0x22, 0x30, 0x0d, 0x06, +0x09, 0x2a, 0x86, 0x48, 0x86, 0xf7, 0x0d, 0x01, +0x01, 0x01, 0x05, 0x00, 0x03, 0x82, 0x01, 0x0f, +0x00, 0x30, 0x82, 0x01, 0x0a, 0x02, 0x82, 0x01, +0x01, 0x00, 0xa9, 0x7a, 0x2c, 0x78, 0x4d, 0xa7, +0x19, 0x2d, 0x32, 0x52, 0xa0, 0x2e, 0x6c, 0xef, +0x88, 0x7f, 0x15, 0xc5, 0xb6, 0x69, 0x54, 0x16, +0x43, 0x14, 0x79, 0x53, 0xb7, 0xae, 0x88, 0xfe, +0xc0, 0xb7, 0x5d, 0x47, 0x8e, 0x1a, 0xe1, 0xef, +0xb3, 0x90, 0x86, 0xda, 0xd3, 0x64, 0x81, 0x1f, +0xce, 0x5d, 0x9e, 0x4b, 0x6e, 0x58, 0x02, 0x3e, +0xb2, 0x6f, 0x5e, 0x42, 0x47, 0x41, 0xf4, 0x2c, 
+0xb8, 0xa8, 0xd4, 0xaa, 0xc0, 0x0e, 0xe6, 0x48, +0xf0, 0xa8, 0xce, 0xcb, 0x08, 0xae, 0x37, 0xaf, +0xf6, 0x40, 0x39, 0xcb, 0x55, 0x6f, 0x5b, 0x4f, +0x85, 0x34, 0xe6, 0x69, 0x10, 0x50, 0x72, 0x5e, +0x4e, 0x9d, 0x4c, 0xba, 0x38, 0x36, 0x0d, 0xce, +0x73, 0x38, 0xd7, 0x27, 0x02, 0x2a, 0x79, 0x03, +0xe1, 0xac, 0xcf, 0xb0, 0x27, 0x85, 0x86, 0x93, +0x17, 0xab, 0xec, 0x42, 0x77, 0x37, 0x65, 0x8a, +0x44, 0xcb, 0xd6, 0x42, 0x93, 0x92, 0x13, 0xe3, +0x39, 0x45, 0xc5, 0x6e, 0x00, 0x4a, 0x7f, 0xcb, +0x42, 0x17, 0x2b, 0x25, 0x8c, 0xb8, 0x17, 0x3b, +0x15, 0x36, 0x59, 0xde, 0x42, 0xce, 0x21, 0xe6, +0xb6, 0xc7, 0x6e, 0x5e, 0x26, 0x1f, 0xf7, 0x8a, +0x57, 0x9e, 0xa5, 0x96, 0x72, 0xb7, 0x02, 0x32, +0xeb, 0x07, 0x2b, 0x73, 0xe2, 0x4f, 0x66, 0x58, +0x9a, 0xeb, 0x0f, 0x07, 0xb6, 0xab, 0x50, 0x8b, +0xc3, 0x8f, 0x17, 0xfa, 0x0a, 0x99, 0xc2, 0x16, +0x25, 0xbf, 0x2d, 0x6b, 0x1a, 0xaa, 0xe6, 0x3e, +0x5f, 0xeb, 0x6d, 0x9b, 0x5d, 0x4d, 0x42, 0x83, +0x2d, 0x39, 0xb8, 0xc9, 0xac, 0xdb, 0x3a, 0x91, +0x50, 0xdf, 0xbb, 0xb1, 0x76, 0x6d, 0x15, 0x73, +0xfd, 0xc6, 0xe6, 0x6b, 0x71, 0x9e, 0x67, 0x36, +0x22, 0x83, 0x79, 0xb1, 0xd6, 0xb8, 0x84, 0x52, +0xaf, 0x96, 0x5b, 0xc3, 0x63, 0x02, 0x4e, 0x78, +0x70, 0x57, 0x02, 0x03, 0x01, 0x00, 0x01, 0x30, +0x0d, 0x06, 0x09, 0x2a, 0x86, 0x48, 0x86, 0xf7, +0x0d, 0x01, 0x01, 0x0b, 0x05, 0x00, 0x03, 0x82, +0x01, 0x01, 0x00, 0x24, 0x28, 0xee, 0x22, 0x74, +0x7f, 0x7c, 0xfa, 0x6c, 0x1f, 0xb3, 0x18, 0xd1, +0xc2, 0x3d, 0x7d, 0x29, 0x42, 0x88, 0xad, 0x82, +0xa5, 0xb1, 0x8a, 0x05, 0xd0, 0xec, 0x5c, 0x91, +0x20, 0xf6, 0x82, 0xfd, 0xd5, 0x67, 0x60, 0x5f, +0x31, 0xf5, 0xbd, 0x88, 0x91, 0x70, 0xbd, 0xb8, +0xb9, 0x8c, 0x88, 0xfe, 0x53, 0xc9, 0x54, 0x9b, +0x43, 0xc4, 0x7a, 0x43, 0x74, 0x6b, 0xdd, 0xb0, +0xb1, 0x3b, 0x33, 0x45, 0x46, 0x78, 0xa3, 0x1c, +0xef, 0x54, 0x68, 0xf7, 0x85, 0x9c, 0xe4, 0x51, +0x6f, 0x06, 0xaf, 0x81, 0xdb, 0x2a, 0x7b, 0x7b, +0x6f, 0xa8, 0x9c, 0x67, 0xd8, 0xcb, 0xc9, 0x91, +0x40, 0x00, 0xae, 0xd9, 0xa1, 0x9f, 0xdd, 0xa6, +0x43, 0x0e, 0x28, 0x7b, 0xaa, 0x1b, 
0xe9, 0x84, +0xdb, 0x76, 0x64, 0x42, 0x70, 0xc9, 0xc0, 0xeb, +0xae, 0x84, 0x11, 0x16, 0x68, 0x4e, 0x84, 0x9e, +0x7e, 0x92, 0x36, 0xee, 0x1c, 0x3b, 0x08, 0x63, +0xeb, 0x79, 0x84, 0x15, 0x08, 0x9d, 0xaf, 0xc8, +0x9a, 0xc7, 0x34, 0xd3, 0x94, 0x4b, 0xd1, 0x28, +0x97, 0xbe, 0xd1, 0x45, 0x75, 0xdc, 0x35, 0x62, +0xac, 0x1d, 0x1f, 0xb7, 0xb7, 0x15, 0x87, 0xc8, +0x98, 0xc0, 0x24, 0x31, 0x56, 0x8d, 0xed, 0xdb, +0x06, 0xc6, 0x46, 0xbf, 0x4b, 0x6d, 0xa6, 0xd5, +0xab, 0xcc, 0x60, 0xfc, 0xe5, 0x37, 0xb6, 0x53, +0x7d, 0x58, 0x95, 0xa9, 0x56, 0xc7, 0xf7, 0xee, +0xc3, 0xa0, 0x76, 0xf7, 0x65, 0x4d, 0x53, 0xfa, +0xff, 0x5f, 0x76, 0x33, 0x5a, 0x08, 0xfa, 0x86, +0x92, 0x5a, 0x13, 0xfa, 0x1a, 0xfc, 0xf2, 0x1b, +0x8c, 0x7f, 0x42, 0x6d, 0xb7, 0x7e, 0xb7, 0xb4, +0xf0, 0xc7, 0x83, 0xbb, 0xa2, 0x81, 0x03, 0x2d, +0xd4, 0x2a, 0x63, 0x3f, 0xf7, 0x31, 0x2e, 0x40, +0x33, 0x5c, 0x46, 0xbc, 0x9b, 0xc1, 0x05, 0xa5, +0x45, 0x4e, 0xc3 From c1393c132b906fbdf91f6d1c9eb2ef7a00cce64e Mon Sep 17 00:00:00 2001 From: Edward Adam Davis Date: Wed, 29 Nov 2023 20:17:47 +0800 Subject: [PATCH 04/69] wifi: mac80211: check if the existing link config remains unchanged [Syz report] WARNING: CPU: 1 PID: 5067 at net/mac80211/rate.c:48 rate_control_rate_init+0x540/0x690 net/mac80211/rate.c:48 Modules linked in: CPU: 1 PID: 5067 Comm: syz-executor413 Not tainted 6.7.0-rc3-syzkaller-00014-gdf60cee26a2e #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 11/10/2023 RIP: 0010:rate_control_rate_init+0x540/0x690 net/mac80211/rate.c:48 Code: 48 c7 c2 00 46 0c 8c be 08 03 00 00 48 c7 c7 c0 45 0c 8c c6 05 70 79 0b 05 01 e8 1b a0 6f f7 e9 e0 fd ff ff e8 61 b3 8f f7 90 <0f> 0b 90 e9 36 ff ff ff e8 53 b3 8f f7 e8 5e 0b 78 f7 31 ff 89 c3 RSP: 0018:ffffc90003c57248 EFLAGS: 00010293 RAX: 0000000000000000 RBX: ffff888016bc4000 RCX: ffffffff89f7d519 RDX: ffff888076d43b80 RSI: ffffffff89f7d6df RDI: 0000000000000005 RBP: ffff88801daaae20 R08: 0000000000000005 R09: 0000000000000000 R10: 0000000000000001 R11: 
0000000000000002 R12: 0000000000000001 R13: 0000000000000000 R14: ffff888020030e20 R15: ffff888078f08000 FS: 0000555556b94380(0000) GS:ffff8880b9900000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00000000005fdeb8 CR3: 0000000076d22000 CR4: 00000000003506f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: sta_apply_auth_flags.constprop.0+0x4b7/0x510 net/mac80211/cfg.c:1674 sta_apply_parameters+0xaf1/0x16c0 net/mac80211/cfg.c:2002 ieee80211_add_station+0x3fa/0x6c0 net/mac80211/cfg.c:2068 rdev_add_station net/wireless/rdev-ops.h:201 [inline] nl80211_new_station+0x13ba/0x1a70 net/wireless/nl80211.c:7603 genl_family_rcv_msg_doit+0x1fc/0x2e0 net/netlink/genetlink.c:972 genl_family_rcv_msg net/netlink/genetlink.c:1052 [inline] genl_rcv_msg+0x561/0x800 net/netlink/genetlink.c:1067 netlink_rcv_skb+0x16b/0x440 net/netlink/af_netlink.c:2545 genl_rcv+0x28/0x40 net/netlink/genetlink.c:1076 netlink_unicast_kernel net/netlink/af_netlink.c:1342 [inline] netlink_unicast+0x53b/0x810 net/netlink/af_netlink.c:1368 netlink_sendmsg+0x93c/0xe40 net/netlink/af_netlink.c:1910 sock_sendmsg_nosec net/socket.c:730 [inline] __sock_sendmsg+0xd5/0x180 net/socket.c:745 ____sys_sendmsg+0x6ac/0x940 net/socket.c:2584 ___sys_sendmsg+0x135/0x1d0 net/socket.c:2638 __sys_sendmsg+0x117/0x1e0 net/socket.c:2667 do_syscall_x64 arch/x86/entry/common.c:51 [inline] do_syscall_64+0x40/0x110 arch/x86/entry/common.c:82 entry_SYSCALL_64_after_hwframe+0x63/0x6b [Analysis] It is inappropriate to make a link configuration change judgment on a non-existent and non-new link. [Fix] Quickly exit when there is an existing link and the link configuration has not changed. 
Fixes: b303835dabe0 ("wifi: mac80211: accept STA changes without link changes") Reported-and-tested-by: syzbot+62d7eef57b09bfebcd84@syzkaller.appspotmail.com Signed-off-by: Edward Adam Davis Link: https://msgid.link/tencent_DE67FF86DB92ED465489A36ECD2EDDCC8C06@qq.com Signed-off-by: Johannes Berg --- net/mac80211/cfg.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 606b1b2e4123..eb1d3ef84353 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -1788,10 +1788,10 @@ static int sta_link_apply_parameters(struct ieee80211_local *local, lockdep_is_held(&local->hw.wiphy->mtx)); /* - * If there are no changes, then accept a link that doesn't exist, + * If there are no changes, then accept a link that exist, * unless it's a new link. */ - if (params->link_id < 0 && !new_link && + if (params->link_id >= 0 && !new_link && !params->link_mac && !params->txpwr_set && !params->supported_rates_len && !params->ht_capa && !params->vht_capa && From 23484d817082c3005252d8edfc8292c8a1006b5b Mon Sep 17 00:00:00 2001 From: Rouven Czerwinski Date: Thu, 7 Dec 2023 08:58:36 +0100 Subject: [PATCH 05/69] net: rfkill: gpio: set GPIO direction Fix the undefined usage of the GPIO consumer API after retrieving the GPIO description with GPIO_ASIS. The API documentation mentions that GPIO_ASIS won't set a GPIO direction and requires the user to set a direction before using the GPIO. This can be confirmed on i.MX6 hardware, where rfkill-gpio is no longer able to enable/disable a device, presumably because the GPIO controller was never configured for the output direction. 
Fixes: b2f750c3a80b ("net: rfkill: gpio: prevent value glitch during probe") Cc: stable@vger.kernel.org Signed-off-by: Rouven Czerwinski Link: https://msgid.link/20231207075835.3091694-1-r.czerwinski@pengutronix.de Signed-off-by: Johannes Berg --- net/rfkill/rfkill-gpio.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/net/rfkill/rfkill-gpio.c b/net/rfkill/rfkill-gpio.c index 5a81505fba9a..4e32d659524e 100644 --- a/net/rfkill/rfkill-gpio.c +++ b/net/rfkill/rfkill-gpio.c @@ -126,6 +126,14 @@ static int rfkill_gpio_probe(struct platform_device *pdev) return -EINVAL; } + ret = gpiod_direction_output(rfkill->reset_gpio, true); + if (ret) + return ret; + + ret = gpiod_direction_output(rfkill->shutdown_gpio, true); + if (ret) + return ret; + rfkill->rfkill_dev = rfkill_alloc(rfkill->name, &pdev->dev, rfkill->type, &rfkill_gpio_ops, rfkill); From 63bafd9d5421959b2124dd940ed8d7462d99f449 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 11 Dec 2023 09:05:19 +0200 Subject: [PATCH 06/69] wifi: mac80211: don't re-add debugfs during reconfig If we're doing reconfig, then we cannot add the debugfs files that are already there from before the reconfig. Skip that in drv_change_sta_links() during reconfig. 
Fixes: d2caad527c19 ("wifi: mac80211: add API to show the link STAs in debugfs") Signed-off-by: Johannes Berg Reviewed-by: Gregory Greenman Reviewed-by: Benjamin Berg Signed-off-by: Miri Korenblit Link: https://msgid.link/20231211085121.88a950f43e16.Id71181780994649219685887c0fcad33d387cc78@changeid Signed-off-by: Johannes Berg --- net/mac80211/driver-ops.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/net/mac80211/driver-ops.c b/net/mac80211/driver-ops.c index 7938ec87ef25..d3820333cd59 100644 --- a/net/mac80211/driver-ops.c +++ b/net/mac80211/driver-ops.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only /* * Copyright 2015 Intel Deutschland GmbH - * Copyright (C) 2022 Intel Corporation + * Copyright (C) 2022-2023 Intel Corporation */ #include #include "ieee80211_i.h" @@ -589,6 +589,10 @@ int drv_change_sta_links(struct ieee80211_local *local, if (ret) return ret; + /* during reconfig don't add it to debugfs again */ + if (local->in_reconfig) + return 0; + for_each_set_bit(link_id, &links_to_add, IEEE80211_MLD_MAX_NUM_LINKS) { link_sta = rcu_dereference_protected(info->link[link_id], lockdep_is_held(&local->hw.wiphy->mtx)); From 98849ba2aa9db46e62720fb686a9d63ed9887806 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 11 Dec 2023 09:05:30 +0200 Subject: [PATCH 07/69] wifi: mac80211: check defragmentation succeeded We need to check that cfg80211_defragment_element() didn't return an error, since it can fail due to bad input, and we didn't catch that before. 
Fixes: 8eb8dd2ffbbb ("wifi: mac80211: Support link removal using Reconfiguration ML element") Signed-off-by: Johannes Berg Signed-off-by: Miri Korenblit Link: https://msgid.link/20231211085121.8595a6b67fc0.I1225edd8f98355e007f96502e358e476c7971d8c@changeid Signed-off-by: Johannes Berg --- net/mac80211/mlme.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 887b496f2b81..c8998cf01b7a 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -5782,7 +5782,7 @@ static void ieee80211_ml_reconfiguration(struct ieee80211_sub_if_data *sdata, { const struct ieee80211_multi_link_elem *ml; const struct element *sub; - size_t ml_len; + ssize_t ml_len; unsigned long removed_links = 0; u16 link_removal_timeout[IEEE80211_MLD_MAX_NUM_LINKS] = {}; u8 link_id; @@ -5798,6 +5798,8 @@ static void ieee80211_ml_reconfiguration(struct ieee80211_sub_if_data *sdata, elems->scratch + elems->scratch_len - elems->scratch_pos, WLAN_EID_FRAGMENT); + if (ml_len < 0) + return; elems->ml_reconf = (const void *)elems->scratch_pos; elems->ml_reconf_len = ml_len; From 1fc4a3eec50d726f4663ad3c0bb0158354d6647a Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 11 Dec 2023 09:05:32 +0200 Subject: [PATCH 08/69] wifi: mac80211: mesh: check element parsing succeeded ieee802_11_parse_elems() can return NULL, so we must check for the return value. 
Fixes: 5d24828d05f3 ("mac80211: always allocate struct ieee802_11_elems") Signed-off-by: Johannes Berg Signed-off-by: Miri Korenblit Link: https://msgid.link/20231211085121.93dea364f3d3.Ie87781c6c48979fb25a744b90af4a33dc2d83a28@changeid Signed-off-by: Johannes Berg --- net/mac80211/mesh_plink.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/mac80211/mesh_plink.c b/net/mac80211/mesh_plink.c index dbabeefe4515..2da6ecf0077d 100644 --- a/net/mac80211/mesh_plink.c +++ b/net/mac80211/mesh_plink.c @@ -1247,6 +1247,8 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, return; } elems = ieee802_11_parse_elems(baseaddr, len - baselen, true, NULL); - mesh_process_plink_frame(sdata, mgmt, elems, rx_status); - kfree(elems); + if (elems) { + mesh_process_plink_frame(sdata, mgmt, elems, rx_status); + kfree(elems); + } } From 8c386b166e2517cf3a123018e77941ec22625d0f Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 11 Dec 2023 09:05:31 +0200 Subject: [PATCH 09/69] wifi: mac80211: mesh_plink: fix matches_local logic During refactoring the "else" here got lost, add it back. 
Fixes: c99a89edb106 ("mac80211: factor out plink event gathering") Signed-off-by: Johannes Berg Signed-off-by: Miri Korenblit Link: https://msgid.link/20231211085121.795480fa0e0b.I017d501196a5bbdcd9afd33338d342d6fe1edd79@changeid Signed-off-by: Johannes Berg --- net/mac80211/mesh_plink.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/net/mac80211/mesh_plink.c b/net/mac80211/mesh_plink.c index 2da6ecf0077d..28bf794f67f8 100644 --- a/net/mac80211/mesh_plink.c +++ b/net/mac80211/mesh_plink.c @@ -1068,8 +1068,8 @@ mesh_plink_get_event(struct ieee80211_sub_if_data *sdata, case WLAN_SP_MESH_PEERING_OPEN: if (!matches_local) event = OPN_RJCT; - if (!mesh_plink_free_count(sdata) || - (sta->mesh->plid && sta->mesh->plid != plid)) + else if (!mesh_plink_free_count(sdata) || + (sta->mesh->plid && sta->mesh->plid != plid)) event = OPN_IGNR; else event = OPN_ACPT; @@ -1077,9 +1077,9 @@ mesh_plink_get_event(struct ieee80211_sub_if_data *sdata, case WLAN_SP_MESH_PEERING_CONFIRM: if (!matches_local) event = CNF_RJCT; - if (!mesh_plink_free_count(sdata) || - sta->mesh->llid != llid || - (sta->mesh->plid && sta->mesh->plid != plid)) + else if (!mesh_plink_free_count(sdata) || + sta->mesh->llid != llid || + (sta->mesh->plid && sta->mesh->plid != plid)) event = CNF_IGNR; else event = CNF_ACPT; From ce038edfce43fb345f8dfdca0f7b17f535896701 Mon Sep 17 00:00:00 2001 From: Avraham Stern Date: Thu, 7 Dec 2023 04:50:17 +0200 Subject: [PATCH 10/69] wifi: iwlwifi: pcie: avoid a NULL pointer dereference It is possible that while the rx rb is being handled, the transport has been stopped and re-started. In this case the tx queue pointer is not yet initialized, which will lead to a NULL pointer dereference. Fix it. 
Signed-off-by: Avraham Stern Signed-off-by: Miri Korenblit Link: https://msgid.link/20231207044813.cd0898cafd89.I0b84daae753ba9612092bf383f5c6f761446e964@changeid Signed-off-by: Johannes Berg --- drivers/net/wireless/intel/iwlwifi/pcie/rx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/rx.c b/drivers/net/wireless/intel/iwlwifi/pcie/rx.c index 146bc7bd14fb..bc6a9f861711 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/rx.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/rx.c @@ -1385,7 +1385,7 @@ static void iwl_pcie_rx_handle_rb(struct iwl_trans *trans, * if it is true then one of the handlers took the page. */ - if (reclaim) { + if (reclaim && txq) { u16 sequence = le16_to_cpu(pkt->hdr.sequence); int index = SEQ_TO_INDEX(sequence); int cmd_index = iwl_txq_get_cmd_index(txq, index); From cd607f2cbbbec90682b2f6d6b85e1525d0f43b19 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Fri, 8 Dec 2023 08:50:04 +0100 Subject: [PATCH 11/69] wifi: mt76: fix crash with WED rx support enabled If WED rx is enabled, rx buffers are added to a buffer pool that can be filled from multiple page pools. Because buffers freed from rx poll are not guaranteed to belong to the processed queue's page pool, lockless caching must not be used in this case. 
Cc: stable@vger.kernel.org Fixes: 2f5c3c77fc9b ("wifi: mt76: switch to page_pool allocator") Signed-off-by: Felix Fietkau Acked-by: Lorenzo Bianconi Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20231208075004.69843-1-nbd@nbd.name --- drivers/net/wireless/mediatek/mt76/dma.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/net/wireless/mediatek/mt76/dma.c b/drivers/net/wireless/mediatek/mt76/dma.c index 511fe7e6e744..68ad915203aa 100644 --- a/drivers/net/wireless/mediatek/mt76/dma.c +++ b/drivers/net/wireless/mediatek/mt76/dma.c @@ -783,7 +783,7 @@ mt76_dma_rx_reset(struct mt76_dev *dev, enum mt76_rxq_id qid) static void mt76_add_fragment(struct mt76_dev *dev, struct mt76_queue *q, void *data, - int len, bool more, u32 info) + int len, bool more, u32 info, bool allow_direct) { struct sk_buff *skb = q->rx_head; struct skb_shared_info *shinfo = skb_shinfo(skb); @@ -795,7 +795,7 @@ mt76_add_fragment(struct mt76_dev *dev, struct mt76_queue *q, void *data, skb_add_rx_frag(skb, nr_frags, page, offset, len, q->buf_size); } else { - mt76_put_page_pool_buf(data, true); + mt76_put_page_pool_buf(data, allow_direct); } if (more) @@ -815,6 +815,7 @@ mt76_dma_rx_process(struct mt76_dev *dev, struct mt76_queue *q, int budget) struct sk_buff *skb; unsigned char *data; bool check_ddone = false; + bool allow_direct = !mt76_queue_is_wed_rx(q); bool more; if (IS_ENABLED(CONFIG_NET_MEDIATEK_SOC_WED) && @@ -855,7 +856,8 @@ mt76_dma_rx_process(struct mt76_dev *dev, struct mt76_queue *q, int budget) } if (q->rx_head) { - mt76_add_fragment(dev, q, data, len, more, info); + mt76_add_fragment(dev, q, data, len, more, info, + allow_direct); continue; } @@ -884,7 +886,7 @@ mt76_dma_rx_process(struct mt76_dev *dev, struct mt76_queue *q, int budget) continue; free_frag: - mt76_put_page_pool_buf(data, true); + mt76_put_page_pool_buf(data, allow_direct); } mt76_dma_rx_fill(dev, q, true); From 91f9181c738101a276d9da333e0ab665ad806e6d Mon Sep 17 
00:00:00 2001 From: Michal Schmidt Date: Thu, 30 Nov 2023 17:58:06 +0100 Subject: [PATCH 12/69] ice: fix theoretical out-of-bounds access in ethtool link modes To map phy types reported by the hardware to ethtool link mode bits, ice uses two lookup tables (phy_type_low_lkup, phy_type_high_lkup). The "low" table has 64 elements to cover every possible bit the hardware may report, but the "high" table has only 13. If the hardware reports a higher bit in phy_types_high, the driver would access memory beyond the lookup table's end. Instead of iterating through all 64 bits of phy_types_{low,high}, use the sizes of the respective lookup tables. Fixes: 9136e1f1e5c3 ("ice: refactor PHY type to ethtool link mode") Signed-off-by: Michal Schmidt Reviewed-by: Przemek Kitszel Reviewed-by: Simon Horman Tested-by: Pucha Himasekhar Reddy (A Contingent worker at Intel) Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_ethtool.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c b/drivers/net/ethernet/intel/ice/ice_ethtool.c index a34083567e6f..bde9bc74f928 100644 --- a/drivers/net/ethernet/intel/ice/ice_ethtool.c +++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c @@ -1850,14 +1850,14 @@ ice_phy_type_to_ethtool(struct net_device *netdev, linkmode_zero(ks->link_modes.supported); linkmode_zero(ks->link_modes.advertising); - for (i = 0; i < BITS_PER_TYPE(u64); i++) { + for (i = 0; i < ARRAY_SIZE(phy_type_low_lkup); i++) { if (phy_types_low & BIT_ULL(i)) ice_linkmode_set_bit(&phy_type_low_lkup[i], ks, req_speeds, advert_phy_type_lo, i); } - for (i = 0; i < BITS_PER_TYPE(u64); i++) { + for (i = 0; i < ARRAY_SIZE(phy_type_high_lkup); i++) { if (phy_types_high & BIT_ULL(i)) ice_linkmode_set_bit(&phy_type_high_lkup[i], ks, req_speeds, advert_phy_type_hi, From 9b3daf2b0443eeba23c3888059342aec920dfd53 Mon Sep 17 00:00:00 2001 From: Ivan Vecera Date: Wed, 29 Nov 2023 17:17:10 +0100 Subject: [PATCH 13/69] i40e: Fix 
ST code value for Clause 45 ST code value for clause 45 that has been changed by commit 8196b5fd6c73 ("i40e: Refactor I40E_MDIO_CLAUSE* macros") is currently wrong. The mentioned commit refactored ..MDIO_CLAUSE??_STCODE_MASK so their value is the same for both clauses. The value is correct for clause 22 but not for clause 45. Fix the issue by adding a parameter to I40E_GLGEN_MSCA_STCODE_MASK macro that specifies required value. Fixes: 8196b5fd6c73 ("i40e: Refactor I40E_MDIO_CLAUSE* macros") Signed-off-by: Ivan Vecera Reviewed-by: Jacob Keller Reviewed-by: Simon Horman Tested-by: Pucha Himasekhar Reddy (A Contingent worker at Intel) Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/i40e/i40e_register.h | 2 +- drivers/net/ethernet/intel/i40e/i40e_type.h | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_register.h b/drivers/net/ethernet/intel/i40e/i40e_register.h index f408fcf23ce8..f6671ac79735 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_register.h +++ b/drivers/net/ethernet/intel/i40e/i40e_register.h @@ -207,7 +207,7 @@ #define I40E_GLGEN_MSCA_OPCODE_SHIFT 26 #define I40E_GLGEN_MSCA_OPCODE_MASK(_i) I40E_MASK(_i, I40E_GLGEN_MSCA_OPCODE_SHIFT) #define I40E_GLGEN_MSCA_STCODE_SHIFT 28 -#define I40E_GLGEN_MSCA_STCODE_MASK I40E_MASK(0x1, I40E_GLGEN_MSCA_STCODE_SHIFT) +#define I40E_GLGEN_MSCA_STCODE_MASK(_i) I40E_MASK(_i, I40E_GLGEN_MSCA_STCODE_SHIFT) #define I40E_GLGEN_MSCA_MDICMD_SHIFT 30 #define I40E_GLGEN_MSCA_MDICMD_MASK I40E_MASK(0x1, I40E_GLGEN_MSCA_MDICMD_SHIFT) #define I40E_GLGEN_MSCA_MDIINPROGEN_SHIFT 31 diff --git a/drivers/net/ethernet/intel/i40e/i40e_type.h b/drivers/net/ethernet/intel/i40e/i40e_type.h index aff6dc6afbe2..f95bc2a4a838 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_type.h +++ b/drivers/net/ethernet/intel/i40e/i40e_type.h @@ -37,11 +37,11 @@ typedef void (*I40E_ADMINQ_CALLBACK)(struct i40e_hw *, struct i40e_aq_desc *); #define I40E_QTX_CTL_VM_QUEUE 0x1 #define 
I40E_QTX_CTL_PF_QUEUE 0x2 -#define I40E_MDIO_CLAUSE22_STCODE_MASK I40E_GLGEN_MSCA_STCODE_MASK +#define I40E_MDIO_CLAUSE22_STCODE_MASK I40E_GLGEN_MSCA_STCODE_MASK(1) #define I40E_MDIO_CLAUSE22_OPCODE_WRITE_MASK I40E_GLGEN_MSCA_OPCODE_MASK(1) #define I40E_MDIO_CLAUSE22_OPCODE_READ_MASK I40E_GLGEN_MSCA_OPCODE_MASK(2) -#define I40E_MDIO_CLAUSE45_STCODE_MASK I40E_GLGEN_MSCA_STCODE_MASK +#define I40E_MDIO_CLAUSE45_STCODE_MASK I40E_GLGEN_MSCA_STCODE_MASK(0) #define I40E_MDIO_CLAUSE45_OPCODE_ADDRESS_MASK I40E_GLGEN_MSCA_OPCODE_MASK(0) #define I40E_MDIO_CLAUSE45_OPCODE_WRITE_MASK I40E_GLGEN_MSCA_OPCODE_MASK(1) #define I40E_MDIO_CLAUSE45_OPCODE_READ_MASK I40E_GLGEN_MSCA_OPCODE_MASK(3) From 8d6650646ce49e9a5b8c5c23eb94f74b1749f70f Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Fri, 1 Dec 2023 10:01:38 -0800 Subject: [PATCH 14/69] bpf: syzkaller found null ptr deref in unix_bpf proto add I added logic to track the sock pair for stream_unix sockets so that we ensure lifetime of the sock matches the time a sockmap could reference the sock (see fixes tag). I forgot though that we allow af_unix unconnected sockets into a sock{map|hash} map. This is problematic because the previous fix expected sk_pair() to exist and did not NULL check it. Because unconnected sockets have a NULL sk_pair this resulted in the NULL ptr dereference found by syzkaller. BUG: KASAN: null-ptr-deref in unix_stream_bpf_update_proto+0x72/0x430 net/unix/unix_bpf.c:171 Write of size 4 at addr 0000000000000080 by task syz-executor360/5073 Call Trace: ... 
sock_hold include/net/sock.h:777 [inline] unix_stream_bpf_update_proto+0x72/0x430 net/unix/unix_bpf.c:171 sock_map_init_proto net/core/sock_map.c:190 [inline] sock_map_link+0xb87/0x1100 net/core/sock_map.c:294 sock_map_update_common+0xf6/0x870 net/core/sock_map.c:483 sock_map_update_elem_sys+0x5b6/0x640 net/core/sock_map.c:577 bpf_map_update_value+0x3af/0x820 kernel/bpf/syscall.c:167 We considered just checking for the null ptr and skipping taking a ref on the NULL peer sock. But, if the socket is then connected() after being added to the sockmap we can cause the original issue again. So instead this patch blocks adding af_unix sockets that are not in the ESTABLISHED state. Reported-by: Eric Dumazet Reported-by: syzbot+e8030702aefd3444fb9e@syzkaller.appspotmail.com Fixes: 8866730aed51 ("bpf, sockmap: af_unix stream sockets need to hold ref for pair sock") Acked-by: Jakub Sitnicki Signed-off-by: John Fastabend Link: https://lore.kernel.org/r/20231201180139.328529-2-john.fastabend@gmail.com Signed-off-by: Martin KaFai Lau --- include/net/sock.h | 5 +++++ net/core/sock_map.c | 2 ++ 2 files changed, 7 insertions(+) diff --git a/include/net/sock.h b/include/net/sock.h index 1d6931caf0c3..0201136b0b9c 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -2799,6 +2799,11 @@ static inline bool sk_is_tcp(const struct sock *sk) return sk->sk_type == SOCK_STREAM && sk->sk_protocol == IPPROTO_TCP; } +static inline bool sk_is_stream_unix(const struct sock *sk) +{ + return sk->sk_family == AF_UNIX && sk->sk_type == SOCK_STREAM; +} + /** * sk_eat_skb - Release a skb if it is no longer needed * @sk: socket to eat this skb from diff --git a/net/core/sock_map.c b/net/core/sock_map.c index 4292c2ed1828..27d733c0f65e 100644 --- a/net/core/sock_map.c +++ b/net/core/sock_map.c @@ -536,6 +536,8 @@ static bool sock_map_sk_state_allowed(const struct sock *sk) { if (sk_is_tcp(sk)) return (1 << sk->sk_state) & (TCPF_ESTABLISHED | TCPF_LISTEN); + if (sk_is_stream_unix(sk)) + return (1 
<< sk->sk_state) & TCPF_ESTABLISHED; return true; } From 50d96f05af6787a34b4eca2ee3fc1993289c4c24 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Fri, 1 Dec 2023 10:01:39 -0800 Subject: [PATCH 15/69] bpf: sockmap, test for unconnected af_unix sock Add test to sockmap_basic to ensure af_unix sockets that are not connected can not be added to the map. Ensure we keep DGRAM sockets working however as these will not be connected typically. Signed-off-by: John Fastabend Acked-by: Jakub Sitnicki Link: https://lore.kernel.org/r/20231201180139.328529-3-john.fastabend@gmail.com Signed-off-by: Martin KaFai Lau --- .../selftests/bpf/prog_tests/sockmap_basic.c | 34 +++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c index f75f84d0b3d7..7c2241fae19a 100644 --- a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c +++ b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c @@ -524,6 +524,37 @@ out: test_sockmap_pass_prog__destroy(pass); } +static void test_sockmap_unconnected_unix(void) +{ + int err, map, stream = 0, dgram = 0, zero = 0; + struct test_sockmap_pass_prog *skel; + + skel = test_sockmap_pass_prog__open_and_load(); + if (!ASSERT_OK_PTR(skel, "open_and_load")) + return; + + map = bpf_map__fd(skel->maps.sock_map_rx); + + stream = xsocket(AF_UNIX, SOCK_STREAM, 0); + if (stream < 0) + return; + + dgram = xsocket(AF_UNIX, SOCK_DGRAM, 0); + if (dgram < 0) { + close(stream); + return; + } + + err = bpf_map_update_elem(map, &zero, &stream, BPF_ANY); + ASSERT_ERR(err, "bpf_map_update_elem(stream)"); + + err = bpf_map_update_elem(map, &zero, &dgram, BPF_ANY); + ASSERT_OK(err, "bpf_map_update_elem(dgram)"); + + close(stream); + close(dgram); +} + void test_sockmap_basic(void) { if (test__start_subtest("sockmap create_update_free")) @@ -566,4 +597,7 @@ void test_sockmap_basic(void) test_sockmap_skb_verdict_fionread(false); if 
(test__start_subtest("sockmap skb_verdict msg_f_peek")) test_sockmap_skb_verdict_peek(); + + if (test__start_subtest("sockmap unconnected af_unix")) + test_sockmap_unconnected_unix(); } From 66ca8d4deca09bce3fc7bcf8ea7997fa1a51c33c Mon Sep 17 00:00:00 2001 From: Vlad Buslov Date: Tue, 21 Nov 2023 13:51:52 +0100 Subject: [PATCH 16/69] Revert "net/mlx5e: fix double free of encap_header in update funcs" This reverts commit 3a4aa3cb83563df942be49d145ee3b7ddf17d6bb. This patch is causing a null ptr issue, the proper fix is in the next patch. Fixes: 3a4aa3cb8356 ("net/mlx5e: fix double free of encap_header in update funcs") Signed-off-by: Vlad Buslov Signed-off-by: Saeed Mahameed --- .../ethernet/mellanox/mlx5/core/en/tc_tun.c | 20 +++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c index 668da5c70e63..8bca696b6658 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c @@ -403,12 +403,16 @@ int mlx5e_tc_tun_update_header_ipv4(struct mlx5e_priv *priv, if (err) goto free_encap; + e->encap_size = ipv4_encap_size; + kfree(e->encap_header); + e->encap_header = encap_header; + if (!(nud_state & NUD_VALID)) { neigh_event_send(attr.n, NULL); /* the encap entry will be made valid on neigh update event * and not used before that. 
*/ - goto free_encap; + goto release_neigh; } memset(&reformat_params, 0, sizeof(reformat_params)); @@ -422,10 +426,6 @@ int mlx5e_tc_tun_update_header_ipv4(struct mlx5e_priv *priv, goto free_encap; } - e->encap_size = ipv4_encap_size; - kfree(e->encap_header); - e->encap_header = encap_header; - e->flags |= MLX5_ENCAP_ENTRY_VALID; mlx5e_rep_queue_neigh_stats_work(netdev_priv(attr.out_dev)); mlx5e_route_lookup_ipv4_put(&attr); @@ -669,12 +669,16 @@ int mlx5e_tc_tun_update_header_ipv6(struct mlx5e_priv *priv, if (err) goto free_encap; + e->encap_size = ipv6_encap_size; + kfree(e->encap_header); + e->encap_header = encap_header; + if (!(nud_state & NUD_VALID)) { neigh_event_send(attr.n, NULL); /* the encap entry will be made valid on neigh update event * and not used before that. */ - goto free_encap; + goto release_neigh; } memset(&reformat_params, 0, sizeof(reformat_params)); @@ -688,10 +692,6 @@ int mlx5e_tc_tun_update_header_ipv6(struct mlx5e_priv *priv, goto free_encap; } - e->encap_size = ipv6_encap_size; - kfree(e->encap_header); - e->encap_header = encap_header; - e->flags |= MLX5_ENCAP_ENTRY_VALID; mlx5e_rep_queue_neigh_stats_work(netdev_priv(attr.out_dev)); mlx5e_route_lookup_ipv6_put(&attr); From 5d089684dc434a31e08d32f0530066d0025c52e4 Mon Sep 17 00:00:00 2001 From: Vlad Buslov Date: Tue, 21 Nov 2023 13:52:28 +0100 Subject: [PATCH 17/69] Revert "net/mlx5e: fix double free of encap_header" This reverts commit 6f9b1a0731662648949a1c0587f6acb3b7f8acf1. This patch is causing a null ptr issue, the proper fix is in the next patch. 
Fixes: 6f9b1a073166 ("net/mlx5e: fix double free of encap_header") Signed-off-by: Vlad Buslov Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c index 8bca696b6658..00a04fdd756f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c @@ -300,6 +300,9 @@ int mlx5e_tc_tun_create_header_ipv4(struct mlx5e_priv *priv, if (err) goto destroy_neigh_entry; + e->encap_size = ipv4_encap_size; + e->encap_header = encap_header; + if (!(nud_state & NUD_VALID)) { neigh_event_send(attr.n, NULL); /* the encap entry will be made valid on neigh update event @@ -319,8 +322,6 @@ int mlx5e_tc_tun_create_header_ipv4(struct mlx5e_priv *priv, goto destroy_neigh_entry; } - e->encap_size = ipv4_encap_size; - e->encap_header = encap_header; e->flags |= MLX5_ENCAP_ENTRY_VALID; mlx5e_rep_queue_neigh_stats_work(netdev_priv(attr.out_dev)); mlx5e_route_lookup_ipv4_put(&attr); @@ -567,6 +568,9 @@ int mlx5e_tc_tun_create_header_ipv6(struct mlx5e_priv *priv, if (err) goto destroy_neigh_entry; + e->encap_size = ipv6_encap_size; + e->encap_header = encap_header; + if (!(nud_state & NUD_VALID)) { neigh_event_send(attr.n, NULL); /* the encap entry will be made valid on neigh update event @@ -586,8 +590,6 @@ int mlx5e_tc_tun_create_header_ipv6(struct mlx5e_priv *priv, goto destroy_neigh_entry; } - e->encap_size = ipv6_encap_size; - e->encap_header = encap_header; e->flags |= MLX5_ENCAP_ENTRY_VALID; mlx5e_rep_queue_neigh_stats_work(netdev_priv(attr.out_dev)); mlx5e_route_lookup_ipv6_put(&attr); From 8e13cd737cb4fbbb37d448e7e5228a99ae08fdc1 Mon Sep 17 00:00:00 2001 From: Vlad Buslov Date: Tue, 21 Nov 2023 14:15:30 +0100 Subject: [PATCH 18/69] net/mlx5e: fix double free of encap_header Cited commit introduced potential double 
free since encap_header can be destroyed twice in some cases - once by error cleanup sequence in mlx5e_tc_tun_{create|update}_header_ipv{4|6}(), once by generic mlx5e_encap_put() that user calls as a result of getting an error from tunnel create|update. At the same time the point where e->encap_header is assigned can't be delayed because the function can still return non-error code 0 as a result of checking for NUD_VALID flag, which will cause neighbor update to dereference NULL encap_header. Fix the issue by: - Nulling local encap_header variables in mlx5e_tc_tun_{create|update}_header_ipv{4|6}() to make kfree(encap_header) call in error cleanup sequence noop after that point. - Assigning reformat_params.data from e->encap_header instead of local variable encap_header that was set to NULL pointer by previous step. Also assign reformat_params.size from e->encap_size for uniformity and in order to make the code less error-prone in the future. Fixes: d589e785baf5 ("net/mlx5e: Allow concurrent creation of encap entries") Reported-by: Dust Li Reported-by: Cruz Zhao Reported-by: Tianchen Ding Signed-off-by: Vlad Buslov Signed-off-by: Saeed Mahameed --- .../ethernet/mellanox/mlx5/core/en/tc_tun.c | 20 +++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c index 00a04fdd756f..8dfb57f712b0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c @@ -302,6 +302,7 @@ int mlx5e_tc_tun_create_header_ipv4(struct mlx5e_priv *priv, e->encap_size = ipv4_encap_size; e->encap_header = encap_header; + encap_header = NULL; if (!(nud_state & NUD_VALID)) { neigh_event_send(attr.n, NULL); @@ -313,8 +314,8 @@ int mlx5e_tc_tun_create_header_ipv4(struct mlx5e_priv *priv, memset(&reformat_params, 0, sizeof(reformat_params)); reformat_params.type = e->reformat_type; - reformat_params.size = 
ipv4_encap_size; - reformat_params.data = encap_header; + reformat_params.size = e->encap_size; + reformat_params.data = e->encap_header; e->pkt_reformat = mlx5_packet_reformat_alloc(priv->mdev, &reformat_params, MLX5_FLOW_NAMESPACE_FDB); if (IS_ERR(e->pkt_reformat)) { @@ -407,6 +408,7 @@ int mlx5e_tc_tun_update_header_ipv4(struct mlx5e_priv *priv, e->encap_size = ipv4_encap_size; kfree(e->encap_header); e->encap_header = encap_header; + encap_header = NULL; if (!(nud_state & NUD_VALID)) { neigh_event_send(attr.n, NULL); @@ -418,8 +420,8 @@ int mlx5e_tc_tun_update_header_ipv4(struct mlx5e_priv *priv, memset(&reformat_params, 0, sizeof(reformat_params)); reformat_params.type = e->reformat_type; - reformat_params.size = ipv4_encap_size; - reformat_params.data = encap_header; + reformat_params.size = e->encap_size; + reformat_params.data = e->encap_header; e->pkt_reformat = mlx5_packet_reformat_alloc(priv->mdev, &reformat_params, MLX5_FLOW_NAMESPACE_FDB); if (IS_ERR(e->pkt_reformat)) { @@ -570,6 +572,7 @@ int mlx5e_tc_tun_create_header_ipv6(struct mlx5e_priv *priv, e->encap_size = ipv6_encap_size; e->encap_header = encap_header; + encap_header = NULL; if (!(nud_state & NUD_VALID)) { neigh_event_send(attr.n, NULL); @@ -581,8 +584,8 @@ int mlx5e_tc_tun_create_header_ipv6(struct mlx5e_priv *priv, memset(&reformat_params, 0, sizeof(reformat_params)); reformat_params.type = e->reformat_type; - reformat_params.size = ipv6_encap_size; - reformat_params.data = encap_header; + reformat_params.size = e->encap_size; + reformat_params.data = e->encap_header; e->pkt_reformat = mlx5_packet_reformat_alloc(priv->mdev, &reformat_params, MLX5_FLOW_NAMESPACE_FDB); if (IS_ERR(e->pkt_reformat)) { @@ -674,6 +677,7 @@ int mlx5e_tc_tun_update_header_ipv6(struct mlx5e_priv *priv, e->encap_size = ipv6_encap_size; kfree(e->encap_header); e->encap_header = encap_header; + encap_header = NULL; if (!(nud_state & NUD_VALID)) { neigh_event_send(attr.n, NULL); @@ -685,8 +689,8 @@ int 
mlx5e_tc_tun_update_header_ipv6(struct mlx5e_priv *priv, memset(&reformat_params, 0, sizeof(reformat_params)); reformat_params.type = e->reformat_type; - reformat_params.size = ipv6_encap_size; - reformat_params.data = encap_header; + reformat_params.size = e->encap_size; + reformat_params.data = e->encap_header; e->pkt_reformat = mlx5_packet_reformat_alloc(priv->mdev, &reformat_params, MLX5_FLOW_NAMESPACE_FDB); if (IS_ERR(e->pkt_reformat)) { From ddb38ddff9c71026bad481b791a94d446ee37603 Mon Sep 17 00:00:00 2001 From: Shifeng Li Date: Thu, 30 Nov 2023 01:46:56 -0800 Subject: [PATCH 19/69] net/mlx5e: Fix slab-out-of-bounds in mlx5_query_nic_vport_mac_list() out_sz, the size of the out buffer, is calculated using the query_nic_vport_context_in structure when the driver queries the MAC list. However, the query_nic_vport_context_in structure is smaller than query_nic_vport_context_out. When allowed_list_size is greater than 96, calling ether_addr_copy() will trigger a slab-out-of-bounds access. [ 1170.055866] BUG: KASAN: slab-out-of-bounds in mlx5_query_nic_vport_mac_list+0x481/0x4d0 [mlx5_core] [ 1170.055869] Read of size 4 at addr ffff88bdbc57d912 by task kworker/u128:1/461 [ 1170.055870] [ 1170.055932] Workqueue: mlx5_esw_wq esw_vport_change_handler [mlx5_core] [ 1170.055936] Call Trace: [ 1170.055949] dump_stack+0x8b/0xbb [ 1170.055958] print_address_description+0x6a/0x270 [ 1170.055961] kasan_report+0x179/0x2c0 [ 1170.056061] mlx5_query_nic_vport_mac_list+0x481/0x4d0 [mlx5_core] [ 1170.056162] esw_update_vport_addr_list+0x2c5/0xcd0 [mlx5_core] [ 1170.056257] esw_vport_change_handle_locked+0xd08/0x1a20 [mlx5_core] [ 1170.056377] esw_vport_change_handler+0x6b/0x90 [mlx5_core] [ 1170.056381] process_one_work+0x65f/0x12d0 [ 1170.056383] worker_thread+0x87/0xb50 [ 1170.056390] kthread+0x2e9/0x3a0 [ 1170.056394] ret_from_fork+0x1f/0x40 Fixes: e16aea2744ab ("net/mlx5: Introduce access functions to modify/query vport mac lists") Cc: Ding Hui Signed-off-by: Shifeng Li Reviewed-by: Simon
Horman Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/vport.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vport.c b/drivers/net/ethernet/mellanox/mlx5/core/vport.c index 5a31fb47ffa5..21753f327868 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/vport.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/vport.c @@ -277,7 +277,7 @@ int mlx5_query_nic_vport_mac_list(struct mlx5_core_dev *dev, req_list_size = max_list_size; } - out_sz = MLX5_ST_SZ_BYTES(query_nic_vport_context_in) + + out_sz = MLX5_ST_SZ_BYTES(query_nic_vport_context_out) + req_list_size * MLX5_ST_SZ_BYTES(mac_address_layout); out = kvzalloc(out_sz, GFP_KERNEL); From 8f5100da56b3980276234e812ce98d8f075194cd Mon Sep 17 00:00:00 2001 From: Shifeng Li Date: Sat, 2 Dec 2023 00:01:26 -0800 Subject: [PATCH 20/69] net/mlx5e: Fix a race in command alloc flow Fix a cmd->ent use after free due to a race on command entry. Such race occurs when one of the commands releases its last refcount and frees its index and entry while another process running command flush flow takes refcount to this command entry. The process which handles commands flush may see this command as needed to be flushed if the other process allocated a ent->idx but didn't set ent to cmd->ent_arr in cmd_work_handler(). Fix it by moving the assignment of cmd->ent_arr into the spin lock. 
[70013.081955] BUG: KASAN: use-after-free in mlx5_cmd_trigger_completions+0x1e2/0x4c0 [mlx5_core] [70013.081967] Write of size 4 at addr ffff88880b1510b4 by task kworker/26:1/1433361 [70013.081968] [70013.082028] Workqueue: events aer_isr [70013.082053] Call Trace: [70013.082067] dump_stack+0x8b/0xbb [70013.082086] print_address_description+0x6a/0x270 [70013.082102] kasan_report+0x179/0x2c0 [70013.082173] mlx5_cmd_trigger_completions+0x1e2/0x4c0 [mlx5_core] [70013.082267] mlx5_cmd_flush+0x80/0x180 [mlx5_core] [70013.082304] mlx5_enter_error_state+0x106/0x1d0 [mlx5_core] [70013.082338] mlx5_try_fast_unload+0x2ea/0x4d0 [mlx5_core] [70013.082377] remove_one+0x200/0x2b0 [mlx5_core] [70013.082409] pci_device_remove+0xf3/0x280 [70013.082439] device_release_driver_internal+0x1c3/0x470 [70013.082453] pci_stop_bus_device+0x109/0x160 [70013.082468] pci_stop_and_remove_bus_device+0xe/0x20 [70013.082485] pcie_do_fatal_recovery+0x167/0x550 [70013.082493] aer_isr+0x7d2/0x960 [70013.082543] process_one_work+0x65f/0x12d0 [70013.082556] worker_thread+0x87/0xb50 [70013.082571] kthread+0x2e9/0x3a0 [70013.082592] ret_from_fork+0x1f/0x40 The logical relationship of this error is as follows: aer_recover_work | ent->work -------------------------------------------+------------------------------ aer_recover_work_func | |- pcie_do_recovery | |- report_error_detected | |- mlx5_pci_err_detected |cmd_work_handler |- mlx5_enter_error_state | |- cmd_alloc_index |- enter_error_state | |- lock cmd->alloc_lock |- mlx5_cmd_flush | |- clear_bit |- mlx5_cmd_trigger_completions| |- unlock cmd->alloc_lock |- lock cmd->alloc_lock | |- vector = ~dev->cmd.vars.bitmask |- for_each_set_bit | |- cmd_ent_get(cmd->ent_arr[i]) (UAF) |- unlock cmd->alloc_lock | |- cmd->ent_arr[ent->idx]=ent The cmd->ent_arr[ent->idx] assignment and the bit clearing are not protected by the cmd->alloc_lock in cmd_work_handler(). 
Fixes: 50b2412b7e78 ("net/mlx5: Avoid possible free of command entry while timeout comp handler") Reviewed-by: Moshe Shemesh Signed-off-by: Shifeng Li Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/cmd.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c index f8f0a712c943..a7b1f9686c09 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c @@ -156,15 +156,18 @@ static u8 alloc_token(struct mlx5_cmd *cmd) return token; } -static int cmd_alloc_index(struct mlx5_cmd *cmd) +static int cmd_alloc_index(struct mlx5_cmd *cmd, struct mlx5_cmd_work_ent *ent) { unsigned long flags; int ret; spin_lock_irqsave(&cmd->alloc_lock, flags); ret = find_first_bit(&cmd->vars.bitmask, cmd->vars.max_reg_cmds); - if (ret < cmd->vars.max_reg_cmds) + if (ret < cmd->vars.max_reg_cmds) { clear_bit(ret, &cmd->vars.bitmask); + ent->idx = ret; + cmd->ent_arr[ent->idx] = ent; + } spin_unlock_irqrestore(&cmd->alloc_lock, flags); return ret < cmd->vars.max_reg_cmds ? ret : -ENOMEM; @@ -979,7 +982,7 @@ static void cmd_work_handler(struct work_struct *work) sem = ent->page_queue ? 
&cmd->vars.pages_sem : &cmd->vars.sem; down(sem); if (!ent->page_queue) { - alloc_ret = cmd_alloc_index(cmd); + alloc_ret = cmd_alloc_index(cmd, ent); if (alloc_ret < 0) { mlx5_core_err_rl(dev, "failed to allocate command entry\n"); if (ent->callback) { @@ -994,15 +997,14 @@ static void cmd_work_handler(struct work_struct *work) up(sem); return; } - ent->idx = alloc_ret; } else { ent->idx = cmd->vars.max_reg_cmds; spin_lock_irqsave(&cmd->alloc_lock, flags); clear_bit(ent->idx, &cmd->vars.bitmask); + cmd->ent_arr[ent->idx] = ent; spin_unlock_irqrestore(&cmd->alloc_lock, flags); } - cmd->ent_arr[ent->idx] = ent; lay = get_inst(cmd, ent->idx); ent->lay = lay; memset(lay, 0, sizeof(*lay)); From e75efc6466ae289e599fb12a5a86545dff245c65 Mon Sep 17 00:00:00 2001 From: Dinghao Liu Date: Tue, 28 Nov 2023 17:40:53 +0800 Subject: [PATCH 21/69] net/mlx5e: fix a potential double-free in fs_udp_create_groups When kcalloc() for ft->g succeeds but kvzalloc() for in fails, fs_udp_create_groups() will free ft->g. However, its caller fs_udp_create_table() will free ft->g again through calling mlx5e_destroy_flow_table(), which will lead to a double-free. Fix this by setting ft->g to NULL in fs_udp_create_groups(). 
Fixes: 1c80bd684388 ("net/mlx5e: Introduce Flow Steering UDP API") Signed-off-by: Dinghao Liu Reviewed-by: Tariq Toukan Reviewed-by: Simon Horman Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en/fs_tt_redirect.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/fs_tt_redirect.c b/drivers/net/ethernet/mellanox/mlx5/core/en/fs_tt_redirect.c index be83ad9db82a..e1283531e0b8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/fs_tt_redirect.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/fs_tt_redirect.c @@ -154,6 +154,7 @@ static int fs_udp_create_groups(struct mlx5e_flow_table *ft, enum fs_udp_type ty in = kvzalloc(inlen, GFP_KERNEL); if (!in || !ft->g) { kfree(ft->g); + ft->g = NULL; kvfree(in); return -ENOMEM; } From da75fa542873e5f7d7f615566c0b00042d8a0437 Mon Sep 17 00:00:00 2001 From: Jianbo Liu Date: Tue, 14 Nov 2023 01:25:21 +0000 Subject: [PATCH 22/69] net/mlx5e: Fix overrun reported by coverity Coverity Scan reports the following issue. But it's impossible that mlx5_get_dev_index returns 7 for PF, even if the index is calculated from PCI FUNC ID. So add the check to silence Coverity. CID 610894 (#2 of 2): Out-of-bounds write (OVERRUN) Overrunning array esw->fdb_table.offloads.peer_miss_rules of 4 8-byte elements at element index 7 (byte offset 63) using index mlx5_get_dev_index(peer_dev) (which evaluates to 7).
Fixes: 9bee385a6e39 ("net/mlx5: E-switch, refactor FDB miss rule add/remove") Signed-off-by: Jianbo Liu Reviewed-by: Roi Dayan Signed-off-by: Saeed Mahameed --- .../ethernet/mellanox/mlx5/core/eswitch_offloads.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index bb8bcb448ae9..9bd5609cf659 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -1177,9 +1177,9 @@ static int esw_add_fdb_peer_miss_rules(struct mlx5_eswitch *esw, struct mlx5_flow_handle *flow; struct mlx5_flow_spec *spec; struct mlx5_vport *vport; + int err, pfindex; unsigned long i; void *misc; - int err; if (!MLX5_VPORT_MANAGER(esw->dev) && !mlx5_core_is_ecpf_esw_manager(esw->dev)) return 0; @@ -1255,7 +1255,15 @@ static int esw_add_fdb_peer_miss_rules(struct mlx5_eswitch *esw, flows[vport->index] = flow; } } - esw->fdb_table.offloads.peer_miss_rules[mlx5_get_dev_index(peer_dev)] = flows; + + pfindex = mlx5_get_dev_index(peer_dev); + if (pfindex >= MLX5_MAX_PORTS) { + esw_warn(esw->dev, "Peer dev index(%d) is over the max num defined(%d)\n", + pfindex, MLX5_MAX_PORTS); + err = -EINVAL; + goto add_ec_vf_flow_err; + } + esw->fdb_table.offloads.peer_miss_rules[pfindex] = flows; kvfree(spec); return 0; From be86106fd74a145f24c56c9bc18d658e8fe6d4f4 Mon Sep 17 00:00:00 2001 From: Chris Mi Date: Wed, 29 Nov 2023 04:53:32 +0200 Subject: [PATCH 23/69] net/mlx5e: Decrease num_block_tc when unblock tc offload The cited commit increases num_block_tc when unblocking TC offload. It should decrease it instead.
Fixes: c8e350e62fc5 ("net/mlx5e: Make TC and IPsec offloads mutually exclusive on a netdev") Signed-off-by: Chris Mi Reviewed-by: Jianbo Liu Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c index c1e89dc77db9..41a2543a52cd 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c @@ -2142,7 +2142,7 @@ static int mlx5e_ipsec_block_tc_offload(struct mlx5_core_dev *mdev) static void mlx5e_ipsec_unblock_tc_offload(struct mlx5_core_dev *mdev) { - mdev->num_block_tc++; + mdev->num_block_tc--; } int mlx5e_accel_ipsec_fs_add_rule(struct mlx5e_ipsec_sa_entry *sa_entry) From bcaf109f794744c14da0e9123b31d1f4571b0a35 Mon Sep 17 00:00:00 2001 From: Carolina Jubran Date: Thu, 23 Nov 2023 16:11:20 +0200 Subject: [PATCH 24/69] net/mlx5e: XDP, Drop fragmented packets larger than MTU size XDP transmits fragmented packets that are larger than MTU size instead of dropping those packets. The drop check that checks whether a packet is larger than MTU is comparing MTU size against the linear part length only. Adjust the drop check to compare MTU size against both linear and non-linear part lengths to avoid transmitting fragmented packets larger than MTU size. 
Fixes: 39a1665d16a2 ("net/mlx5e: Implement sending multi buffer XDP frames") Signed-off-by: Carolina Jubran Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c index 7decc81ed33a..13c7ed1bb37e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c @@ -493,6 +493,7 @@ mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq, struct mlx5e_xmit_data *xdptxd, dma_addr_t dma_addr = xdptxd->dma_addr; u32 dma_len = xdptxd->len; u16 ds_cnt, inline_hdr_sz; + unsigned int frags_size; u8 num_wqebbs = 1; int num_frags = 0; bool inline_ok; @@ -503,8 +504,9 @@ mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq, struct mlx5e_xmit_data *xdptxd, inline_ok = sq->min_inline_mode == MLX5_INLINE_MODE_NONE || dma_len >= MLX5E_XDP_MIN_INLINE; + frags_size = xdptxd->has_frags ? xdptxdf->sinfo->xdp_frags_size : 0; - if (unlikely(!inline_ok || sq->hw_mtu < dma_len)) { + if (unlikely(!inline_ok || sq->hw_mtu < dma_len + frags_size)) { stats->err++; return false; } From 4261edf11cb7c9224af713a102e5616329306932 Mon Sep 17 00:00:00 2001 From: Moshe Shemesh Date: Thu, 30 Nov 2023 11:30:34 +0200 Subject: [PATCH 25/69] net/mlx5: Fix fw tracer first block check While handling new traces, to verify it is not the first block being written, last_timestamp is checked. But instead of checking it is non zero it is verified to be zero. Fix to verify last_timestamp is not zero. 
Fixes: c71ad41ccb0c ("net/mlx5: FW tracer, events handling") Signed-off-by: Moshe Shemesh Reviewed-by: Feras Daoud Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c index 76d27d2ee40c..080e7eab52c7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c @@ -718,7 +718,7 @@ static void mlx5_fw_tracer_handle_traces(struct work_struct *work) while (block_timestamp > tracer->last_timestamp) { /* Check block override if it's not the first block */ - if (!tracer->last_timestamp) { + if (tracer->last_timestamp) { u64 *ts_event; /* To avoid block override be the HW in case of buffer * wraparound, the time stamp of the previous block From 04ad04e4fdd10f92ef4f2b3f6227ec9824682197 Mon Sep 17 00:00:00 2001 From: Vlad Buslov Date: Fri, 6 Oct 2023 15:22:22 +0200 Subject: [PATCH 26/69] net/mlx5: Refactor mlx5_flow_destination->rep pointer to vport num Currently the destination rep pointer is only used for comparisons or to obtain vport number from it. Since it is used both during flow creation and deletion it may point to representor of another eswitch instance which can be deallocated during driver unload even when there are rules pointing to it[0]. Refactor the code to store vport number and 'valid' flag instead of the representor pointer. 
[0]: [176805.886303] ================================================================== [176805.889433] BUG: KASAN: slab-use-after-free in esw_cleanup_dests+0x390/0x440 [mlx5_core] [176805.892981] Read of size 2 at addr ffff888155090aa0 by task modprobe/27280 [176805.895462] CPU: 3 PID: 27280 Comm: modprobe Tainted: G B 6.6.0-rc3+ #1 [176805.896771] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 [176805.898514] Call Trace: [176805.899026] [176805.899519] dump_stack_lvl+0x33/0x50 [176805.900221] print_report+0xc2/0x610 [176805.900893] ? mlx5_chains_put_table+0x33d/0x8d0 [mlx5_core] [176805.901897] ? esw_cleanup_dests+0x390/0x440 [mlx5_core] [176805.902852] kasan_report+0xac/0xe0 [176805.903509] ? esw_cleanup_dests+0x390/0x440 [mlx5_core] [176805.904461] esw_cleanup_dests+0x390/0x440 [mlx5_core] [176805.905223] __mlx5_eswitch_del_rule+0x1ae/0x460 [mlx5_core] [176805.906044] ? esw_cleanup_dests+0x440/0x440 [mlx5_core] [176805.906822] ? xas_find_conflict+0x420/0x420 [176805.907496] ? down_read+0x11e/0x200 [176805.908046] mlx5e_tc_rule_unoffload+0xc4/0x2a0 [mlx5_core] [176805.908844] mlx5e_tc_del_fdb_flow+0x7da/0xb10 [mlx5_core] [176805.909597] mlx5e_flow_put+0x4b/0x80 [mlx5_core] [176805.910275] mlx5e_delete_flower+0x5b4/0xb70 [mlx5_core] [176805.911010] tc_setup_cb_reoffload+0x27/0xb0 [176805.911648] fl_reoffload+0x62d/0x900 [cls_flower] [176805.912313] ? mlx5e_rep_indr_block_unbind+0xd0/0xd0 [mlx5_core] [176805.913151] ? __fl_put+0x230/0x230 [cls_flower] [176805.913768] ? filter_irq_stacks+0x90/0x90 [176805.914335] ? kasan_save_stack+0x1e/0x40 [176805.914893] ? kasan_set_track+0x21/0x30 [176805.915484] ? kasan_save_free_info+0x27/0x40 [176805.916105] tcf_block_playback_offloads+0x79/0x1f0 [176805.916773] ? mlx5e_rep_indr_block_unbind+0xd0/0xd0 [mlx5_core] [176805.917647] tcf_block_unbind+0x12d/0x330 [176805.918239] tcf_block_offload_cmd.isra.0+0x24e/0x320 [176805.918953] ? 
tcf_block_bind+0x770/0x770 [176805.919551] ? _raw_read_unlock_irqrestore+0x30/0x30 [176805.920236] ? mutex_lock+0x7d/0xd0 [176805.920735] ? mutex_unlock+0x80/0xd0 [176805.921255] tcf_block_offload_unbind+0xa5/0x120 [176805.921909] __tcf_block_put+0xc2/0x2d0 [176805.922467] ingress_destroy+0xf4/0x3d0 [sch_ingress] [176805.923178] __qdisc_destroy+0x9d/0x280 [176805.923741] dev_shutdown+0x1c6/0x330 [176805.924295] unregister_netdevice_many_notify+0x6ef/0x1500 [176805.925034] ? netdev_freemem+0x50/0x50 [176805.925610] ? _raw_spin_lock_irq+0x7b/0xd0 [176805.926235] ? _raw_spin_lock_bh+0xe0/0xe0 [176805.926849] unregister_netdevice_queue+0x1e0/0x280 [176805.927592] ? unregister_netdevice_many+0x10/0x10 [176805.928275] unregister_netdev+0x18/0x20 [176805.928835] mlx5e_vport_rep_unload+0xc0/0x200 [mlx5_core] [176805.929608] mlx5_esw_offloads_unload_rep+0x9d/0xc0 [mlx5_core] [176805.930492] mlx5_eswitch_unload_vf_vports+0x108/0x1a0 [mlx5_core] [176805.931422] ? mlx5_eswitch_unload_sf_vport+0x50/0x50 [mlx5_core] [176805.932304] ? rwsem_down_write_slowpath+0x11f0/0x11f0 [176805.932987] mlx5_eswitch_disable_sriov+0x6f9/0xa60 [mlx5_core] [176805.933807] ? mlx5_core_disable_hca+0xe1/0x130 [mlx5_core] [176805.934576] ? mlx5_eswitch_disable_locked+0x580/0x580 [mlx5_core] [176805.935463] mlx5_device_disable_sriov+0x138/0x490 [mlx5_core] [176805.936308] mlx5_sriov_disable+0x8c/0xb0 [mlx5_core] [176805.937063] remove_one+0x7f/0x210 [mlx5_core] [176805.937711] pci_device_remove+0x96/0x1c0 [176805.938289] device_release_driver_internal+0x361/0x520 [176805.938981] ? kobject_put+0x5c/0x330 [176805.939553] driver_detach+0xd7/0x1d0 [176805.940101] bus_remove_driver+0x11f/0x290 [176805.943847] pci_unregister_driver+0x23/0x1f0 [176805.944505] mlx5_cleanup+0xc/0x20 [mlx5_core] [176805.945189] __x64_sys_delete_module+0x2b3/0x450 [176805.945837] ? module_flags+0x300/0x300 [176805.946377] ? dput+0xc2/0x830 [176805.946848] ? __kasan_record_aux_stack+0x9c/0xb0 [176805.947555] ? 
__call_rcu_common.constprop.0+0x46c/0xb50 [176805.948338] ? fpregs_assert_state_consistent+0x1d/0xa0 [176805.949055] ? exit_to_user_mode_prepare+0x30/0x120 [176805.949713] do_syscall_64+0x3d/0x90 [176805.950226] entry_SYSCALL_64_after_hwframe+0x46/0xb0 [176805.950904] RIP: 0033:0x7f7f42c3f5ab [176805.951462] Code: 73 01 c3 48 8b 0d 75 a8 1b 00 f7 d8 64 89 01 48 83 c8 ff c3 66 2e 0f 1f 84 00 00 00 00 00 90 f3 0f 1e fa b8 b0 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 45 a8 1b 00 f7 d8 64 89 01 48 [176805.953710] RSP: 002b:00007fff07dc9d08 EFLAGS: 00000206 ORIG_RAX: 00000000000000b0 [176805.954691] RAX: ffffffffffffffda RBX: 000055b6e91c01e0 RCX: 00007f7f42c3f5ab [176805.955691] RDX: 0000000000000000 RSI: 0000000000000800 RDI: 000055b6e91c0248 [176805.956662] RBP: 000055b6e91c01e0 R08: 0000000000000000 R09: 0000000000000000 [176805.957601] R10: 00007f7f42d9eac0 R11: 0000000000000206 R12: 000055b6e91c0248 [176805.958593] R13: 0000000000000000 R14: 000055b6e91bfb38 R15: 0000000000000000 [176805.959599] [176805.960324] Allocated by task 20490: [176805.960893] kasan_save_stack+0x1e/0x40 [176805.961463] kasan_set_track+0x21/0x30 [176805.962019] __kasan_kmalloc+0x77/0x90 [176805.962554] esw_offloads_init+0x1bb/0x480 [mlx5_core] [176805.963318] mlx5_eswitch_init+0xc70/0x15c0 [mlx5_core] [176805.964092] mlx5_init_one_devl_locked+0x366/0x1230 [mlx5_core] [176805.964902] probe_one+0x6f7/0xc90 [mlx5_core] [176805.965541] local_pci_probe+0xd7/0x180 [176805.966075] pci_device_probe+0x231/0x6f0 [176805.966631] really_probe+0x1d4/0xb50 [176805.967179] __driver_probe_device+0x18d/0x450 [176805.967810] driver_probe_device+0x49/0x120 [176805.968431] __driver_attach+0x1fb/0x490 [176805.968976] bus_for_each_dev+0xed/0x170 [176805.969560] bus_add_driver+0x21a/0x570 [176805.970124] driver_register+0x133/0x460 [176805.970684] 0xffffffffa0678065 [176805.971180] do_one_initcall+0x92/0x2b0 [176805.971744] do_init_module+0x22d/0x720 [176805.972318] load_module+0x58c3/0x63b0 
[176805.972847] init_module_from_file+0xd2/0x130 [176805.973441] __x64_sys_finit_module+0x389/0x7c0 [176805.974045] do_syscall_64+0x3d/0x90 [176805.974556] entry_SYSCALL_64_after_hwframe+0x46/0xb0 [176805.975566] Freed by task 27280: [176805.976077] kasan_save_stack+0x1e/0x40 [176805.976655] kasan_set_track+0x21/0x30 [176805.977221] kasan_save_free_info+0x27/0x40 [176805.977834] ____kasan_slab_free+0x11a/0x1b0 [176805.978505] __kmem_cache_free+0x163/0x2d0 [176805.979113] esw_offloads_cleanup_reps+0xb8/0x120 [mlx5_core] [176805.979963] mlx5_eswitch_cleanup+0x182/0x270 [mlx5_core] [176805.980763] mlx5_cleanup_once+0x9a/0x1e0 [mlx5_core] [176805.981477] mlx5_uninit_one+0xa9/0x180 [mlx5_core] [176805.982196] remove_one+0x8f/0x210 [mlx5_core] [176805.982868] pci_device_remove+0x96/0x1c0 [176805.983461] device_release_driver_internal+0x361/0x520 [176805.984169] driver_detach+0xd7/0x1d0 [176805.984702] bus_remove_driver+0x11f/0x290 [176805.985261] pci_unregister_driver+0x23/0x1f0 [176805.985847] mlx5_cleanup+0xc/0x20 [mlx5_core] [176805.986483] __x64_sys_delete_module+0x2b3/0x450 [176805.987126] do_syscall_64+0x3d/0x90 [176805.987665] entry_SYSCALL_64_after_hwframe+0x46/0xb0 [176805.988667] Last potentially related work creation: [176805.989305] kasan_save_stack+0x1e/0x40 [176805.989839] __kasan_record_aux_stack+0x9c/0xb0 [176805.990443] kvfree_call_rcu+0x84/0xa30 [176805.990973] clean_xps_maps+0x265/0x6e0 [176805.991547] netif_reset_xps_queues.part.0+0x3f/0x80 [176805.992226] unregister_netdevice_many_notify+0xfcf/0x1500 [176805.992966] unregister_netdevice_queue+0x1e0/0x280 [176805.993638] unregister_netdev+0x18/0x20 [176805.994205] mlx5e_remove+0xba/0x1e0 [mlx5_core] [176805.994872] auxiliary_bus_remove+0x52/0x70 [176805.995490] device_release_driver_internal+0x361/0x520 [176805.996196] bus_remove_device+0x1e1/0x3d0 [176805.996767] device_del+0x390/0x980 [176805.997270] mlx5_rescan_drivers_locked.part.0+0x130/0x540 [mlx5_core] [176805.998195] 
mlx5_unregister_device+0x77/0xc0 [mlx5_core] [176805.998989] mlx5_uninit_one+0x41/0x180 [mlx5_core] [176805.999719] remove_one+0x8f/0x210 [mlx5_core] [176806.000387] pci_device_remove+0x96/0x1c0 [176806.000938] device_release_driver_internal+0x361/0x520 [176806.001612] unbind_store+0xd8/0xf0 [176806.002108] kernfs_fop_write_iter+0x2c0/0x440 [176806.002748] vfs_write+0x725/0xba0 [176806.003294] ksys_write+0xed/0x1c0 [176806.003823] do_syscall_64+0x3d/0x90 [176806.004357] entry_SYSCALL_64_after_hwframe+0x46/0xb0 [176806.005317] The buggy address belongs to the object at ffff888155090a80 which belongs to the cache kmalloc-64 of size 64 [176806.006774] The buggy address is located 32 bytes inside of freed 64-byte region [ffff888155090a80, ffff888155090ac0) [176806.008773] The buggy address belongs to the physical page: [176806.009480] page:00000000a407e0e6 refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x155090 [176806.010633] flags: 0x200000000000800(slab|node=0|zone=2) [176806.011352] page_type: 0xffffffff() [176806.011905] raw: 0200000000000800 ffff888100042640 ffffea000422b1c0 dead000000000004 [176806.012949] raw: 0000000000000000 0000000000200020 00000001ffffffff 0000000000000000 [176806.013933] page dumped because: kasan: bad access detected [176806.014935] Memory state around the buggy address: [176806.015601] ffff888155090980: fa fb fb fb fb fb fb fb fc fc fc fc fc fc fc fc [176806.016568] ffff888155090a00: fa fb fb fb fb fb fb fb fc fc fc fc fc fc fc fc [176806.017497] >ffff888155090a80: fa fb fb fb fb fb fb fb fc fc fc fc fc fc fc fc [176806.018438] ^ [176806.019007] ffff888155090b00: fa fb fb fb fb fb fb fb fc fc fc fc fc fc fc fc [176806.020001] ffff888155090b80: fa fb fb fb fb fb fb fb fc fc fc fc fc fc fc fc [176806.020996] ================================================================== Fixes: a508728a4c8b ("net/mlx5e: VF tunnel RX traffic offloading") Signed-off-by: Vlad Buslov Reviewed-by: Roi Dayan Signed-off-by: Saeed Mahameed --- 
.../mellanox/mlx5/core/en/tc/act/mirred.c | 5 +++-- .../mellanox/mlx5/core/en/tc_tun_encap.c | 3 ++- .../net/ethernet/mellanox/mlx5/core/eswitch.h | 3 ++- .../mellanox/mlx5/core/eswitch_offloads.c | 19 +++++++++---------- .../mlx5/core/eswitch_offloads_termtbl.c | 4 ++-- 5 files changed, 18 insertions(+), 16 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mirred.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mirred.c index f63402c48028..1b418095b79a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mirred.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mirred.c @@ -197,7 +197,7 @@ parse_mirred_encap(struct mlx5e_tc_act_parse_state *parse_state, } esw_attr->dests[esw_attr->out_count].flags |= MLX5_ESW_DEST_ENCAP; esw_attr->out_count++; - /* attr->dests[].rep is resolved when we handle encap */ + /* attr->dests[].vport is resolved when we handle encap */ return 0; } @@ -270,7 +270,8 @@ parse_mirred(struct mlx5e_tc_act_parse_state *parse_state, out_priv = netdev_priv(out_dev); rpriv = out_priv->ppriv; - esw_attr->dests[esw_attr->out_count].rep = rpriv->rep; + esw_attr->dests[esw_attr->out_count].vport_valid = true; + esw_attr->dests[esw_attr->out_count].vport = rpriv->rep->vport; esw_attr->dests[esw_attr->out_count].mdev = out_priv->mdev; esw_attr->out_count++; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c index b10e40e1a9c1..f1d1e1542e81 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c @@ -1064,7 +1064,8 @@ int mlx5e_tc_tun_encap_dests_set(struct mlx5e_priv *priv, out_priv = netdev_priv(encap_dev); rpriv = out_priv->ppriv; - esw_attr->dests[out_index].rep = rpriv->rep; + esw_attr->dests[out_index].vport_valid = true; + esw_attr->dests[out_index].vport = rpriv->rep->vport; esw_attr->dests[out_index].mdev = out_priv->mdev; } diff --git 
a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index b674b57d05aa..b4eb17141edf 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -526,7 +526,8 @@ struct mlx5_esw_flow_attr { u8 total_vlan; struct { u32 flags; - struct mlx5_eswitch_rep *rep; + bool vport_valid; + u16 vport; struct mlx5_pkt_reformat *pkt_reformat; struct mlx5_core_dev *mdev; struct mlx5_termtbl_handle *termtbl; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 9bd5609cf659..b0455134c98e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -287,10 +287,9 @@ static void esw_put_dest_tables_loop(struct mlx5_eswitch *esw, struct mlx5_flow_ for (i = from; i < to; i++) if (esw_attr->dests[i].flags & MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE) mlx5_chains_put_table(chains, 0, 1, 0); - else if (mlx5_esw_indir_table_needed(esw, attr, esw_attr->dests[i].rep->vport, + else if (mlx5_esw_indir_table_needed(esw, attr, esw_attr->dests[i].vport, esw_attr->dests[i].mdev)) - mlx5_esw_indir_table_put(esw, esw_attr->dests[i].rep->vport, - false); + mlx5_esw_indir_table_put(esw, esw_attr->dests[i].vport, false); } static bool @@ -358,8 +357,8 @@ esw_is_indir_table(struct mlx5_eswitch *esw, struct mlx5_flow_attr *attr) * this criteria. 
*/ for (i = esw_attr->split_count; i < esw_attr->out_count; i++) { - if (esw_attr->dests[i].rep && - mlx5_esw_indir_table_needed(esw, attr, esw_attr->dests[i].rep->vport, + if (esw_attr->dests[i].vport_valid && + mlx5_esw_indir_table_needed(esw, attr, esw_attr->dests[i].vport, esw_attr->dests[i].mdev)) { result = true; } else { @@ -388,7 +387,7 @@ esw_setup_indir_table(struct mlx5_flow_destination *dest, dest[*i].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; dest[*i].ft = mlx5_esw_indir_table_get(esw, attr, - esw_attr->dests[j].rep->vport, false); + esw_attr->dests[j].vport, false); if (IS_ERR(dest[*i].ft)) { err = PTR_ERR(dest[*i].ft); goto err_indir_tbl_get; @@ -432,11 +431,11 @@ static bool esw_setup_uplink_fwd_ipsec_needed(struct mlx5_eswitch *esw, int attr_idx) { if (esw->offloads.ft_ipsec_tx_pol && - esw_attr->dests[attr_idx].rep && - esw_attr->dests[attr_idx].rep->vport == MLX5_VPORT_UPLINK && + esw_attr->dests[attr_idx].vport_valid && + esw_attr->dests[attr_idx].vport == MLX5_VPORT_UPLINK && /* To be aligned with software, encryption is needed only for tunnel device */ (esw_attr->dests[attr_idx].flags & MLX5_ESW_DEST_ENCAP_VALID) && - esw_attr->dests[attr_idx].rep != esw_attr->in_rep && + esw_attr->dests[attr_idx].vport != esw_attr->in_rep->vport && esw_same_vhca_id(esw_attr->dests[attr_idx].mdev, esw->dev)) return true; @@ -469,7 +468,7 @@ esw_setup_dest_fwd_vport(struct mlx5_flow_destination *dest, struct mlx5_flow_ac int attr_idx, int dest_idx, bool pkt_reformat) { dest[dest_idx].type = MLX5_FLOW_DESTINATION_TYPE_VPORT; - dest[dest_idx].vport.num = esw_attr->dests[attr_idx].rep->vport; + dest[dest_idx].vport.num = esw_attr->dests[attr_idx].vport; if (MLX5_CAP_ESW(esw->dev, merged_eswitch)) { dest[dest_idx].vport.vhca_id = MLX5_CAP_GEN(esw_attr->dests[attr_idx].mdev, vhca_id); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c index edd910258314..40bdc677f051 
100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c @@ -233,8 +233,8 @@ mlx5_eswitch_termtbl_required(struct mlx5_eswitch *esw, /* hairpin */ for (i = esw_attr->split_count; i < esw_attr->out_count; i++) - if (!esw_attr->dest_int_port && esw_attr->dests[i].rep && - esw_attr->dests[i].rep->vport == MLX5_VPORT_UPLINK) + if (!esw_attr->dest_int_port && esw_attr->dests[i].vport_valid && + esw_attr->dests[i].vport == MLX5_VPORT_UPLINK) return true; return false; From 86d5922679f3b6d02a64df66cdd777fdd4ea5c0d Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 13 Dec 2023 17:08:17 +0300 Subject: [PATCH 27/69] net/mlx5e: Fix error code in mlx5e_tc_action_miss_mapping_get() Preserve the error code if esw_add_restore_rule() fails. Don't return success. Fixes: 6702782845a5 ("net/mlx5e: TC, Set CT miss to the specific ct action instance") Signed-off-by: Dan Carpenter Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 4809a66f3491..6106bbbe14de 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -5736,8 +5736,10 @@ int mlx5e_tc_action_miss_mapping_get(struct mlx5e_priv *priv, struct mlx5_flow_a esw = priv->mdev->priv.eswitch; attr->act_id_restore_rule = esw_add_restore_rule(esw, *act_miss_mapping); - if (IS_ERR(attr->act_id_restore_rule)) + if (IS_ERR(attr->act_id_restore_rule)) { + err = PTR_ERR(attr->act_id_restore_rule); goto err_rule; + } return 0; From d792e5f7f19b95f5ce41ac49df5ead4d280238f4 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 13 Dec 2023 17:08:57 +0300 Subject: [PATCH 28/69] net/mlx5e: Fix error codes in alloc_branch_attr() Set the error code if set_branch_dest_ft() fails. 
Fixes: ccbe33003b10 ("net/mlx5e: TC, Don't offload post action rule if not supported") Signed-off-by: Dan Carpenter Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 6106bbbe14de..96af9e2ab1d8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -3778,7 +3778,8 @@ alloc_branch_attr(struct mlx5e_tc_flow *flow, break; case FLOW_ACTION_ACCEPT: case FLOW_ACTION_PIPE: - if (set_branch_dest_ft(flow->priv, attr)) + err = set_branch_dest_ft(flow->priv, attr); + if (err) goto out_err; break; case FLOW_ACTION_JUMP: @@ -3788,7 +3789,8 @@ alloc_branch_attr(struct mlx5e_tc_flow *flow, goto out_err; } *jump_count = cond->extval; - if (set_branch_dest_ft(flow->priv, attr)) + err = set_branch_dest_ft(flow->priv, attr); + if (err) goto out_err; break; default: From ad436b9c1270c40554e274f067f1b78fcc06a004 Mon Sep 17 00:00:00 2001 From: Rahul Rameshbabu Date: Tue, 21 Nov 2023 15:00:21 -0800 Subject: [PATCH 29/69] net/mlx5e: Correct snprintf truncation handling for fw_version buffer snprintf returns the length of the formatted string, excluding the trailing null, without accounting for truncation. This means that if the return value is greater than or equal to the size parameter, the fw_version string was truncated.
Reported-by: David Laight Closes: https://lore.kernel.org/netdev/81cae734ee1b4cde9b380a9a31006c1a@AcuMS.aculab.com/ Link: https://docs.kernel.org/core-api/kernel-api.html#c.snprintf Fixes: 41e63c2baa11 ("net/mlx5e: Check return value of snprintf writing to fw_version buffer") Signed-off-by: Rahul Rameshbabu Reviewed-by: Simon Horman Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index 792a0ea544cd..c7c1b667b105 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -49,7 +49,7 @@ void mlx5e_ethtool_get_drvinfo(struct mlx5e_priv *priv, count = snprintf(drvinfo->fw_version, sizeof(drvinfo->fw_version), "%d.%d.%04d (%.16s)", fw_rev_maj(mdev), fw_rev_min(mdev), fw_rev_sub(mdev), mdev->board_id); - if (count == sizeof(drvinfo->fw_version)) + if (count >= sizeof(drvinfo->fw_version)) snprintf(drvinfo->fw_version, sizeof(drvinfo->fw_version), "%d.%d.%04d", fw_rev_maj(mdev), fw_rev_min(mdev), fw_rev_sub(mdev)); From b13559b76157de9d74f04d3ca0e49d69de3b5675 Mon Sep 17 00:00:00 2001 From: Rahul Rameshbabu Date: Tue, 21 Nov 2023 15:00:22 -0800 Subject: [PATCH 30/69] net/mlx5e: Correct snprintf truncation handling for fw_version buffer used by representors snprintf returns the length of the formatted string, excluding the trailing null, without accounting for truncation. This means that if the return value is greater than or equal to the size parameter, the fw_version string was truncated.
Link: https://docs.kernel.org/core-api/kernel-api.html#c.snprintf Fixes: 1b2bd0c0264f ("net/mlx5e: Check return value of snprintf writing to fw_version buffer for representors") Signed-off-by: Rahul Rameshbabu Reviewed-by: Simon Horman Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_rep.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index 1bf7540a65ad..e92d4f83592e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -78,7 +78,7 @@ static void mlx5e_rep_get_drvinfo(struct net_device *dev, count = snprintf(drvinfo->fw_version, sizeof(drvinfo->fw_version), "%d.%d.%04d (%.16s)", fw_rev_maj(mdev), fw_rev_min(mdev), fw_rev_sub(mdev), mdev->board_id); - if (count == sizeof(drvinfo->fw_version)) + if (count >= sizeof(drvinfo->fw_version)) snprintf(drvinfo->fw_version, sizeof(drvinfo->fw_version), "%d.%d.%04d", fw_rev_maj(mdev), fw_rev_min(mdev), fw_rev_sub(mdev)); From 3c2a8ebe3fe66a5f77d4c164a0bea8e2ff37b455 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 14 Dec 2023 09:08:16 +0100 Subject: [PATCH 31/69] wifi: cfg80211: fix certs build to not depend on file order The file for the new certificate (Chen-Yu Tsai's) didn't end with a comma, so depending on the file order in the build rule, we'd end up with invalid C when concatenating the (now two) certificates. Fix that. 
Cc: stable@vger.kernel.org Reported-by: Biju Das Reported-by: Naresh Kamboju Fixes: fb768d3b13ff ("wifi: cfg80211: Add my certificate") Signed-off-by: Johannes Berg --- net/wireless/certs/wens.hex | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/wireless/certs/wens.hex b/net/wireless/certs/wens.hex index ccd5b5dc3360..0d50369bede9 100644 --- a/net/wireless/certs/wens.hex +++ b/net/wireless/certs/wens.hex @@ -84,4 +84,4 @@ 0xf0, 0xc7, 0x83, 0xbb, 0xa2, 0x81, 0x03, 0x2d, 0xd4, 0x2a, 0x63, 0x3f, 0xf7, 0x31, 0x2e, 0x40, 0x33, 0x5c, 0x46, 0xbc, 0x9b, 0xc1, 0x05, 0xa5, -0x45, 0x4e, 0xc3 +0x45, 0x4e, 0xc3, From 52eda4641d041667fa059f4855c5f88dcebd8afe Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Thu, 14 Dec 2023 02:09:01 +0200 Subject: [PATCH 32/69] net: mscc: ocelot: fix eMAC TX RMON stats for bucket 256-511 and above There is a typo in the driver due to which we report incorrect TX RMON counters for the 256-511 octet bucket and all the other buckets larger than that. 
Bug found with the selftest at https://patchwork.kernel.org/project/netdevbpf/patch/20231211223346.2497157-9-tobias@waldekranz.com/ Fixes: e32036e1ae7b ("net: mscc: ocelot: add support for all sorts of standardized counters present in DSA") Signed-off-by: Vladimir Oltean Reviewed-by: Florian Fainelli Link: https://lore.kernel.org/r/20231214000902.545625-1-vladimir.oltean@nxp.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mscc/ocelot_stats.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mscc/ocelot_stats.c b/drivers/net/ethernet/mscc/ocelot_stats.c index 5c55197c7327..f29fa37263da 100644 --- a/drivers/net/ethernet/mscc/ocelot_stats.c +++ b/drivers/net/ethernet/mscc/ocelot_stats.c @@ -582,10 +582,10 @@ static void ocelot_port_rmon_stats_cb(struct ocelot *ocelot, int port, void *pri rmon_stats->hist_tx[0] = s[OCELOT_STAT_TX_64]; rmon_stats->hist_tx[1] = s[OCELOT_STAT_TX_65_127]; rmon_stats->hist_tx[2] = s[OCELOT_STAT_TX_128_255]; - rmon_stats->hist_tx[3] = s[OCELOT_STAT_TX_128_255]; - rmon_stats->hist_tx[4] = s[OCELOT_STAT_TX_256_511]; - rmon_stats->hist_tx[5] = s[OCELOT_STAT_TX_512_1023]; - rmon_stats->hist_tx[6] = s[OCELOT_STAT_TX_1024_1526]; + rmon_stats->hist_tx[3] = s[OCELOT_STAT_TX_256_511]; + rmon_stats->hist_tx[4] = s[OCELOT_STAT_TX_512_1023]; + rmon_stats->hist_tx[5] = s[OCELOT_STAT_TX_1024_1526]; + rmon_stats->hist_tx[6] = s[OCELOT_STAT_TX_1527_MAX]; } static void ocelot_port_pmac_rmon_stats_cb(struct ocelot *ocelot, int port, From 70f010da00f90415296f93fb47a561977eae41cb Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Thu, 14 Dec 2023 02:09:02 +0200 Subject: [PATCH 33/69] net: mscc: ocelot: fix pMAC TX RMON stats for bucket 256-511 and above The typo from ocelot_port_rmon_stats_cb() was also carried over to ocelot_port_pmac_rmon_stats_cb() as well, leading to incorrect TX RMON stats for the pMAC too. 
Fixes: ab3f97a9610a ("net: mscc: ocelot: export ethtool MAC Merge stats for Felix VSC9959") Signed-off-by: Vladimir Oltean Reviewed-by: Florian Fainelli Link: https://lore.kernel.org/r/20231214000902.545625-2-vladimir.oltean@nxp.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mscc/ocelot_stats.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mscc/ocelot_stats.c b/drivers/net/ethernet/mscc/ocelot_stats.c index f29fa37263da..c018783757fb 100644 --- a/drivers/net/ethernet/mscc/ocelot_stats.c +++ b/drivers/net/ethernet/mscc/ocelot_stats.c @@ -610,10 +610,10 @@ static void ocelot_port_pmac_rmon_stats_cb(struct ocelot *ocelot, int port, rmon_stats->hist_tx[0] = s[OCELOT_STAT_TX_PMAC_64]; rmon_stats->hist_tx[1] = s[OCELOT_STAT_TX_PMAC_65_127]; rmon_stats->hist_tx[2] = s[OCELOT_STAT_TX_PMAC_128_255]; - rmon_stats->hist_tx[3] = s[OCELOT_STAT_TX_PMAC_128_255]; - rmon_stats->hist_tx[4] = s[OCELOT_STAT_TX_PMAC_256_511]; - rmon_stats->hist_tx[5] = s[OCELOT_STAT_TX_PMAC_512_1023]; - rmon_stats->hist_tx[6] = s[OCELOT_STAT_TX_PMAC_1024_1526]; + rmon_stats->hist_tx[3] = s[OCELOT_STAT_TX_PMAC_256_511]; + rmon_stats->hist_tx[4] = s[OCELOT_STAT_TX_PMAC_512_1023]; + rmon_stats->hist_tx[5] = s[OCELOT_STAT_TX_PMAC_1024_1526]; + rmon_stats->hist_tx[6] = s[OCELOT_STAT_TX_PMAC_1527_MAX]; } void ocelot_port_get_rmon_stats(struct ocelot *ocelot, int port, From 738b54b9b6236f573eed2453c4cbfa77326793e2 Mon Sep 17 00:00:00 2001 From: duanqiangwen Date: Thu, 14 Dec 2023 10:33:37 +0800 Subject: [PATCH 34/69] net: libwx: fix memory leak on free page Bringing the interface up (ifconfig ethx up) sets page->refcount to a value larger than 1. Bringing it down again (ifconfig ethx down) then calls __page_frag_cache_drain() to free the pages, which is not compatible with page pool. So delete the code that changes page->refcount. Fixes: 3c47e8ae113a ("net: libwx: Support to receive packets in NAPI") Signed-off-by: duanqiangwen Signed-off-by: David S.
Miller --- drivers/net/ethernet/wangxun/libwx/wx_lib.c | 82 ++------------------ drivers/net/ethernet/wangxun/libwx/wx_type.h | 1 - 2 files changed, 6 insertions(+), 77 deletions(-) diff --git a/drivers/net/ethernet/wangxun/libwx/wx_lib.c b/drivers/net/ethernet/wangxun/libwx/wx_lib.c index a5a50b5a8816..347d3cec02a3 100644 --- a/drivers/net/ethernet/wangxun/libwx/wx_lib.c +++ b/drivers/net/ethernet/wangxun/libwx/wx_lib.c @@ -160,60 +160,6 @@ static __le32 wx_test_staterr(union wx_rx_desc *rx_desc, return rx_desc->wb.upper.status_error & cpu_to_le32(stat_err_bits); } -static bool wx_can_reuse_rx_page(struct wx_rx_buffer *rx_buffer, - int rx_buffer_pgcnt) -{ - unsigned int pagecnt_bias = rx_buffer->pagecnt_bias; - struct page *page = rx_buffer->page; - - /* avoid re-using remote and pfmemalloc pages */ - if (!dev_page_is_reusable(page)) - return false; - -#if (PAGE_SIZE < 8192) - /* if we are only owner of page we can reuse it */ - if (unlikely((rx_buffer_pgcnt - pagecnt_bias) > 1)) - return false; -#endif - - /* If we have drained the page fragment pool we need to update - * the pagecnt_bias and page count so that we fully restock the - * number of references the driver holds. - */ - if (unlikely(pagecnt_bias == 1)) { - page_ref_add(page, USHRT_MAX - 1); - rx_buffer->pagecnt_bias = USHRT_MAX; - } - - return true; -} - -/** - * wx_reuse_rx_page - page flip buffer and store it back on the ring - * @rx_ring: rx descriptor ring to store buffers on - * @old_buff: donor buffer to have page reused - * - * Synchronizes page for reuse by the adapter - **/ -static void wx_reuse_rx_page(struct wx_ring *rx_ring, - struct wx_rx_buffer *old_buff) -{ - u16 nta = rx_ring->next_to_alloc; - struct wx_rx_buffer *new_buff; - - new_buff = &rx_ring->rx_buffer_info[nta]; - - /* update, and store next to alloc */ - nta++; - rx_ring->next_to_alloc = (nta < rx_ring->count) ? 
nta : 0; - - /* transfer page from old buffer to new buffer */ - new_buff->page = old_buff->page; - new_buff->page_dma = old_buff->page_dma; - new_buff->page_offset = old_buff->page_offset; - new_buff->pagecnt_bias = old_buff->pagecnt_bias; -} - static void wx_dma_sync_frag(struct wx_ring *rx_ring, struct wx_rx_buffer *rx_buffer) { @@ -270,8 +216,6 @@ static struct wx_rx_buffer *wx_get_rx_buffer(struct wx_ring *rx_ring, size, DMA_FROM_DEVICE); skip_sync: - rx_buffer->pagecnt_bias--; - return rx_buffer; } @@ -280,19 +224,9 @@ static void wx_put_rx_buffer(struct wx_ring *rx_ring, struct sk_buff *skb, int rx_buffer_pgcnt) { - if (wx_can_reuse_rx_page(rx_buffer, rx_buffer_pgcnt)) { - /* hand second half of page back to the ring */ - wx_reuse_rx_page(rx_ring, rx_buffer); - } else { - if (!IS_ERR(skb) && WX_CB(skb)->dma == rx_buffer->dma) - /* the page has been released from the ring */ - WX_CB(skb)->page_released = true; - else - page_pool_put_full_page(rx_ring->page_pool, rx_buffer->page, false); - - __page_frag_cache_drain(rx_buffer->page, - rx_buffer->pagecnt_bias); - } + if (!IS_ERR(skb) && WX_CB(skb)->dma == rx_buffer->dma) + /* the page has been released from the ring */ + WX_CB(skb)->page_released = true; /* clear contents of rx_buffer */ rx_buffer->page = NULL; @@ -335,11 +269,12 @@ static struct sk_buff *wx_build_skb(struct wx_ring *rx_ring, if (size <= WX_RXBUFFER_256) { memcpy(__skb_put(skb, size), page_addr, ALIGN(size, sizeof(long))); - rx_buffer->pagecnt_bias++; - + page_pool_put_full_page(rx_ring->page_pool, rx_buffer->page, true); return skb; } + skb_mark_for_recycle(skb); + if (!wx_test_staterr(rx_desc, WX_RXD_STAT_EOP)) WX_CB(skb)->dma = rx_buffer->dma; @@ -382,8 +317,6 @@ static bool wx_alloc_mapped_page(struct wx_ring *rx_ring, bi->page_dma = dma; bi->page = page; bi->page_offset = 0; - page_ref_add(page, USHRT_MAX - 1); - bi->pagecnt_bias = USHRT_MAX; return true; } @@ -723,7 +656,6 @@ static int wx_clean_rx_irq(struct wx_q_vector *q_vector, /* exit 
if we failed to retrieve a buffer */ if (!skb) { rx_ring->rx_stats.alloc_rx_buff_failed++; - rx_buffer->pagecnt_bias++; break; } @@ -2248,8 +2180,6 @@ static void wx_clean_rx_ring(struct wx_ring *rx_ring) /* free resources associated with mapping */ page_pool_put_full_page(rx_ring->page_pool, rx_buffer->page, false); - __page_frag_cache_drain(rx_buffer->page, - rx_buffer->pagecnt_bias); i++; rx_buffer++; diff --git a/drivers/net/ethernet/wangxun/libwx/wx_type.h b/drivers/net/ethernet/wangxun/libwx/wx_type.h index 165e82de772e..83f9bb7b3c22 100644 --- a/drivers/net/ethernet/wangxun/libwx/wx_type.h +++ b/drivers/net/ethernet/wangxun/libwx/wx_type.h @@ -787,7 +787,6 @@ struct wx_rx_buffer { dma_addr_t page_dma; struct page *page; unsigned int page_offset; - u16 pagecnt_bias; }; struct wx_queue_stats { From 8c97ab5448f2096daba11edf8d18a44e1eb6f31d Mon Sep 17 00:00:00 2001 From: Suman Ghosh Date: Wed, 13 Dec 2023 23:40:44 +0530 Subject: [PATCH 35/69] octeontx2-pf: Fix graceful exit during PFC configuration failure When PFC configuration failed, the code did not exit gracefully. This patch fixes that and adds proper code for a graceful exit. Fixes: 99c969a83d82 ("octeontx2-pf: Add egress PFC support") Signed-off-by: Suman Ghosh Signed-off-by: David S.
Miller --- .../ethernet/marvell/octeontx2/nic/otx2_dcbnl.c | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_dcbnl.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_dcbnl.c index bfddbff7bcdf..28fb643d2917 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_dcbnl.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_dcbnl.c @@ -399,9 +399,10 @@ static int otx2_dcbnl_ieee_getpfc(struct net_device *dev, struct ieee_pfc *pfc) static int otx2_dcbnl_ieee_setpfc(struct net_device *dev, struct ieee_pfc *pfc) { struct otx2_nic *pfvf = netdev_priv(dev); + u8 old_pfc_en; int err; - /* Save PFC configuration to interface */ + old_pfc_en = pfvf->pfc_en; pfvf->pfc_en = pfc->pfc_en; if (pfvf->hw.tx_queues >= NIX_PF_PFC_PRIO_MAX) @@ -411,13 +412,17 @@ static int otx2_dcbnl_ieee_setpfc(struct net_device *dev, struct ieee_pfc *pfc) * supported by the tx queue configuration */ err = otx2_check_pfc_config(pfvf); - if (err) + if (err) { + pfvf->pfc_en = old_pfc_en; return err; + } process_pfc: err = otx2_config_priority_flow_ctrl(pfvf); - if (err) + if (err) { + pfvf->pfc_en = old_pfc_en; return err; + } /* Request Per channel Bpids */ if (pfc->pfc_en) @@ -425,6 +430,12 @@ process_pfc: err = otx2_pfc_txschq_update(pfvf); if (err) { + if (pfc->pfc_en) + otx2_nix_config_bp(pfvf, false); + + otx2_pfc_txschq_stop(pfvf); + pfvf->pfc_en = old_pfc_en; + otx2_config_priority_flow_ctrl(pfvf); dev_err(pfvf->dev, "%s failed to update TX schedulers\n", __func__); return err; } From cac23b7d7627915d967ce25436d7aae26e88ed06 Mon Sep 17 00:00:00 2001 From: Shigeru Yoshida Date: Thu, 14 Dec 2023 14:09:22 +0900 Subject: [PATCH 36/69] net: Return error from sk_stream_wait_connect() if sk_wait_event() fails The following NULL pointer dereference issue occurred: BUG: kernel NULL pointer dereference, address: 0000000000000000 <...> RIP: 0010:ccid_hc_tx_send_packet net/dccp/ccid.h:166 [inline] RIP: 
0010:dccp_write_xmit+0x49/0x140 net/dccp/output.c:356 <...> Call Trace: dccp_sendmsg+0x642/0x7e0 net/dccp/proto.c:801 inet_sendmsg+0x63/0x90 net/ipv4/af_inet.c:846 sock_sendmsg_nosec net/socket.c:730 [inline] __sock_sendmsg+0x83/0xe0 net/socket.c:745 ____sys_sendmsg+0x443/0x510 net/socket.c:2558 ___sys_sendmsg+0xe5/0x150 net/socket.c:2612 __sys_sendmsg+0xa6/0x120 net/socket.c:2641 __do_sys_sendmsg net/socket.c:2650 [inline] __se_sys_sendmsg net/socket.c:2648 [inline] __x64_sys_sendmsg+0x45/0x50 net/socket.c:2648 do_syscall_x64 arch/x86/entry/common.c:51 [inline] do_syscall_64+0x43/0x110 arch/x86/entry/common.c:82 entry_SYSCALL_64_after_hwframe+0x63/0x6b sk_wait_event() returns an error (-EPIPE) if disconnect() is called on the socket waiting for the event. However, sk_stream_wait_connect() returns success, i.e. zero, even if sk_wait_event() returns -EPIPE, so a function that waits for a connection with sk_stream_wait_connect() may misbehave. In the case of the above DCCP issue, dccp_sendmsg() is waiting for the connection. If disconnect() is called concurrently, the above issue occurs. This patch fixes the issue by returning an error from sk_stream_wait_connect() if sk_wait_event() fails. Fixes: 419ce133ab92 ("tcp: allow again tcp_disconnect() when threads are waiting") Signed-off-by: Shigeru Yoshida Reviewed-by: Kuniyuki Iwashima Reported-by: syzbot+c71bc336c5061153b502@syzkaller.appspotmail.com Reviewed-by: Eric Dumazet Reported-by: syzbot Reported-by: syzkaller Signed-off-by: David S. Miller --- net/core/stream.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/core/stream.c b/net/core/stream.c index 96fbcb9bbb30..b16dfa568a2d 100644 --- a/net/core/stream.c +++ b/net/core/stream.c @@ -79,7 +79,7 @@ int sk_stream_wait_connect(struct sock *sk, long *timeo_p) remove_wait_queue(sk_sleep(sk), &wait); sk->sk_write_pending--; } while (!done); - return 0; + return done < 0 ?
done : 0; } EXPORT_SYMBOL(sk_stream_wait_connect); From 19391a2ca98baa7b80279306cdf7dd43f81fa595 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 14 Dec 2023 11:30:38 +0000 Subject: [PATCH 37/69] net: sched: ife: fix potential use-after-free ife_decode() calls pskb_may_pull() two times, we need to reload ifehdr after the second one, or risk use-after-free as reported by syzbot: BUG: KASAN: slab-use-after-free in __ife_tlv_meta_valid net/ife/ife.c:108 [inline] BUG: KASAN: slab-use-after-free in ife_tlv_meta_decode+0x1d1/0x210 net/ife/ife.c:131 Read of size 2 at addr ffff88802d7300a4 by task syz-executor.5/22323 CPU: 0 PID: 22323 Comm: syz-executor.5 Not tainted 6.7.0-rc3-syzkaller-00804-g074ac38d5b95 #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 11/10/2023 Call Trace: __dump_stack lib/dump_stack.c:88 [inline] dump_stack_lvl+0xd9/0x1b0 lib/dump_stack.c:106 print_address_description mm/kasan/report.c:364 [inline] print_report+0xc4/0x620 mm/kasan/report.c:475 kasan_report+0xda/0x110 mm/kasan/report.c:588 __ife_tlv_meta_valid net/ife/ife.c:108 [inline] ife_tlv_meta_decode+0x1d1/0x210 net/ife/ife.c:131 tcf_ife_decode net/sched/act_ife.c:739 [inline] tcf_ife_act+0x4e3/0x1cd0 net/sched/act_ife.c:879 tc_act include/net/tc_wrapper.h:221 [inline] tcf_action_exec+0x1ac/0x620 net/sched/act_api.c:1079 tcf_exts_exec include/net/pkt_cls.h:344 [inline] mall_classify+0x201/0x310 net/sched/cls_matchall.c:42 tc_classify include/net/tc_wrapper.h:227 [inline] __tcf_classify net/sched/cls_api.c:1703 [inline] tcf_classify+0x82f/0x1260 net/sched/cls_api.c:1800 hfsc_classify net/sched/sch_hfsc.c:1147 [inline] hfsc_enqueue+0x315/0x1060 net/sched/sch_hfsc.c:1546 dev_qdisc_enqueue+0x3f/0x230 net/core/dev.c:3739 __dev_xmit_skb net/core/dev.c:3828 [inline] __dev_queue_xmit+0x1de1/0x3d30 net/core/dev.c:4311 dev_queue_xmit include/linux/netdevice.h:3165 [inline] packet_xmit+0x237/0x350 net/packet/af_packet.c:276 packet_snd net/packet/af_packet.c:3081 
[inline] packet_sendmsg+0x24aa/0x5200 net/packet/af_packet.c:3113 sock_sendmsg_nosec net/socket.c:730 [inline] __sock_sendmsg+0xd5/0x180 net/socket.c:745 __sys_sendto+0x255/0x340 net/socket.c:2190 __do_sys_sendto net/socket.c:2202 [inline] __se_sys_sendto net/socket.c:2198 [inline] __x64_sys_sendto+0xe0/0x1b0 net/socket.c:2198 do_syscall_x64 arch/x86/entry/common.c:51 [inline] do_syscall_64+0x40/0x110 arch/x86/entry/common.c:82 entry_SYSCALL_64_after_hwframe+0x63/0x6b RIP: 0033:0x7fe9acc7cae9 Code: 28 00 00 00 75 05 48 83 c4 28 c3 e8 e1 20 00 00 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 b0 ff ff ff f7 d8 64 89 01 48 RSP: 002b:00007fe9ada450c8 EFLAGS: 00000246 ORIG_RAX: 000000000000002c RAX: ffffffffffffffda RBX: 00007fe9acd9bf80 RCX: 00007fe9acc7cae9 RDX: 000000000000fce0 RSI: 00000000200002c0 RDI: 0000000000000003 RBP: 00007fe9accc847a R08: 0000000020000140 R09: 0000000000000014 R10: 0000000000000004 R11: 0000000000000246 R12: 0000000000000000 R13: 000000000000000b R14: 00007fe9acd9bf80 R15: 00007ffd5427ae78 Allocated by task 22323: kasan_save_stack+0x33/0x50 mm/kasan/common.c:45 kasan_set_track+0x25/0x30 mm/kasan/common.c:52 ____kasan_kmalloc mm/kasan/common.c:374 [inline] __kasan_kmalloc+0xa2/0xb0 mm/kasan/common.c:383 kasan_kmalloc include/linux/kasan.h:198 [inline] __do_kmalloc_node mm/slab_common.c:1007 [inline] __kmalloc_node_track_caller+0x5a/0x90 mm/slab_common.c:1027 kmalloc_reserve+0xef/0x260 net/core/skbuff.c:582 __alloc_skb+0x12b/0x330 net/core/skbuff.c:651 alloc_skb include/linux/skbuff.h:1298 [inline] alloc_skb_with_frags+0xe4/0x710 net/core/skbuff.c:6331 sock_alloc_send_pskb+0x7e4/0x970 net/core/sock.c:2780 packet_alloc_skb net/packet/af_packet.c:2930 [inline] packet_snd net/packet/af_packet.c:3024 [inline] packet_sendmsg+0x1e2a/0x5200 net/packet/af_packet.c:3113 sock_sendmsg_nosec net/socket.c:730 [inline] __sock_sendmsg+0xd5/0x180 net/socket.c:745 __sys_sendto+0x255/0x340 
net/socket.c:2190 __do_sys_sendto net/socket.c:2202 [inline] __se_sys_sendto net/socket.c:2198 [inline] __x64_sys_sendto+0xe0/0x1b0 net/socket.c:2198 do_syscall_x64 arch/x86/entry/common.c:51 [inline] do_syscall_64+0x40/0x110 arch/x86/entry/common.c:82 entry_SYSCALL_64_after_hwframe+0x63/0x6b Freed by task 22323: kasan_save_stack+0x33/0x50 mm/kasan/common.c:45 kasan_set_track+0x25/0x30 mm/kasan/common.c:52 kasan_save_free_info+0x2b/0x40 mm/kasan/generic.c:522 ____kasan_slab_free mm/kasan/common.c:236 [inline] ____kasan_slab_free+0x15b/0x1b0 mm/kasan/common.c:200 kasan_slab_free include/linux/kasan.h:164 [inline] slab_free_hook mm/slub.c:1800 [inline] slab_free_freelist_hook+0x114/0x1e0 mm/slub.c:1826 slab_free mm/slub.c:3809 [inline] __kmem_cache_free+0xc0/0x180 mm/slub.c:3822 skb_kfree_head net/core/skbuff.c:950 [inline] skb_free_head+0x110/0x1b0 net/core/skbuff.c:962 pskb_expand_head+0x3c5/0x1170 net/core/skbuff.c:2130 __pskb_pull_tail+0xe1/0x1830 net/core/skbuff.c:2655 pskb_may_pull_reason include/linux/skbuff.h:2685 [inline] pskb_may_pull include/linux/skbuff.h:2693 [inline] ife_decode+0x394/0x4f0 net/ife/ife.c:82 tcf_ife_decode net/sched/act_ife.c:727 [inline] tcf_ife_act+0x43b/0x1cd0 net/sched/act_ife.c:879 tc_act include/net/tc_wrapper.h:221 [inline] tcf_action_exec+0x1ac/0x620 net/sched/act_api.c:1079 tcf_exts_exec include/net/pkt_cls.h:344 [inline] mall_classify+0x201/0x310 net/sched/cls_matchall.c:42 tc_classify include/net/tc_wrapper.h:227 [inline] __tcf_classify net/sched/cls_api.c:1703 [inline] tcf_classify+0x82f/0x1260 net/sched/cls_api.c:1800 hfsc_classify net/sched/sch_hfsc.c:1147 [inline] hfsc_enqueue+0x315/0x1060 net/sched/sch_hfsc.c:1546 dev_qdisc_enqueue+0x3f/0x230 net/core/dev.c:3739 __dev_xmit_skb net/core/dev.c:3828 [inline] __dev_queue_xmit+0x1de1/0x3d30 net/core/dev.c:4311 dev_queue_xmit include/linux/netdevice.h:3165 [inline] packet_xmit+0x237/0x350 net/packet/af_packet.c:276 packet_snd net/packet/af_packet.c:3081 [inline] 
packet_sendmsg+0x24aa/0x5200 net/packet/af_packet.c:3113 sock_sendmsg_nosec net/socket.c:730 [inline] __sock_sendmsg+0xd5/0x180 net/socket.c:745 __sys_sendto+0x255/0x340 net/socket.c:2190 __do_sys_sendto net/socket.c:2202 [inline] __se_sys_sendto net/socket.c:2198 [inline] __x64_sys_sendto+0xe0/0x1b0 net/socket.c:2198 do_syscall_x64 arch/x86/entry/common.c:51 [inline] do_syscall_64+0x40/0x110 arch/x86/entry/common.c:82 entry_SYSCALL_64_after_hwframe+0x63/0x6b The buggy address belongs to the object at ffff88802d730000 which belongs to the cache kmalloc-8k of size 8192 The buggy address is located 164 bytes inside of freed 8192-byte region [ffff88802d730000, ffff88802d732000) The buggy address belongs to the physical page: page:ffffea0000b5cc00 refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x2d730 head:ffffea0000b5cc00 order:3 entire_mapcount:0 nr_pages_mapped:0 pincount:0 flags: 0xfff00000000840(slab|head|node=0|zone=1|lastcpupid=0x7ff) page_type: 0xffffffff() raw: 00fff00000000840 ffff888013042280 dead000000000122 0000000000000000 raw: 0000000000000000 0000000080020002 00000001ffffffff 0000000000000000 page dumped because: kasan: bad access detected page_owner tracks the page as allocated page last allocated via order 3, migratetype Unmovable, gfp_mask 0x1d20c0(__GFP_IO|__GFP_FS|__GFP_NOWARN|__GFP_NORETRY|__GFP_COMP|__GFP_NOMEMALLOC|__GFP_HARDWALL), pid 22323, tgid 22320 (syz-executor.5), ts 950317230369, free_ts 950233467461 set_page_owner include/linux/page_owner.h:31 [inline] post_alloc_hook+0x2d0/0x350 mm/page_alloc.c:1544 prep_new_page mm/page_alloc.c:1551 [inline] get_page_from_freelist+0xa28/0x3730 mm/page_alloc.c:3319 __alloc_pages+0x22e/0x2420 mm/page_alloc.c:4575 alloc_pages_mpol+0x258/0x5f0 mm/mempolicy.c:2133 alloc_slab_page mm/slub.c:1870 [inline] allocate_slab mm/slub.c:2017 [inline] new_slab+0x283/0x3c0 mm/slub.c:2070 ___slab_alloc+0x979/0x1500 mm/slub.c:3223 __slab_alloc.constprop.0+0x56/0xa0 mm/slub.c:3322 __slab_alloc_node 
mm/slub.c:3375 [inline] slab_alloc_node mm/slub.c:3468 [inline] __kmem_cache_alloc_node+0x131/0x310 mm/slub.c:3517 __do_kmalloc_node mm/slab_common.c:1006 [inline] __kmalloc_node_track_caller+0x4a/0x90 mm/slab_common.c:1027 kmalloc_reserve+0xef/0x260 net/core/skbuff.c:582 __alloc_skb+0x12b/0x330 net/core/skbuff.c:651 alloc_skb include/linux/skbuff.h:1298 [inline] alloc_skb_with_frags+0xe4/0x710 net/core/skbuff.c:6331 sock_alloc_send_pskb+0x7e4/0x970 net/core/sock.c:2780 packet_alloc_skb net/packet/af_packet.c:2930 [inline] packet_snd net/packet/af_packet.c:3024 [inline] packet_sendmsg+0x1e2a/0x5200 net/packet/af_packet.c:3113 sock_sendmsg_nosec net/socket.c:730 [inline] __sock_sendmsg+0xd5/0x180 net/socket.c:745 __sys_sendto+0x255/0x340 net/socket.c:2190 page last free stack trace: reset_page_owner include/linux/page_owner.h:24 [inline] free_pages_prepare mm/page_alloc.c:1144 [inline] free_unref_page_prepare+0x53c/0xb80 mm/page_alloc.c:2354 free_unref_page+0x33/0x3b0 mm/page_alloc.c:2494 __unfreeze_partials+0x226/0x240 mm/slub.c:2655 qlink_free mm/kasan/quarantine.c:168 [inline] qlist_free_all+0x6a/0x170 mm/kasan/quarantine.c:187 kasan_quarantine_reduce+0x18e/0x1d0 mm/kasan/quarantine.c:294 __kasan_slab_alloc+0x65/0x90 mm/kasan/common.c:305 kasan_slab_alloc include/linux/kasan.h:188 [inline] slab_post_alloc_hook mm/slab.h:763 [inline] slab_alloc_node mm/slub.c:3478 [inline] slab_alloc mm/slub.c:3486 [inline] __kmem_cache_alloc_lru mm/slub.c:3493 [inline] kmem_cache_alloc_lru+0x219/0x6f0 mm/slub.c:3509 alloc_inode_sb include/linux/fs.h:2937 [inline] ext4_alloc_inode+0x28/0x650 fs/ext4/super.c:1408 alloc_inode+0x5d/0x220 fs/inode.c:261 new_inode_pseudo fs/inode.c:1006 [inline] new_inode+0x22/0x260 fs/inode.c:1032 __ext4_new_inode+0x333/0x5200 fs/ext4/ialloc.c:958 ext4_symlink+0x5d7/0xa20 fs/ext4/namei.c:3398 vfs_symlink fs/namei.c:4464 [inline] vfs_symlink+0x3e5/0x620 fs/namei.c:4448 do_symlinkat+0x25f/0x310 fs/namei.c:4490 __do_sys_symlinkat fs/namei.c:4506 [inline] 
__se_sys_symlinkat fs/namei.c:4503 [inline] __x64_sys_symlinkat+0x97/0xc0 fs/namei.c:4503 do_syscall_x64 arch/x86/entry/common.c:51 [inline] do_syscall_64+0x40/0x110 arch/x86/entry/common.c:82 Fixes: d57493d6d1be ("net: sched: ife: check on metadata length") Reported-by: syzbot Signed-off-by: Eric Dumazet Cc: Jamal Hadi Salim Cc: Alexander Aring Acked-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- net/ife/ife.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/ife/ife.c b/net/ife/ife.c index 13bbf8cb6a39..be05b690b9ef 100644 --- a/net/ife/ife.c +++ b/net/ife/ife.c @@ -82,6 +82,7 @@ void *ife_decode(struct sk_buff *skb, u16 *metalen) if (unlikely(!pskb_may_pull(skb, total_pull))) return NULL; + ifehdr = (struct ifeheadr *)(skb->data + skb->dev->hard_header_len); skb_set_mac_header(skb, total_pull); __skb_pull(skb, total_pull); *metalen = ifehdrln - IFE_METAHDRLEN; From 309fdb1c33fe726d92d0030481346f24e1b01f07 Mon Sep 17 00:00:00 2001 From: Zhipeng Lu Date: Thu, 14 Dec 2023 21:04:04 +0800 Subject: [PATCH 38/69] ethernet: atheros: fix a memleak in atl1e_setup_ring_resources In the error handling of 'offset > adapter->ring_size', the tx_ring->tx_buffer allocated by kzalloc should be freed, instead of 'goto failed' instantly. Fixes: a6a5325239c2 ("atl1e: Atheros L1E Gigabit Ethernet driver") Signed-off-by: Zhipeng Lu Reviewed-by: Suman Ghosh Signed-off-by: David S. 
Miller --- drivers/net/ethernet/atheros/atl1e/atl1e_main.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/atheros/atl1e/atl1e_main.c b/drivers/net/ethernet/atheros/atl1e/atl1e_main.c index 5935be190b9e..5f2a6fcba967 100644 --- a/drivers/net/ethernet/atheros/atl1e/atl1e_main.c +++ b/drivers/net/ethernet/atheros/atl1e/atl1e_main.c @@ -866,10 +866,13 @@ static int atl1e_setup_ring_resources(struct atl1e_adapter *adapter) netdev_err(adapter->netdev, "offset(%d) > ring size(%d) !!\n", offset, adapter->ring_size); err = -1; - goto failed; + goto free_buffer; } return 0; +free_buffer: + kfree(tx_ring->tx_buffer); + tx_ring->tx_buffer = NULL; failed: if (adapter->ring_vir_addr != NULL) { dma_free_coherent(&pdev->dev, adapter->ring_size, From 64b8bc7d5f1434c636a40bdcfcd42b278d1714be Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 14 Dec 2023 15:27:47 +0000 Subject: [PATCH 39/69] net/rose: fix races in rose_kill_by_device() syzbot found an interesting netdev refcounting issue in net/rose/af_rose.c, thanks to CONFIG_NET_DEV_REFCNT_TRACKER=y [1] Problem is that rose_kill_by_device() can change rose->device while other threads do not expect the pointer to be changed. We have to first collect sockets in a temporary array, then perform the changes while holding the socket lock and rose_list_lock spinlock (in this order) Change rose_release() to also acquire rose_list_lock before releasing the netdev refcount. [1] [ 1185.055088][ T7889] ref_tracker: reference already released. 
[ 1185.061476][ T7889] ref_tracker: allocated in: [ 1185.066081][ T7889] rose_bind+0x4ab/0xd10 [ 1185.070446][ T7889] __sys_bind+0x1ec/0x220 [ 1185.074818][ T7889] __x64_sys_bind+0x72/0xb0 [ 1185.079356][ T7889] do_syscall_64+0x40/0x110 [ 1185.083897][ T7889] entry_SYSCALL_64_after_hwframe+0x63/0x6b [ 1185.089835][ T7889] ref_tracker: freed in: [ 1185.094088][ T7889] rose_release+0x2f5/0x570 [ 1185.098629][ T7889] __sock_release+0xae/0x260 [ 1185.103262][ T7889] sock_close+0x1c/0x20 [ 1185.107453][ T7889] __fput+0x270/0xbb0 [ 1185.111467][ T7889] task_work_run+0x14d/0x240 [ 1185.116085][ T7889] get_signal+0x106f/0x2790 [ 1185.120622][ T7889] arch_do_signal_or_restart+0x90/0x7f0 [ 1185.126205][ T7889] exit_to_user_mode_prepare+0x121/0x240 [ 1185.131846][ T7889] syscall_exit_to_user_mode+0x1e/0x60 [ 1185.137293][ T7889] do_syscall_64+0x4d/0x110 [ 1185.141783][ T7889] entry_SYSCALL_64_after_hwframe+0x63/0x6b [ 1185.148085][ T7889] ------------[ cut here ]------------ WARNING: CPU: 1 PID: 7889 at lib/ref_tracker.c:255 ref_tracker_free+0x61a/0x810 lib/ref_tracker.c:255 Modules linked in: CPU: 1 PID: 7889 Comm: syz-executor.2 Not tainted 6.7.0-rc4-syzkaller-00162-g65c95f78917e #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 11/10/2023 RIP: 0010:ref_tracker_free+0x61a/0x810 lib/ref_tracker.c:255 Code: 00 44 8b 6b 18 31 ff 44 89 ee e8 21 62 f5 fc 45 85 ed 0f 85 a6 00 00 00 e8 a3 66 f5 fc 48 8b 34 24 48 89 ef e8 27 5f f1 05 90 <0f> 0b 90 bb ea ff ff ff e9 52 fd ff ff e8 84 66 f5 fc 4c 8d 6d 44 RSP: 0018:ffffc90004917850 EFLAGS: 00010202 RAX: 0000000000000201 RBX: ffff88802618f4c0 RCX: 0000000000000000 RDX: 0000000000000202 RSI: ffffffff8accb920 RDI: 0000000000000001 RBP: ffff8880269ea5b8 R08: 0000000000000001 R09: fffffbfff23e35f6 R10: ffffffff91f1afb7 R11: 0000000000000001 R12: 1ffff92000922f0c R13: 0000000005a2039b R14: ffff88802618f4d8 R15: 00000000ffffffff FS: 00007f0a720ef6c0(0000) GS:ffff8880b9900000(0000) knlGS:0000000000000000 CS: 
0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f43a819d988 CR3: 0000000076c64000 CR4: 00000000003506f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: netdev_tracker_free include/linux/netdevice.h:4127 [inline] netdev_put include/linux/netdevice.h:4144 [inline] netdev_put include/linux/netdevice.h:4140 [inline] rose_kill_by_device net/rose/af_rose.c:195 [inline] rose_device_event+0x25d/0x330 net/rose/af_rose.c:218 notifier_call_chain+0xb6/0x3b0 kernel/notifier.c:93 call_netdevice_notifiers_info+0xbe/0x130 net/core/dev.c:1967 call_netdevice_notifiers_extack net/core/dev.c:2005 [inline] call_netdevice_notifiers net/core/dev.c:2019 [inline] __dev_notify_flags+0x1f5/0x2e0 net/core/dev.c:8646 dev_change_flags+0x122/0x170 net/core/dev.c:8682 dev_ifsioc+0x9ad/0x1090 net/core/dev_ioctl.c:529 dev_ioctl+0x224/0x1090 net/core/dev_ioctl.c:786 sock_do_ioctl+0x198/0x270 net/socket.c:1234 sock_ioctl+0x22e/0x6b0 net/socket.c:1339 vfs_ioctl fs/ioctl.c:51 [inline] __do_sys_ioctl fs/ioctl.c:871 [inline] __se_sys_ioctl fs/ioctl.c:857 [inline] __x64_sys_ioctl+0x18f/0x210 fs/ioctl.c:857 do_syscall_x64 arch/x86/entry/common.c:52 [inline] do_syscall_64+0x40/0x110 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x63/0x6b RIP: 0033:0x7f0a7147cba9 Code: 28 00 00 00 75 05 48 83 c4 28 c3 e8 e1 20 00 00 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 b0 ff ff ff f7 d8 64 89 01 48 RSP: 002b:00007f0a720ef0c8 EFLAGS: 00000246 ORIG_RAX: 0000000000000010 RAX: ffffffffffffffda RBX: 00007f0a7159bf80 RCX: 00007f0a7147cba9 RDX: 0000000020000040 RSI: 0000000000008914 RDI: 0000000000000004 RBP: 00007f0a714c847a R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000 R13: 000000000000000b R14: 00007f0a7159bf80 R15: 00007ffc8bb3a5f8 Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") 
Reported-by: syzbot Signed-off-by: Eric Dumazet Cc: Bernard Pidoux Signed-off-by: David S. Miller --- net/rose/af_rose.c | 41 +++++++++++++++++++++++++++++++++++------ 1 file changed, 35 insertions(+), 6 deletions(-) diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c index ecb91ad4ce63..ef81d019b20f 100644 --- a/net/rose/af_rose.c +++ b/net/rose/af_rose.c @@ -182,21 +182,47 @@ void rose_kill_by_neigh(struct rose_neigh *neigh) */ static void rose_kill_by_device(struct net_device *dev) { - struct sock *s; + struct sock *sk, *array[16]; + struct rose_sock *rose; + bool rescan; + int i, cnt; +start: + rescan = false; + cnt = 0; spin_lock_bh(&rose_list_lock); - sk_for_each(s, &rose_list) { - struct rose_sock *rose = rose_sk(s); - + sk_for_each(sk, &rose_list) { + rose = rose_sk(sk); if (rose->device == dev) { - rose_disconnect(s, ENETUNREACH, ROSE_OUT_OF_ORDER, 0); + if (cnt == ARRAY_SIZE(array)) { + rescan = true; + break; + } + sock_hold(sk); + array[cnt++] = sk; + } + } + spin_unlock_bh(&rose_list_lock); + + for (i = 0; i < cnt; i++) { + sk = array[i]; + rose = rose_sk(sk); + lock_sock(sk); + spin_lock_bh(&rose_list_lock); + if (rose->device == dev) { + rose_disconnect(sk, ENETUNREACH, ROSE_OUT_OF_ORDER, 0); if (rose->neighbour) rose->neighbour->use--; netdev_put(rose->device, &rose->dev_tracker); rose->device = NULL; } + spin_unlock_bh(&rose_list_lock); + release_sock(sk); + sock_put(sk); + cond_resched(); } - spin_unlock_bh(&rose_list_lock); + if (rescan) + goto start; } /* @@ -656,7 +682,10 @@ static int rose_release(struct socket *sock) break; } + spin_lock_bh(&rose_list_lock); netdev_put(rose->device, &rose->dev_tracker); + rose->device = NULL; + spin_unlock_bh(&rose_list_lock); sock->sk = NULL; release_sock(sk); sock_put(sk); From f67eabffb57d0bee379994a18ec5f462b2cbdf86 Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Mon, 23 Oct 2023 16:26:23 -0700 Subject: [PATCH 40/69] Bluetooth: Fix not notifying when connection encryption changes Some
layers such as SMP depend on getting notified about encryption changes immediately as they only allow certain PDU to be transmitted over an encrypted link which may cause SMP implementation to reject valid PDUs received thus causing pairing to fail when it shouldn't. Fixes: 7aca0ac4792e ("Bluetooth: Wait for HCI_OP_WRITE_AUTH_PAYLOAD_TO to complete") Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/hci_event.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 0849e0dafa95..5b6fd625fc09 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -820,8 +820,6 @@ static u8 hci_cc_write_auth_payload_timeout(struct hci_dev *hdev, void *data, if (!rp->status) conn->auth_payload_timeout = get_unaligned_le16(sent + 2); - hci_encrypt_cfm(conn, 0); - unlock: hci_dev_unlock(hdev); @@ -3683,12 +3681,8 @@ static void hci_encrypt_change_evt(struct hci_dev *hdev, void *data, cp.handle = cpu_to_le16(conn->handle); cp.timeout = cpu_to_le16(hdev->auth_payload_timeout); if (hci_send_cmd(conn->hdev, HCI_OP_WRITE_AUTH_PAYLOAD_TO, - sizeof(cp), &cp)) { + sizeof(cp), &cp)) bt_dev_err(hdev, "write auth payload timeout failed"); - goto notify; - } - - goto unlock; } notify: From 769bf60e17ee1a56a81e7c031192c3928312c52e Mon Sep 17 00:00:00 2001 From: Ying Hsu Date: Fri, 10 Nov 2023 01:46:05 +0000 Subject: [PATCH 41/69] Bluetooth: Fix deadlock in vhci_send_frame syzbot found a potential circular dependency leading to a deadlock: -> #3 (&hdev->req_lock){+.+.}-{3:3}: __mutex_lock_common+0x1b6/0x1bc2 kernel/locking/mutex.c:599 __mutex_lock kernel/locking/mutex.c:732 [inline] mutex_lock_nested+0x17/0x1c kernel/locking/mutex.c:784 hci_dev_do_close+0x3f/0x9f net/bluetooth/hci_core.c:551 hci_rfkill_set_block+0x130/0x1ac net/bluetooth/hci_core.c:935 rfkill_set_block+0x1e6/0x3b8 net/rfkill/core.c:345 rfkill_fop_write+0x2d8/0x672 net/rfkill/core.c:1274 vfs_write+0x277/0xcf5 fs/read_write.c:594 
ksys_write+0x19b/0x2bd fs/read_write.c:650 do_syscall_x64 arch/x86/entry/common.c:55 [inline] do_syscall_64+0x51/0xba arch/x86/entry/common.c:93 entry_SYSCALL_64_after_hwframe+0x61/0xcb -> #2 (rfkill_global_mutex){+.+.}-{3:3}: __mutex_lock_common+0x1b6/0x1bc2 kernel/locking/mutex.c:599 __mutex_lock kernel/locking/mutex.c:732 [inline] mutex_lock_nested+0x17/0x1c kernel/locking/mutex.c:784 rfkill_register+0x30/0x7e3 net/rfkill/core.c:1045 hci_register_dev+0x48f/0x96d net/bluetooth/hci_core.c:2622 __vhci_create_device drivers/bluetooth/hci_vhci.c:341 [inline] vhci_create_device+0x3ad/0x68f drivers/bluetooth/hci_vhci.c:374 vhci_get_user drivers/bluetooth/hci_vhci.c:431 [inline] vhci_write+0x37b/0x429 drivers/bluetooth/hci_vhci.c:511 call_write_iter include/linux/fs.h:2109 [inline] new_sync_write fs/read_write.c:509 [inline] vfs_write+0xaa8/0xcf5 fs/read_write.c:596 ksys_write+0x19b/0x2bd fs/read_write.c:650 do_syscall_x64 arch/x86/entry/common.c:55 [inline] do_syscall_64+0x51/0xba arch/x86/entry/common.c:93 entry_SYSCALL_64_after_hwframe+0x61/0xcb -> #1 (&data->open_mutex){+.+.}-{3:3}: __mutex_lock_common+0x1b6/0x1bc2 kernel/locking/mutex.c:599 __mutex_lock kernel/locking/mutex.c:732 [inline] mutex_lock_nested+0x17/0x1c kernel/locking/mutex.c:784 vhci_send_frame+0x68/0x9c drivers/bluetooth/hci_vhci.c:75 hci_send_frame+0x1cc/0x2ff net/bluetooth/hci_core.c:2989 hci_sched_acl_pkt net/bluetooth/hci_core.c:3498 [inline] hci_sched_acl net/bluetooth/hci_core.c:3583 [inline] hci_tx_work+0xb94/0x1a60 net/bluetooth/hci_core.c:3654 process_one_work+0x901/0xfb8 kernel/workqueue.c:2310 worker_thread+0xa67/0x1003 kernel/workqueue.c:2457 kthread+0x36a/0x430 kernel/kthread.c:319 ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:298 -> #0 ((work_completion)(&hdev->tx_work)){+.+.}-{0:0}: check_prev_add kernel/locking/lockdep.c:3053 [inline] check_prevs_add kernel/locking/lockdep.c:3172 [inline] validate_chain kernel/locking/lockdep.c:3787 [inline] __lock_acquire+0x2d32/0x77fa 
kernel/locking/lockdep.c:5011 lock_acquire+0x273/0x4d5 kernel/locking/lockdep.c:5622 __flush_work+0xee/0x19f kernel/workqueue.c:3090 hci_dev_close_sync+0x32f/0x1113 net/bluetooth/hci_sync.c:4352 hci_dev_do_close+0x47/0x9f net/bluetooth/hci_core.c:553 hci_rfkill_set_block+0x130/0x1ac net/bluetooth/hci_core.c:935 rfkill_set_block+0x1e6/0x3b8 net/rfkill/core.c:345 rfkill_fop_write+0x2d8/0x672 net/rfkill/core.c:1274 vfs_write+0x277/0xcf5 fs/read_write.c:594 ksys_write+0x19b/0x2bd fs/read_write.c:650 do_syscall_x64 arch/x86/entry/common.c:55 [inline] do_syscall_64+0x51/0xba arch/x86/entry/common.c:93 entry_SYSCALL_64_after_hwframe+0x61/0xcb This change removes the need for acquiring the open_mutex in vhci_send_frame, thus eliminating the potential deadlock while maintaining the required packet ordering. Fixes: 92d4abd66f70 ("Bluetooth: vhci: Fix race when opening vhci device") Signed-off-by: Ying Hsu Signed-off-by: Luiz Augusto von Dentz --- drivers/bluetooth/hci_vhci.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/bluetooth/hci_vhci.c b/drivers/bluetooth/hci_vhci.c index f3892e9ce800..572d68d52965 100644 --- a/drivers/bluetooth/hci_vhci.c +++ b/drivers/bluetooth/hci_vhci.c @@ -11,6 +11,7 @@ #include #include +#include #include #include #include @@ -44,6 +45,7 @@ struct vhci_data { bool wakeup; __u16 msft_opcode; bool aosp_capable; + atomic_t initialized; }; static int vhci_open_dev(struct hci_dev *hdev) @@ -75,11 +77,10 @@ static int vhci_send_frame(struct hci_dev *hdev, struct sk_buff *skb) memcpy(skb_push(skb, 1), &hci_skb_pkt_type(skb), 1); - mutex_lock(&data->open_mutex); skb_queue_tail(&data->readq, skb); - mutex_unlock(&data->open_mutex); - wake_up_interruptible(&data->read_wait); + if (atomic_read(&data->initialized)) + wake_up_interruptible(&data->read_wait); return 0; } @@ -464,7 +465,8 @@ static int __vhci_create_device(struct vhci_data *data, __u8 opcode) skb_put_u8(skb, 0xff); skb_put_u8(skb, opcode); 
put_unaligned_le16(hdev->id, skb_put(skb, 2)); - skb_queue_tail(&data->readq, skb); + skb_queue_head(&data->readq, skb); + atomic_inc(&data->initialized); wake_up_interruptible(&data->read_wait); return 0; From 99e67d46e5ff3c7c901af6009edec72d3d363be8 Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Mon, 20 Nov 2023 10:04:39 -0500 Subject: [PATCH 42/69] Bluetooth: hci_event: Fix not checking if HCI_OP_INQUIRY has been sent Before setting HCI_INQUIRY bit check if HCI_OP_INQUIRY was really sent otherwise the controller may be generating invalid events or, more likely, it is a result of fuzzing tools attempting to test the right behavior of the stack when unexpected events are generated. Cc: stable@vger.kernel.org Link: https://bugzilla.kernel.org/show_bug.cgi?id=218151 Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/hci_event.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 5b6fd625fc09..a94decff233e 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -2302,7 +2302,8 @@ static void hci_cs_inquiry(struct hci_dev *hdev, __u8 status) return; } - set_bit(HCI_INQUIRY, &hdev->flags); + if (hci_sent_cmd_data(hdev, HCI_OP_INQUIRY)) + set_bit(HCI_INQUIRY, &hdev->flags); } static void hci_cs_create_conn(struct hci_dev *hdev, __u8 status) From a5812c68d849505ea657f653446512b85887f813 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 22 Nov 2023 23:17:44 +0100 Subject: [PATCH 43/69] Bluetooth: hci_event: shut up a false-positive warning Turning on -Wstringop-overflow globally exposed a misleading compiler warning in bluetooth: net/bluetooth/hci_event.c: In function 'hci_cc_read_class_of_dev': net/bluetooth/hci_event.c:524:9: error: 'memcpy' writing 3 bytes into a region of size 0 overflows the destination [-Werror=stringop-overflow=] 524 | memcpy(hdev->dev_class, rp->dev_class, 3); | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ The problem here is the
check for hdev being NULL in bt_dev_dbg() that leads the compiler to conclude that hdev->dev_class might be an invalid pointer access. Add another explicit check for the same condition to make sure gcc sees this cannot happen. Fixes: a9de9248064b ("[Bluetooth] Switch from OGF+OCF to using only opcodes") Fixes: 1b56c90018f0 ("Makefile: Enable -Wstringop-overflow globally") Signed-off-by: Arnd Bergmann Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/hci_event.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index a94decff233e..cc5fd290d529 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -516,6 +516,9 @@ static u8 hci_cc_read_class_of_dev(struct hci_dev *hdev, void *data, { struct hci_rp_read_class_of_dev *rp = data; + if (WARN_ON(!hdev)) + return HCI_ERROR_UNSPECIFIED; + bt_dev_dbg(hdev, "status 0x%2.2x", rp->status); if (rp->status) From 50efc63d1a7a7b9a6ed21adae1b9a7123ec8abc0 Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Fri, 8 Dec 2023 17:22:29 -0500 Subject: [PATCH 44/69] Bluetooth: hci_core: Fix hci_conn_hash_lookup_cis hci_conn_hash_lookup_cis shall always match the requested CIG and CIS ids even when they are unset as otherwise it results in not being able to bind/connect different sockets to the same address as that would result in having multiple sockets mapping to the same hci_conn which doesn't really work and prevents BAP audio configuration such as AC 6(i) when CIG and CIS are left unset.
Fixes: c14516faede3 ("Bluetooth: hci_conn: Fix not matching by CIS ID") Signed-off-by: Luiz Augusto von Dentz --- include/net/bluetooth/hci_core.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 20988623c5cc..fb5e3ef3ec2f 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -1227,11 +1227,11 @@ static inline struct hci_conn *hci_conn_hash_lookup_cis(struct hci_dev *hdev, continue; /* Match CIG ID if set */ - if (cig != BT_ISO_QOS_CIG_UNSET && cig != c->iso_qos.ucast.cig) + if (cig != c->iso_qos.ucast.cig) continue; /* Match CIS ID if set */ - if (id != BT_ISO_QOS_CIS_UNSET && id != c->iso_qos.ucast.cis) + if (id != c->iso_qos.ucast.cis) continue; /* Match destination address if set */ From 78b99eb1faa7371bf9c534690f26a71b6996622d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fr=C3=A9d=C3=A9ric=20Danis?= Date: Fri, 8 Dec 2023 18:41:50 +0100 Subject: [PATCH 45/69] Bluetooth: L2CAP: Send reject on command corrupted request MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit L2CAP/COS/CED/BI-02-C PTS test send a malformed L2CAP signaling packet with 2 commands in it (a connection request and an unknown command) and expect to get a connection response packet and a command reject packet. The second is currently not sent. 
Cc: stable@vger.kernel.org Signed-off-by: Frédéric Danis Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/l2cap_core.c | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index 17ca13e8c044..baeebee41cd9 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -6492,6 +6492,14 @@ drop: kfree_skb(skb); } +static inline void l2cap_sig_send_rej(struct l2cap_conn *conn, u16 ident) +{ + struct l2cap_cmd_rej_unk rej; + + rej.reason = cpu_to_le16(L2CAP_REJ_NOT_UNDERSTOOD); + l2cap_send_cmd(conn, ident, L2CAP_COMMAND_REJ, sizeof(rej), &rej); +} + static inline void l2cap_sig_channel(struct l2cap_conn *conn, struct sk_buff *skb) { @@ -6517,23 +6525,24 @@ static inline void l2cap_sig_channel(struct l2cap_conn *conn, if (len > skb->len || !cmd->ident) { BT_DBG("corrupted command"); + l2cap_sig_send_rej(conn, cmd->ident); break; } err = l2cap_bredr_sig_cmd(conn, cmd, len, skb->data); if (err) { - struct l2cap_cmd_rej_unk rej; - BT_ERR("Wrong link type (%d)", err); - - rej.reason = cpu_to_le16(L2CAP_REJ_NOT_UNDERSTOOD); - l2cap_send_cmd(conn, cmd->ident, L2CAP_COMMAND_REJ, - sizeof(rej), &rej); + l2cap_sig_send_rej(conn, cmd->ident); } skb_pull(skb, len); } + if (skb->len > 0) { + BT_DBG("corrupted command"); + l2cap_sig_send_rej(conn, 0); + } + drop: kfree_skb(skb); } From 59b047bc98084f8af2c41483e4d68a5adf2fa7f7 Mon Sep 17 00:00:00 2001 From: Xiao Yao Date: Tue, 12 Dec 2023 00:27:18 +0800 Subject: [PATCH 46/69] Bluetooth: MGMT/SMP: Fix address type when using SMP over BREDR/LE If two Bluetooth devices both support BR/EDR and BLE, and also support Secure Connections, then they only need to pair once. The LTK generated during the LE pairing process may be converted into a BR/EDR link key for BR/EDR transport, and conversely, a link key generated during the BR/EDR SSP pairing process can be converted into an LTK for LE transport. 
Hence, the link type of the link key and LTK is not fixed, they can be either an LE LINK or an ACL LINK. Currently, in the mgmt_new_irk/ltk/csrk/link_key functions, the link type is fixed, which could lead to incorrect address types being reported to the application layer. Therefore, it is necessary to add link_type/addr_type to the smp_irk/ltk/csrk and link_key, to ensure the generation of the correct address type. SMP over BREDR: Before Fix: > ACL Data RX: Handle 11 flags 0x02 dlen 12 BR/EDR SMP: Identity Address Information (0x09) len 7 Address: F8:7D:76:F2:12:F3 (OUI F8-7D-76) @ MGMT Event: New Identity Resolving Key (0x0018) plen 30 Random address: 00:00:00:00:00:00 (Non-Resolvable) LE Address: F8:7D:76:F2:12:F3 (OUI F8-7D-76) @ MGMT Event: New Long Term Key (0x000a) plen 37 LE Address: F8:7D:76:F2:12:F3 (OUI F8-7D-76) Key type: Authenticated key from P-256 (0x03) After Fix: > ACL Data RX: Handle 11 flags 0x02 dlen 12 BR/EDR SMP: Identity Address Information (0x09) len 7 Address: F8:7D:76:F2:12:F3 (OUI F8-7D-76) @ MGMT Event: New Identity Resolving Key (0x0018) plen 30 Random address: 00:00:00:00:00:00 (Non-Resolvable) BR/EDR Address: F8:7D:76:F2:12:F3 (OUI F8-7D-76) @ MGMT Event: New Long Term Key (0x000a) plen 37 BR/EDR Address: F8:7D:76:F2:12:F3 (OUI F8-7D-76) Key type: Authenticated key from P-256 (0x03) SMP over LE: Before Fix: @ MGMT Event: New Identity Resolving Key (0x0018) plen 30 Random address: 5F:5C:07:37:47:D5 (Resolvable) LE Address: F8:7D:76:F2:12:F3 (OUI F8-7D-76) @ MGMT Event: New Long Term Key (0x000a) plen 37 LE Address: F8:7D:76:F2:12:F3 (OUI F8-7D-76) Key type: Authenticated key from P-256 (0x03) @ MGMT Event: New Link Key (0x0009) plen 26 BR/EDR Address: F8:7D:76:F2:12:F3 (OUI F8-7D-76) Key type: Authenticated Combination key from P-256 (0x08) After Fix: @ MGMT Event: New Identity Resolving Key (0x0018) plen 30 Random address: 5E:03:1C:00:38:21 (Resolvable) LE Address: F8:7D:76:F2:12:F3 (OUI F8-7D-76) @ MGMT Event: New Long Term Key
(0x000a) plen 37 LE Address: F8:7D:76:F2:12:F3 (OUI F8-7D-76) Key type: Authenticated key from P-256 (0x03) @ MGMT Event: New Link Key (0x0009) plen 26 Store hint: Yes (0x01) LE Address: F8:7D:76:F2:12:F3 (OUI F8-7D-76) Key type: Authenticated Combination key from P-256 (0x08) Cc: stable@vger.kernel.org Signed-off-by: Xiao Yao Signed-off-by: Luiz Augusto von Dentz --- include/net/bluetooth/hci_core.h | 5 +++++ net/bluetooth/mgmt.c | 25 ++++++++++++++++++------- net/bluetooth/smp.c | 7 +++++++ 3 files changed, 30 insertions(+), 7 deletions(-) diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index fb5e3ef3ec2f..a3a1ea2696a8 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -189,6 +189,7 @@ struct blocked_key { struct smp_csrk { bdaddr_t bdaddr; u8 bdaddr_type; + u8 link_type; u8 type; u8 val[16]; }; @@ -198,6 +199,7 @@ struct smp_ltk { struct rcu_head rcu; bdaddr_t bdaddr; u8 bdaddr_type; + u8 link_type; u8 authenticated; u8 type; u8 enc_size; @@ -212,6 +214,7 @@ struct smp_irk { bdaddr_t rpa; bdaddr_t bdaddr; u8 addr_type; + u8 link_type; u8 val[16]; }; @@ -219,6 +222,8 @@ struct link_key { struct list_head list; struct rcu_head rcu; bdaddr_t bdaddr; + u8 bdaddr_type; + u8 link_type; u8 type; u8 val[HCI_LINK_KEY_SIZE]; u8 pin_len; diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index ba2e00646e8e..9dd815b6603f 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -2897,7 +2897,8 @@ static int load_link_keys(struct sock *sk, struct hci_dev *hdev, void *data, for (i = 0; i < key_count; i++) { struct mgmt_link_key_info *key = &cp->keys[i]; - if (key->addr.type != BDADDR_BREDR || key->type > 0x08) + /* Considering SMP over BREDR/LE, there is no need to check addr_type */ + if (key->type > 0x08) return mgmt_cmd_status(sk, hdev->id, MGMT_OP_LOAD_LINK_KEYS, MGMT_STATUS_INVALID_PARAMS); @@ -7130,6 +7131,7 @@ static int load_irks(struct sock *sk, struct hci_dev *hdev, void *cp_data, for 
(i = 0; i < irk_count; i++) { struct mgmt_irk_info *irk = &cp->irks[i]; + u8 addr_type = le_addr_type(irk->addr.type); if (hci_is_blocked_key(hdev, HCI_BLOCKED_KEY_TYPE_IRK, @@ -7139,8 +7141,12 @@ static int load_irks(struct sock *sk, struct hci_dev *hdev, void *cp_data, continue; } + /* When using SMP over BR/EDR, the addr type should be set to BREDR */ + if (irk->addr.type == BDADDR_BREDR) + addr_type = BDADDR_BREDR; + hci_add_irk(hdev, &irk->addr.bdaddr, - le_addr_type(irk->addr.type), irk->val, + addr_type, irk->val, BDADDR_ANY); } @@ -7221,6 +7227,7 @@ static int load_long_term_keys(struct sock *sk, struct hci_dev *hdev, for (i = 0; i < key_count; i++) { struct mgmt_ltk_info *key = &cp->keys[i]; u8 type, authenticated; + u8 addr_type = le_addr_type(key->addr.type); if (hci_is_blocked_key(hdev, HCI_BLOCKED_KEY_TYPE_LTK, @@ -7255,8 +7262,12 @@ static int load_long_term_keys(struct sock *sk, struct hci_dev *hdev, continue; } + /* When using SMP over BR/EDR, the addr type should be set to BREDR */ + if (key->addr.type == BDADDR_BREDR) + addr_type = BDADDR_BREDR; + hci_add_ltk(hdev, &key->addr.bdaddr, - le_addr_type(key->addr.type), type, authenticated, + addr_type, type, authenticated, key->val, key->enc_size, key->ediv, key->rand); } @@ -9523,7 +9534,7 @@ void mgmt_new_link_key(struct hci_dev *hdev, struct link_key *key, ev.store_hint = persistent; bacpy(&ev.key.addr.bdaddr, &key->bdaddr); - ev.key.addr.type = BDADDR_BREDR; + ev.key.addr.type = link_to_bdaddr(key->link_type, key->bdaddr_type); ev.key.type = key->type; memcpy(ev.key.val, key->val, HCI_LINK_KEY_SIZE); ev.key.pin_len = key->pin_len; @@ -9574,7 +9585,7 @@ void mgmt_new_ltk(struct hci_dev *hdev, struct smp_ltk *key, bool persistent) ev.store_hint = persistent; bacpy(&ev.key.addr.bdaddr, &key->bdaddr); - ev.key.addr.type = link_to_bdaddr(LE_LINK, key->bdaddr_type); + ev.key.addr.type = link_to_bdaddr(key->link_type, key->bdaddr_type); ev.key.type = mgmt_ltk_type(key); ev.key.enc_size = key->enc_size; 
ev.key.ediv = key->ediv; @@ -9603,7 +9614,7 @@ void mgmt_new_irk(struct hci_dev *hdev, struct smp_irk *irk, bool persistent) bacpy(&ev.rpa, &irk->rpa); bacpy(&ev.irk.addr.bdaddr, &irk->bdaddr); - ev.irk.addr.type = link_to_bdaddr(LE_LINK, irk->addr_type); + ev.irk.addr.type = link_to_bdaddr(irk->link_type, irk->addr_type); memcpy(ev.irk.val, irk->val, sizeof(irk->val)); mgmt_event(MGMT_EV_NEW_IRK, hdev, &ev, sizeof(ev), NULL); @@ -9632,7 +9643,7 @@ void mgmt_new_csrk(struct hci_dev *hdev, struct smp_csrk *csrk, ev.store_hint = persistent; bacpy(&ev.key.addr.bdaddr, &csrk->bdaddr); - ev.key.addr.type = link_to_bdaddr(LE_LINK, csrk->bdaddr_type); + ev.key.addr.type = link_to_bdaddr(csrk->link_type, csrk->bdaddr_type); ev.key.type = csrk->type; memcpy(ev.key.val, csrk->val, sizeof(csrk->val)); diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c index 5f2f97de295e..1e7ea3a4b7ef 100644 --- a/net/bluetooth/smp.c +++ b/net/bluetooth/smp.c @@ -1059,6 +1059,7 @@ static void smp_notify_keys(struct l2cap_conn *conn) } if (smp->remote_irk) { + smp->remote_irk->link_type = hcon->type; mgmt_new_irk(hdev, smp->remote_irk, persistent); /* Now that user space can be considered to know the @@ -1078,24 +1079,28 @@ static void smp_notify_keys(struct l2cap_conn *conn) } if (smp->csrk) { + smp->csrk->link_type = hcon->type; smp->csrk->bdaddr_type = hcon->dst_type; bacpy(&smp->csrk->bdaddr, &hcon->dst); mgmt_new_csrk(hdev, smp->csrk, persistent); } if (smp->responder_csrk) { + smp->responder_csrk->link_type = hcon->type; smp->responder_csrk->bdaddr_type = hcon->dst_type; bacpy(&smp->responder_csrk->bdaddr, &hcon->dst); mgmt_new_csrk(hdev, smp->responder_csrk, persistent); } if (smp->ltk) { + smp->ltk->link_type = hcon->type; smp->ltk->bdaddr_type = hcon->dst_type; bacpy(&smp->ltk->bdaddr, &hcon->dst); mgmt_new_ltk(hdev, smp->ltk, persistent); } if (smp->responder_ltk) { + smp->responder_ltk->link_type = hcon->type; smp->responder_ltk->bdaddr_type = hcon->dst_type; 
bacpy(&smp->responder_ltk->bdaddr, &hcon->dst); mgmt_new_ltk(hdev, smp->responder_ltk, persistent); @@ -1115,6 +1120,8 @@ static void smp_notify_keys(struct l2cap_conn *conn) key = hci_add_link_key(hdev, smp->conn->hcon, &hcon->dst, smp->link_key, type, 0, &persistent); if (key) { + key->link_type = hcon->type; + key->bdaddr_type = hcon->dst_type; mgmt_new_link_key(hdev, key, persistent); /* Don't keep debug keys around if the relevant From 04a342cc49a8522e99c9b3346371c329d841dcd2 Mon Sep 17 00:00:00 2001 From: Alex Lu Date: Tue, 12 Dec 2023 10:30:34 +0800 Subject: [PATCH 47/69] Bluetooth: Add more enc key size check When we are slave role and receives l2cap conn req when encryption has started, we should check the enc key size to avoid KNOB attack or BLUFFS attack. From SIG recommendation, implementations are advised to reject service-level connections on an encrypted baseband link with key strengths below 7 octets. A simple and clear way to achieve this is to place the enc key size check in hci_cc_read_enc_key_size() The btmon log below shows the case that lacks enc key size check. > HCI Event: Connect Request (0x04) plen 10 Address: BB:22:33:44:55:99 (OUI BB-22-33) Class: 0x480104 Major class: Computer (desktop, notebook, PDA, organizers) Minor class: Desktop workstation Capturing (Scanner, Microphone) Telephony (Cordless telephony, Modem, Headset) Link type: ACL (0x01) < HCI Command: Accept Connection Request (0x01|0x0009) plen 7 Address: BB:22:33:44:55:99 (OUI BB-22-33) Role: Peripheral (0x01) > HCI Event: Command Status (0x0f) plen 4 Accept Connection Request (0x01|0x0009) ncmd 2 Status: Success (0x00) > HCI Event: Connect Complete (0x03) plen 11 Status: Success (0x00) Handle: 1 Address: BB:22:33:44:55:99 (OUI BB-22-33) Link type: ACL (0x01) Encryption: Disabled (0x00) ... 
> HCI Event: Encryption Change (0x08) plen 4 Status: Success (0x00) Handle: 1 Address: BB:22:33:44:55:99 (OUI BB-22-33) Encryption: Enabled with E0 (0x01) < HCI Command: Read Encryption Key Size (0x05|0x0008) plen 2 Handle: 1 Address: BB:22:33:44:55:99 (OUI BB-22-33) > HCI Event: Command Complete (0x0e) plen 7 Read Encryption Key Size (0x05|0x0008) ncmd 2 Status: Success (0x00) Handle: 1 Address: BB:22:33:44:55:99 (OUI BB-22-33) Key size: 6 // We should check the enc key size ... > ACL Data RX: Handle 1 flags 0x02 dlen 12 L2CAP: Connection Request (0x02) ident 3 len 4 PSM: 25 (0x0019) Source CID: 64 < ACL Data TX: Handle 1 flags 0x00 dlen 16 L2CAP: Connection Response (0x03) ident 3 len 8 Destination CID: 64 Source CID: 64 Result: Connection pending (0x0001) Status: Authorization pending (0x0002) > HCI Event: Number of Completed Packets (0x13) plen 5 Num handles: 1 Handle: 1 Address: BB:22:33:44:55:99 (OUI BB-22-33) Count: 1 #35: len 16 (25 Kb/s) Latency: 5 msec (2-7 msec ~4 msec) < ACL Data TX: Handle 1 flags 0x00 dlen 16 L2CAP: Connection Response (0x03) ident 3 len 8 Destination CID: 64 Source CID: 64 Result: Connection successful (0x0000) Status: No further information available (0x0000) Cc: stable@vger.kernel.org Signed-off-by: Alex Lu Signed-off-by: Max Chou Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/hci_event.c | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index cc5fd290d529..ebf17b51072f 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -750,9 +750,23 @@ static u8 hci_cc_read_enc_key_size(struct hci_dev *hdev, void *data, } else { conn->enc_key_size = rp->key_size; status = 0; + + if (conn->enc_key_size < hdev->min_enc_key_size) { + /* As slave role, the conn->state has been set to + * BT_CONNECTED and l2cap conn req might not be received + * yet, at this moment the l2cap layer almost does + * nothing with the non-zero status. 
+ * So we also clear encrypt related bits, and then the + * handler of l2cap conn req will get the right secure + * state at a later time. + */ + status = HCI_ERROR_AUTH_FAILURE; + clear_bit(HCI_CONN_ENCRYPT, &conn->flags); + clear_bit(HCI_CONN_AES_CCM, &conn->flags); + } } - hci_encrypt_cfm(conn, 0); + hci_encrypt_cfm(conn, status); done: hci_dev_unlock(hdev); From 2e07e8348ea454615e268222ae3fc240421be768 Mon Sep 17 00:00:00 2001 From: Hyunwoo Kim Date: Sat, 9 Dec 2023 05:55:18 -0500 Subject: [PATCH 48/69] Bluetooth: af_bluetooth: Fix Use-After-Free in bt_sock_recvmsg This can cause a race with bt_sock_ioctl() because bt_sock_recvmsg() gets the skb from sk->sk_receive_queue and then frees it without holding lock_sock. A use-after-free for a skb occurs with the following flow. ``` bt_sock_recvmsg() -> skb_recv_datagram() -> skb_free_datagram() bt_sock_ioctl() -> skb_peek() ``` Add lock_sock to bt_sock_recvmsg() to fix this issue. Cc: stable@vger.kernel.org Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Hyunwoo Kim Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/af_bluetooth.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c index 336a76165454..b93464ac3517 100644 --- a/net/bluetooth/af_bluetooth.c +++ b/net/bluetooth/af_bluetooth.c @@ -309,11 +309,14 @@ int bt_sock_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, if (flags & MSG_OOB) return -EOPNOTSUPP; + lock_sock(sk); + skb = skb_recv_datagram(sk, flags, &err); if (!skb) { if (sk->sk_shutdown & RCV_SHUTDOWN) - return 0; + err = 0; + release_sock(sk); return err; } @@ -343,6 +346,8 @@ int bt_sock_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, skb_free_datagram(sk, skb); + release_sock(sk); + if (flags & MSG_TRUNC) copied = skblen; From 23c93c3b6275a59f2a685f4a693944b53c31df4e Mon Sep 17 00:00:00 2001 From: Andy Gospodarek Date: Thu, 14 Dec 2023 13:31:38 -0800 Subject: [PATCH 49/69] bnxt_en: 
do not map packet buffers twice Remove double-mapping of DMA buffers as it can prevent page pool entries from being freed. Mapping is managed by page pool infrastructure and was previously managed by the driver in __bnxt_alloc_rx_page before allowing the page pool infrastructure to manage it. Fixes: 578fcfd26e2a ("bnxt_en: Let the page pool manage the DMA mapping") Reviewed-by: Somnath Kotur Signed-off-by: Andy Gospodarek Signed-off-by: Michael Chan Reviewed-by: David Wei Link: https://lore.kernel.org/r/20231214213138.98095-1-michael.chan@broadcom.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c index 96f5ca778c67..8cb9a99154aa 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c @@ -59,7 +59,6 @@ struct bnxt_sw_tx_bd *bnxt_xmit_bd(struct bnxt *bp, for (i = 0; i < num_frags ; i++) { skb_frag_t *frag = &sinfo->frags[i]; struct bnxt_sw_tx_bd *frag_tx_buf; - struct pci_dev *pdev = bp->pdev; dma_addr_t frag_mapping; int frag_len; @@ -73,16 +72,10 @@ struct bnxt_sw_tx_bd *bnxt_xmit_bd(struct bnxt *bp, txbd = &txr->tx_desc_ring[TX_RING(prod)][TX_IDX(prod)]; frag_len = skb_frag_size(frag); - frag_mapping = skb_frag_dma_map(&pdev->dev, frag, 0, - frag_len, DMA_TO_DEVICE); - - if (unlikely(dma_mapping_error(&pdev->dev, frag_mapping))) - return NULL; - - dma_unmap_addr_set(frag_tx_buf, mapping, frag_mapping); - flags = frag_len << TX_BD_LEN_SHIFT; txbd->tx_bd_len_flags_type = cpu_to_le32(flags); + frag_mapping = page_pool_get_dma_addr(skb_frag_page(frag)) + + skb_frag_off(frag); txbd->tx_bd_haddr = cpu_to_le64(frag_mapping); len = frag_len; From 117211aa739a926e6555cfea883be84bee6f1695 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Sat, 16 Dec 2023 00:05:02 +0100 Subject: [PATCH 50/69] bpf: Add missing 
BPF_LINK_TYPE invocations Pengfei Xu reported [1] Syzkaller/KASAN issue found in bpf_link_show_fdinfo. The reason is missing BPF_LINK_TYPE invocation for uprobe multi link and for several other links, adding that. [1] https://lore.kernel.org/bpf/ZXptoKRSLspnk2ie@xpf.sh.intel.com/ Fixes: 89ae89f53d20 ("bpf: Add multi uprobe link") Fixes: e420bed02507 ("bpf: Add fd-based tcx multi-prog infra with link support") Fixes: 84601d6ee68a ("bpf: add bpf_link support for BPF_NETFILTER programs") Fixes: 35dfaad7188c ("netkit, bpf: Add bpf programmable net device") Reported-by: Pengfei Xu Signed-off-by: Jiri Olsa Signed-off-by: Andrii Nakryiko Tested-by: Pengfei Xu Acked-by: Hou Tao Link: https://lore.kernel.org/bpf/20231215230502.2769743-1-jolsa@kernel.org --- include/linux/bpf_types.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h index fc0d6f32c687..94baced5a1ad 100644 --- a/include/linux/bpf_types.h +++ b/include/linux/bpf_types.h @@ -142,9 +142,13 @@ BPF_LINK_TYPE(BPF_LINK_TYPE_ITER, iter) #ifdef CONFIG_NET BPF_LINK_TYPE(BPF_LINK_TYPE_NETNS, netns) BPF_LINK_TYPE(BPF_LINK_TYPE_XDP, xdp) +BPF_LINK_TYPE(BPF_LINK_TYPE_NETFILTER, netfilter) +BPF_LINK_TYPE(BPF_LINK_TYPE_TCX, tcx) +BPF_LINK_TYPE(BPF_LINK_TYPE_NETKIT, netkit) #endif #ifdef CONFIG_PERF_EVENTS BPF_LINK_TYPE(BPF_LINK_TYPE_PERF_EVENT, perf) #endif BPF_LINK_TYPE(BPF_LINK_TYPE_KPROBE_MULTI, kprobe_multi) BPF_LINK_TYPE(BPF_LINK_TYPE_STRUCT_OPS, struct_ops) +BPF_LINK_TYPE(BPF_LINK_TYPE_UPROBE_MULTI, uprobe_multi) From b1dfc0f76231bbf395c59d20a2070684620d5d0f Mon Sep 17 00:00:00 2001 From: Daniel Golle Date: Tue, 12 Dec 2023 00:05:35 +0000 Subject: [PATCH 51/69] net: phy: skip LED triggers on PHYs on SFP modules Calling led_trigger_register() when attaching a PHY located on an SFP module potentially (and practically) leads into a deadlock. Fix this by not calling led_trigger_register() for PHYs located on SFP modules as such modules actually never got any LEDs.
====================================================== WARNING: possible circular locking dependency detected 6.7.0-rc4-next-20231208+ #0 Tainted: G O ------------------------------------------------------ kworker/u8:2/43 is trying to acquire lock: ffffffc08108c4e8 (triggers_list_lock){++++}-{3:3}, at: led_trigger_register+0x4c/0x1a8 but task is already holding lock: ffffff80c5c6f318 (&sfp->sm_mutex){+.+.}-{3:3}, at: cleanup_module+0x2ba8/0x3120 [sfp] which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #3 (&sfp->sm_mutex){+.+.}-{3:3}: __mutex_lock+0x88/0x7a0 mutex_lock_nested+0x20/0x28 cleanup_module+0x2ae0/0x3120 [sfp] sfp_register_bus+0x5c/0x9c sfp_register_socket+0x48/0xd4 cleanup_module+0x271c/0x3120 [sfp] platform_probe+0x64/0xb8 really_probe+0x17c/0x3c0 __driver_probe_device+0x78/0x164 driver_probe_device+0x3c/0xd4 __driver_attach+0xec/0x1f0 bus_for_each_dev+0x60/0xa0 driver_attach+0x20/0x28 bus_add_driver+0x108/0x208 driver_register+0x5c/0x118 __platform_driver_register+0x24/0x2c init_module+0x28/0xa7c [sfp] do_one_initcall+0x70/0x2ec do_init_module+0x54/0x1e4 load_module+0x1b78/0x1c8c __do_sys_init_module+0x1bc/0x2cc __arm64_sys_init_module+0x18/0x20 invoke_syscall.constprop.0+0x4c/0xdc do_el0_svc+0x3c/0xbc el0_svc+0x34/0x80 el0t_64_sync_handler+0xf8/0x124 el0t_64_sync+0x150/0x154 -> #2 (rtnl_mutex){+.+.}-{3:3}: __mutex_lock+0x88/0x7a0 mutex_lock_nested+0x20/0x28 rtnl_lock+0x18/0x20 set_device_name+0x30/0x130 netdev_trig_activate+0x13c/0x1ac led_trigger_set+0x118/0x234 led_trigger_write+0x104/0x17c sysfs_kf_bin_write+0x64/0x80 kernfs_fop_write_iter+0x128/0x1b4 vfs_write+0x178/0x2a4 ksys_write+0x58/0xd4 __arm64_sys_write+0x18/0x20 invoke_syscall.constprop.0+0x4c/0xdc do_el0_svc+0x3c/0xbc el0_svc+0x34/0x80 el0t_64_sync_handler+0xf8/0x124 el0t_64_sync+0x150/0x154 -> #1 (&led_cdev->trigger_lock){++++}-{3:3}: down_write+0x4c/0x13c led_trigger_write+0xf8/0x17c sysfs_kf_bin_write+0x64/0x80 
kernfs_fop_write_iter+0x128/0x1b4 vfs_write+0x178/0x2a4 ksys_write+0x58/0xd4 __arm64_sys_write+0x18/0x20 invoke_syscall.constprop.0+0x4c/0xdc do_el0_svc+0x3c/0xbc el0_svc+0x34/0x80 el0t_64_sync_handler+0xf8/0x124 el0t_64_sync+0x150/0x154 -> #0 (triggers_list_lock){++++}-{3:3}: __lock_acquire+0x12a0/0x2014 lock_acquire+0x100/0x2ac down_write+0x4c/0x13c led_trigger_register+0x4c/0x1a8 phy_led_triggers_register+0x9c/0x214 phy_attach_direct+0x154/0x36c phylink_attach_phy+0x30/0x60 phylink_sfp_connect_phy+0x140/0x510 sfp_add_phy+0x34/0x50 init_module+0x15c/0xa7c [sfp] cleanup_module+0x1d94/0x3120 [sfp] cleanup_module+0x2bb4/0x3120 [sfp] process_one_work+0x1f8/0x4ec worker_thread+0x1e8/0x3d8 kthread+0x104/0x110 ret_from_fork+0x10/0x20 other info that might help us debug this: Chain exists of: triggers_list_lock --> rtnl_mutex --> &sfp->sm_mutex Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock(&sfp->sm_mutex); lock(rtnl_mutex); lock(&sfp->sm_mutex); lock(triggers_list_lock); *** DEADLOCK *** 4 locks held by kworker/u8:2/43: #0: ffffff80c000f938 ((wq_completion)events_power_efficient){+.+.}-{0:0}, at: process_one_work+0x150/0x4ec #1: ffffffc08214bde8 ((work_completion)(&(&sfp->timeout)->work)){+.+.}-{0:0}, at: process_one_work+0x150/0x4ec #2: ffffffc0810902f8 (rtnl_mutex){+.+.}-{3:3}, at: rtnl_lock+0x18/0x20 #3: ffffff80c5c6f318 (&sfp->sm_mutex){+.+.}-{3:3}, at: cleanup_module+0x2ba8/0x3120 [sfp] stack backtrace: CPU: 0 PID: 43 Comm: kworker/u8:2 Tainted: G O 6.7.0-rc4-next-20231208+ #0 Hardware name: Bananapi BPI-R4 (DT) Workqueue: events_power_efficient cleanup_module [sfp] Call trace: dump_backtrace+0xa8/0x10c show_stack+0x14/0x1c dump_stack_lvl+0x5c/0xa0 dump_stack+0x14/0x1c print_circular_bug+0x328/0x430 check_noncircular+0x124/0x134 __lock_acquire+0x12a0/0x2014 lock_acquire+0x100/0x2ac down_write+0x4c/0x13c led_trigger_register+0x4c/0x1a8 phy_led_triggers_register+0x9c/0x214 phy_attach_direct+0x154/0x36c phylink_attach_phy+0x30/0x60 
phylink_sfp_connect_phy+0x140/0x510 sfp_add_phy+0x34/0x50 init_module+0x15c/0xa7c [sfp] cleanup_module+0x1d94/0x3120 [sfp] cleanup_module+0x2bb4/0x3120 [sfp] process_one_work+0x1f8/0x4ec worker_thread+0x1e8/0x3d8 kthread+0x104/0x110 ret_from_fork+0x10/0x20 Signed-off-by: Daniel Golle Fixes: 01e5b728e9e4 ("net: phy: Add a binding for PHY LEDs") Link: https://lore.kernel.org/r/102a9dce38bdf00215735d04cd4704458273ad9c.1702339354.git.daniel@makrotopia.org Signed-off-by: Jakub Kicinski --- drivers/net/phy/phy_device.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index 2ce74593d6e4..a42df2c1bd04 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -1548,7 +1548,8 @@ int phy_attach_direct(struct net_device *dev, struct phy_device *phydev, goto error; phy_resume(phydev); - phy_led_triggers_register(phydev); + if (!phydev->is_on_sfp_module) + phy_led_triggers_register(phydev); /** * If the external phy used by current mac interface is managed by @@ -1817,7 +1818,8 @@ void phy_detach(struct phy_device *phydev) } phydev->phylink = NULL; - phy_led_triggers_unregister(phydev); + if (!phydev->is_on_sfp_module) + phy_led_triggers_unregister(phydev); if (phydev->mdio.dev.driver) module_put(phydev->mdio.dev.driver->owner); From c8f021eec5817601dbd25ab7e3ad5c720965c688 Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Fri, 15 Dec 2023 17:04:24 +0100 Subject: [PATCH 52/69] selftests: mptcp: join: fix subflow_send_ack lookup MPC backups tests will skip unexpected sometimes (For example, when compiling kernel with an older version of gcc, such as gcc-8), since static functions like mptcp_subflow_send_ack also be listed in /proc/kallsyms, with a 't' in front of it, not 'T' ('T' is for a global function): > grep "mptcp_subflow_send_ack" /proc/kallsyms 0000000000000000 T __pfx___mptcp_subflow_send_ack 0000000000000000 T __mptcp_subflow_send_ack 0000000000000000 t 
__pfx_mptcp_subflow_send_ack 0000000000000000 t mptcp_subflow_send_ack In this case, mptcp_lib_kallsyms_doesnt_have "mptcp_subflow_send_ack$" will be false, MPC backups tests will skip. This is not what we expected. The correct logic here should be: if mptcp_subflow_send_ack is not a global function in /proc/kallsyms, do these MPC backups tests. So a 'T' must be added in front of mptcp_subflow_send_ack. Fixes: 632978f0a961 ("selftests: mptcp: join: skip MPC backups tests if not supported") Cc: stable@vger.kernel.org Signed-off-by: Geliang Tang Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts Signed-off-by: David S. Miller --- tools/testing/selftests/net/mptcp/mptcp_join.sh | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index 3c94f2f194d6..24a57b3ae215 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -2776,7 +2776,7 @@ backup_tests() fi if reset "mpc backup" && - continue_if mptcp_lib_kallsyms_doesnt_have "mptcp_subflow_send_ack$"; then + continue_if mptcp_lib_kallsyms_doesnt_have "T mptcp_subflow_send_ack$"; then pm_nl_add_endpoint $ns2 10.0.1.2 flags subflow,backup speed=slow \ run_tests $ns1 $ns2 10.0.1.1 @@ -2785,7 +2785,7 @@ backup_tests() fi if reset "mpc backup both sides" && - continue_if mptcp_lib_kallsyms_doesnt_have "mptcp_subflow_send_ack$"; then + continue_if mptcp_lib_kallsyms_doesnt_have "T mptcp_subflow_send_ack$"; then pm_nl_add_endpoint $ns1 10.0.1.1 flags subflow,backup pm_nl_add_endpoint $ns2 10.0.1.2 flags subflow,backup speed=slow \ @@ -2795,7 +2795,7 @@ backup_tests() fi if reset "mpc switch to backup" && - continue_if mptcp_lib_kallsyms_doesnt_have "mptcp_subflow_send_ack$"; then + continue_if mptcp_lib_kallsyms_doesnt_have "T mptcp_subflow_send_ack$"; then pm_nl_add_endpoint $ns2 10.0.1.2 flags subflow sflags=backup speed=slow \ run_tests 
$ns1 $ns2 10.0.1.1 @@ -2804,7 +2804,7 @@ backup_tests() fi if reset "mpc switch to backup both sides" && - continue_if mptcp_lib_kallsyms_doesnt_have "mptcp_subflow_send_ack$"; then + continue_if mptcp_lib_kallsyms_doesnt_have "T mptcp_subflow_send_ack$"; then pm_nl_add_endpoint $ns1 10.0.1.1 flags subflow pm_nl_add_endpoint $ns2 10.0.1.2 flags subflow sflags=backup speed=slow \ From 4fd19a30701659af5839b7bd19d1f05f05933ebe Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Fri, 15 Dec 2023 17:04:25 +0100 Subject: [PATCH 53/69] mptcp: fix inconsistent state on fastopen race The netlink PM can race with fastopen self-connect attempts, shutting down the first subflow via: MPTCP_PM_CMD_DEL_ADDR -> mptcp_nl_remove_id_zero_address -> mptcp_pm_nl_rm_subflow_received -> mptcp_close_ssk and transitioning such subflow to FIN_WAIT1 status before the syn-ack packet is processed. The MPTCP code does not react to such state change, leaving the connection in not-fallback status and the subflow handshake uncompleted, triggering the following splat: WARNING: CPU: 0 PID: 10630 at net/mptcp/subflow.c:1405 subflow_data_ready+0x39f/0x690 net/mptcp/subflow.c:1405 Modules linked in: CPU: 0 PID: 10630 Comm: kworker/u4:11 Not tainted 6.6.0-syzkaller-14500-g1c41041124bd #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 10/09/2023 Workqueue: bat_events batadv_nc_worker RIP: 0010:subflow_data_ready+0x39f/0x690 net/mptcp/subflow.c:1405 Code: 18 89 ee e8 e3 d2 21 f7 40 84 ed 75 1f e8 a9 d7 21 f7 44 89 fe bf 07 00 00 00 e8 0c d3 21 f7 41 83 ff 07 74 07 e8 91 d7 21 f7 <0f> 0b e8 8a d7 21 f7 48 89 df e8 d2 b2 ff ff 31 ff 89 c5 89 c6 e8 RSP: 0018:ffffc90000007448 EFLAGS: 00010246 RAX: 0000000000000000 RBX: ffff888031efc700 RCX: ffffffff8a65baf4 RDX: ffff888043222140 RSI: ffffffff8a65baff RDI: 0000000000000005 RBP: 0000000000000000 R08: 0000000000000005 R09: 0000000000000007 R10: 000000000000000b R11: 0000000000000000 R12: 1ffff92000000e89 R13: ffff88807a534d80 R14: 
ffff888021c11a00 R15: 000000000000000b FS: 0000000000000000(0000) GS:ffff8880b9800000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007fa19a0ffc81 CR3: 000000007a2db000 CR4: 00000000003506f0 DR0: 000000000000d8dd DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 Call Trace: tcp_data_ready+0x14c/0x5b0 net/ipv4/tcp_input.c:5128 tcp_data_queue+0x19c3/0x5190 net/ipv4/tcp_input.c:5208 tcp_rcv_state_process+0x11ef/0x4e10 net/ipv4/tcp_input.c:6844 tcp_v4_do_rcv+0x369/0xa10 net/ipv4/tcp_ipv4.c:1929 tcp_v4_rcv+0x3888/0x3b30 net/ipv4/tcp_ipv4.c:2329 ip_protocol_deliver_rcu+0x9f/0x480 net/ipv4/ip_input.c:205 ip_local_deliver_finish+0x2e4/0x510 net/ipv4/ip_input.c:233 NF_HOOK include/linux/netfilter.h:314 [inline] NF_HOOK include/linux/netfilter.h:308 [inline] ip_local_deliver+0x1b6/0x550 net/ipv4/ip_input.c:254 dst_input include/net/dst.h:461 [inline] ip_rcv_finish+0x1c4/0x2e0 net/ipv4/ip_input.c:449 NF_HOOK include/linux/netfilter.h:314 [inline] NF_HOOK include/linux/netfilter.h:308 [inline] ip_rcv+0xce/0x440 net/ipv4/ip_input.c:569 __netif_receive_skb_one_core+0x115/0x180 net/core/dev.c:5527 __netif_receive_skb+0x1f/0x1b0 net/core/dev.c:5641 process_backlog+0x101/0x6b0 net/core/dev.c:5969 __napi_poll.constprop.0+0xb4/0x540 net/core/dev.c:6531 napi_poll net/core/dev.c:6600 [inline] net_rx_action+0x956/0xe90 net/core/dev.c:6733 __do_softirq+0x21a/0x968 kernel/softirq.c:553 do_softirq kernel/softirq.c:454 [inline] do_softirq+0xaa/0xe0 kernel/softirq.c:441 __local_bh_enable_ip+0xf8/0x120 kernel/softirq.c:381 spin_unlock_bh include/linux/spinlock.h:396 [inline] batadv_nc_purge_paths+0x1ce/0x3c0 net/batman-adv/network-coding.c:471 batadv_nc_worker+0x9b1/0x10e0 net/batman-adv/network-coding.c:722 process_one_work+0x884/0x15c0 kernel/workqueue.c:2630 process_scheduled_works kernel/workqueue.c:2703 [inline] worker_thread+0x8b9/0x1290 kernel/workqueue.c:2784 kthread+0x33c/0x440 
kernel/kthread.c:388 ret_from_fork+0x45/0x80 arch/x86/kernel/process.c:147 ret_from_fork_asm+0x11/0x20 arch/x86/entry/entry_64.S:242 To address the issue, catch the racing subflow state change and use it to cause the MPTCP fallback. Such fallback is also used to cause the first subflow state propagation to the msk socket via mptcp_set_connected(). After this change, the first subflow can additionally propagate the TCP_FIN_WAIT1 state, so rename the helper accordingly. Finally, if the state propagation is delayed to the msk release callback, the first subflow can change to a different state in between. Cache the relevant target state in a new msk-level field and use such value to update the msk state at release time. Fixes: 1e777f39b4d7 ("mptcp: add MSG_FASTOPEN sendmsg flag support") Cc: stable@vger.kernel.org Reported-by: Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/458 Signed-off-by: Paolo Abeni Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts Signed-off-by: David S. 
Miller --- net/mptcp/protocol.c | 6 +++--- net/mptcp/protocol.h | 9 ++++++--- net/mptcp/subflow.c | 28 +++++++++++++++++----------- 3 files changed, 26 insertions(+), 17 deletions(-) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index bc81ea53a049..5cd5c3f535a8 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -3402,12 +3402,12 @@ static void mptcp_release_cb(struct sock *sk) if (__test_and_clear_bit(MPTCP_CLEAN_UNA, &msk->cb_flags)) __mptcp_clean_una_wakeup(sk); if (unlikely(msk->cb_flags)) { - /* be sure to set the current sk state before taking actions + /* be sure to sync the msk state before taking actions * depending on sk_state (MPTCP_ERROR_REPORT) * On sk release avoid actions depending on the first subflow */ - if (__test_and_clear_bit(MPTCP_CONNECTED, &msk->cb_flags) && msk->first) - __mptcp_set_connected(sk); + if (__test_and_clear_bit(MPTCP_SYNC_STATE, &msk->cb_flags) && msk->first) + __mptcp_sync_state(sk, msk->pending_state); if (__test_and_clear_bit(MPTCP_ERROR_REPORT, &msk->cb_flags)) __mptcp_error_report(sk); if (__test_and_clear_bit(MPTCP_SYNC_SNDBUF, &msk->cb_flags)) diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index fe6f2d399ee8..aa1a93fe40ff 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -124,7 +124,7 @@ #define MPTCP_ERROR_REPORT 3 #define MPTCP_RETRANSMIT 4 #define MPTCP_FLUSH_JOIN_LIST 5 -#define MPTCP_CONNECTED 6 +#define MPTCP_SYNC_STATE 6 #define MPTCP_SYNC_SNDBUF 7 struct mptcp_skb_cb { @@ -296,6 +296,9 @@ struct mptcp_sock { bool use_64bit_ack; /* Set when we received a 64-bit DSN */ bool csum_enabled; bool allow_infinite_fallback; + u8 pending_state; /* A subflow asked to set this sk_state, + * protected by the msk data lock + */ u8 mpc_endpoint_id; u8 recvmsg_inq:1, cork:1, @@ -728,7 +731,7 @@ void mptcp_get_options(const struct sk_buff *skb, struct mptcp_options_received *mp_opt); void mptcp_finish_connect(struct sock *sk); -void __mptcp_set_connected(struct sock *sk); +void 
__mptcp_sync_state(struct sock *sk, int state); void mptcp_reset_tout_timer(struct mptcp_sock *msk, unsigned long fail_tout); static inline void mptcp_stop_tout_timer(struct sock *sk) @@ -1115,7 +1118,7 @@ static inline bool subflow_simultaneous_connect(struct sock *sk) { struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk); - return sk->sk_state == TCP_ESTABLISHED && + return (1 << sk->sk_state) & (TCPF_ESTABLISHED | TCPF_FIN_WAIT1) && is_active_ssk(subflow) && !subflow->conn_finished; } diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index a4f3c27f0309..6d7684c35e93 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -419,22 +419,28 @@ static bool subflow_use_different_dport(struct mptcp_sock *msk, const struct soc return inet_sk(sk)->inet_dport != inet_sk((struct sock *)msk)->inet_dport; } -void __mptcp_set_connected(struct sock *sk) +void __mptcp_sync_state(struct sock *sk, int state) { - __mptcp_propagate_sndbuf(sk, mptcp_sk(sk)->first); + struct mptcp_sock *msk = mptcp_sk(sk); + + __mptcp_propagate_sndbuf(sk, msk->first); if (sk->sk_state == TCP_SYN_SENT) { - inet_sk_state_store(sk, TCP_ESTABLISHED); + inet_sk_state_store(sk, state); sk->sk_state_change(sk); } } -static void mptcp_set_connected(struct sock *sk) +static void mptcp_propagate_state(struct sock *sk, struct sock *ssk) { + struct mptcp_sock *msk = mptcp_sk(sk); + mptcp_data_lock(sk); - if (!sock_owned_by_user(sk)) - __mptcp_set_connected(sk); - else - __set_bit(MPTCP_CONNECTED, &mptcp_sk(sk)->cb_flags); + if (!sock_owned_by_user(sk)) { + __mptcp_sync_state(sk, ssk->sk_state); + } else { + msk->pending_state = ssk->sk_state; + __set_bit(MPTCP_SYNC_STATE, &msk->cb_flags); + } mptcp_data_unlock(sk); } @@ -496,7 +502,7 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb) subflow_set_remote_key(msk, subflow, &mp_opt); MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPCAPABLEACTIVEACK); mptcp_finish_connect(sk); - mptcp_set_connected(parent); + 
mptcp_propagate_state(parent, sk); } else if (subflow->request_join) { u8 hmac[SHA256_DIGEST_SIZE]; @@ -540,7 +546,7 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb) } else if (mptcp_check_fallback(sk)) { fallback: mptcp_rcv_space_init(msk, sk); - mptcp_set_connected(parent); + mptcp_propagate_state(parent, sk); } return; @@ -1740,7 +1746,7 @@ static void subflow_state_change(struct sock *sk) mptcp_rcv_space_init(msk, sk); pr_fallback(msk); subflow->conn_finished = 1; - mptcp_set_connected(parent); + mptcp_propagate_state(parent, sk); } /* as recvmsg() does not acquire the subflow socket for ssk selection From a8f570b247972775f710375125ebabfc47b1e518 Mon Sep 17 00:00:00 2001 From: Matthieu Baerts Date: Fri, 15 Dec 2023 17:04:26 +0100 Subject: [PATCH 54/69] mptcp: fill in missing MODULE_DESCRIPTION() W=1 builds warn on missing MODULE_DESCRIPTION, add them here in MPTCP. Only two were missing: two modules with different KUnit tests for MPTCP. Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts Signed-off-by: David S. 
Miller --- net/mptcp/crypto_test.c | 1 + net/mptcp/token_test.c | 1 + 2 files changed, 2 insertions(+) diff --git a/net/mptcp/crypto_test.c b/net/mptcp/crypto_test.c index 017248dea038..220414e5c850 100644 --- a/net/mptcp/crypto_test.c +++ b/net/mptcp/crypto_test.c @@ -70,3 +70,4 @@ static struct kunit_suite mptcp_crypto_suite = { kunit_test_suite(mptcp_crypto_suite); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("KUnit tests for MPTCP Crypto"); diff --git a/net/mptcp/token_test.c b/net/mptcp/token_test.c index 0758865ab658..bfff53e668da 100644 --- a/net/mptcp/token_test.c +++ b/net/mptcp/token_test.c @@ -143,3 +143,4 @@ static struct kunit_suite mptcp_token_suite = { kunit_test_suite(mptcp_token_suite); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("KUnit tests for MPTCP Token"); From 356c71c46169d5f3ff7f9ae939d73aceb3b2e514 Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Fri, 15 Dec 2023 17:04:27 +0100 Subject: [PATCH 55/69] mailmap: add entries for Geliang Tang Map Geliang's old mail addresses to his @linux.dev one. Suggested-by: Mat Martineau Signed-off-by: Geliang Tang Reviewed-by: Matthieu Baerts Signed-off-by: Matthieu Baerts Signed-off-by: David S. Miller --- .mailmap | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.mailmap b/.mailmap index 3ac1c12545f2..68e72a6017a0 100644 --- a/.mailmap +++ b/.mailmap @@ -191,6 +191,10 @@ Gao Xiang Gao Xiang Gao Xiang Gao Xiang +Geliang Tang +Geliang Tang +Geliang Tang +Geliang Tang Georgi Djakov Gerald Schaefer Gerald Schaefer From 7d881346121a97756f34e00e6296a5d63f001f7f Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Wed, 6 Dec 2023 12:19:05 -0800 Subject: [PATCH 56/69] ice: stop trashing VF VSI aggregator node ID information When creating new VSIs, they are assigned into an aggregator node in the scheduler tree. Information about which aggregator node a VSI is assigned into is maintained by the vsi->agg_node structure. 
In ice_vsi_decfg(), this information is being destroyed, by overwriting the valid flag and the agg_id field to zero. For VF VSIs, this breaks the aggregator node configuration replay, which depends on this information. This results in VFs being inserted into the default aggregator node. The resulting configuration will have unexpected Tx bandwidth sharing behavior. This was broken by commit 6624e780a577 ("ice: split ice_vsi_setup into smaller functions"), which added the block to reset the agg_node data. The vsi->agg_node structure is not managed by the scheduler code, but is instead a wrapper around an aggregator node ID that is tracked at the VSI layer. It's been around for a long time, and its primary purpose was for handling VFs. The SR-IOV VF reset flow does not make use of the standard VSI rebuild/replay logic, and uses vsi->agg_node as part of its handling to rebuild the aggregator node configuration. The logic for aggregator nodes stretches back to early ice driver code from commit b126bd6bcd67 ("ice: create scheduler aggregator node config and move VSIs"). The logic in ice_vsi_decfg() which trashes the ice_agg_node data is clearly wrong. It destroys information that is necessary for handling VF reset. It is also not the correct way to actually remove a VSI from an aggregator node. For that, we need to implement logic in the scheduler code. Further, non-VF VSIs properly replay their aggregator configuration using existing scheduler replay logic. To fix the VF replay logic, remove this broken aggregator node cleanup logic. This is the simplest way to immediately fix this. This ensures that VFs will have proper aggregate configuration after a reset. This is especially important since VFs often perform resets as part of their reconfiguration flows. Without fixing this, VFs will be placed in the default aggregator node and Tx bandwidth will not be shared in the expected and configured manner.
Fixes: 6624e780a577 ("ice: split ice_vsi_setup into smaller functions") Signed-off-by: Jacob Keller Reviewed-by: Przemek Kitszel Reviewed-by: Simon Horman Tested-by: Rafal Romanowski Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_lib.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c index 4b1e56396293..de7ba87af45d 100644 --- a/drivers/net/ethernet/intel/ice/ice_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_lib.c @@ -2620,10 +2620,6 @@ void ice_vsi_decfg(struct ice_vsi *vsi) if (vsi->type == ICE_VSI_VF && vsi->agg_node && vsi->agg_node->valid) vsi->agg_node->num_vsis--; - if (vsi->agg_node) { - vsi->agg_node->valid = false; - vsi->agg_node->agg_id = 0; - } } /** From 4d50fcdc2476eef94c14c6761073af5667bb43b6 Mon Sep 17 00:00:00 2001 From: Dave Ertman Date: Mon, 11 Dec 2023 13:19:28 -0800 Subject: [PATCH 57/69] ice: alter feature support check for SRIOV and LAG Previously, the ice driver had support for using a handler for bonding netdev events to ensure that conflicting features were not allowed to be activated at the same time. While this was still in place, additional support was added to specifically support SRIOV and LAG together. These both utilized the netdev event handler, but the SRIOV and LAG feature was behind a capabilities feature check to make sure the current NVM has support. The exclusion part of the event handler should be removed since there are users who have custom made solutions that depend on the non-exclusion of features. Wrap the creation/registration and cleanup of the event handler and associated structs in the probe flow with a feature check so that the only systems that support the full implementation of LAG features will initialize support. This will leave other systems unhindered with functionality as it existed before any LAG code was added. 
Fixes: bb52f42acef6 ("ice: Add driver support for firmware changes for LAG") Reviewed-by: Jesse Brandeburg Signed-off-by: Dave Ertman Reviewed-by: Simon Horman Tested-by: Pucha Himasekhar Reddy (A Contingent worker at Intel) Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_lag.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/intel/ice/ice_lag.c b/drivers/net/ethernet/intel/ice/ice_lag.c index 280994ee5933..b47cd43ae871 100644 --- a/drivers/net/ethernet/intel/ice/ice_lag.c +++ b/drivers/net/ethernet/intel/ice/ice_lag.c @@ -1981,6 +1981,8 @@ int ice_init_lag(struct ice_pf *pf) int n, err; ice_lag_init_feature_support_flag(pf); + if (!ice_is_feature_supported(pf, ICE_F_SRIOV_LAG)) + return 0; pf->lag = kzalloc(sizeof(*lag), GFP_KERNEL); if (!pf->lag) From f5728a418945ba53e2fdf38a6e5c5a2670965e85 Mon Sep 17 00:00:00 2001 From: Larysa Zaremba Date: Tue, 12 Dec 2023 10:29:01 +0100 Subject: [PATCH 58/69] ice: Fix PF with enabled XDP going no-carrier after reset Commit 6624e780a577fc596788 ("ice: split ice_vsi_setup into smaller functions") has refactored a bunch of code involved in PFR. In this process, TC queue number adjustment for XDP was lost. Bring it back. 
Lack of such adjustment causes interface to go into no-carrier after a reset, if XDP program is attached, with the following message: ice 0000:b1:00.0: Failed to set LAN Tx queue context, error: -22 ice 0000:b1:00.0 ens801f0np0: Failed to open VSI 0x0006 on switch 0x0001 ice 0000:b1:00.0: enable VSI failed, err -22, VSI index 0, type ICE_VSI_PF ice 0000:b1:00.0: PF VSI rebuild failed: -22 ice 0000:b1:00.0: Rebuild failed, unload and reload driver Fixes: 6624e780a577 ("ice: split ice_vsi_setup into smaller functions") Reviewed-by: Przemek Kitszel Signed-off-by: Larysa Zaremba Reviewed-by: Simon Horman Tested-by: Chandan Kumar Rout (A Contingent Worker at Intel) Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_lib.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c index de7ba87af45d..1bad6e17f9be 100644 --- a/drivers/net/ethernet/intel/ice/ice_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_lib.c @@ -2371,6 +2371,9 @@ static int ice_vsi_cfg_tc_lan(struct ice_pf *pf, struct ice_vsi *vsi) } else { max_txqs[i] = vsi->alloc_txq; } + + if (vsi->type == ICE_VSI_PF) + max_txqs[i] += vsi->num_xdp_txq; } dev_dbg(dev, "vsi->tc_cfg.ena_tc = %d\n", vsi->tc_cfg.ena_tc); From 3dc5d44545453de1de9c53cc529cc960a85933da Mon Sep 17 00:00:00 2001 From: Ronald Wahl Date: Thu, 14 Dec 2023 19:11:12 +0100 Subject: [PATCH 59/69] net: ks8851: Fix TX stall caused by TX buffer overrun There is a bug in the ks8851 Ethernet driver that more data is written to the hardware TX buffer than actually available. This is caused by wrong accounting of the free TX buffer space. The driver maintains a tx_space variable that represents the TX buffer space that is deemed to be free. The ks8851_start_xmit_spi() function adds an SKB to a queue if tx_space is large enough and reduces tx_space by the amount of buffer space it will later need in the TX buffer and then schedules a work item. 
If there is not enough space then the TX queue is stopped. The worker function ks8851_tx_work() dequeues all the SKBs and writes the data into the hardware TX buffer. The last packet will trigger an interrupt after it was sent. Here it is assumed that all data fits into the TX buffer. In the interrupt routine (which runs asynchronously because it is a threaded interrupt) tx_space is updated with the current value from the hardware. Also the TX queue is woken up again. Now it could happen that after data was sent to the hardware and before handling the TX interrupt new data is queued in ks8851_start_xmit_spi() when the TX buffer space had still some space left. When the interrupt is actually handled tx_space is updated from the hardware but now we already have new SKBs queued that have not been written to the hardware TX buffer yet. Since tx_space has been overwritten by the value from the hardware the space is not accounted for. Now we have more data queued than buffer space available in the hardware and ks8851_tx_work() will potentially overrun the hardware TX buffer. In many cases it will still work because often the buffer is written out fast enough so that no overrun occurs but for example if the peer throttles us via flow control then an overrun may happen. This can be fixed in different ways. The most simple way would be to set tx_space to 0 before writing data to the hardware TX buffer preventing the queuing of more SKBs until the TX interrupt has been handled. I have chosen a slightly more efficient (and still rather simple) way and track the amount of data that is already queued and not yet written to the hardware. When new SKBs are to be queued the already queued amount of data is honoured when checking free TX buffer space. I tested this with a setup of two linked KS8851 running iperf3 between the two in bidirectional mode. Before the fix I got a stall after some minutes. With the fix I saw no issues anymore after hours.
Fixes: 3ba81f3ece3c ("net: Micrel KS8851 SPI network driver") Cc: "David S. Miller" Cc: Eric Dumazet Cc: Jakub Kicinski Cc: Paolo Abeni Cc: Ben Dooks Cc: Tristram Ha Cc: netdev@vger.kernel.org Cc: stable@vger.kernel.org # 5.10+ Signed-off-by: Ronald Wahl Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20231214181112.76052-1-rwahl@gmx.de Signed-off-by: Paolo Abeni --- drivers/net/ethernet/micrel/ks8851.h | 3 ++ drivers/net/ethernet/micrel/ks8851_common.c | 22 +++++------ drivers/net/ethernet/micrel/ks8851_spi.c | 42 +++++++++++++-------- 3 files changed, 41 insertions(+), 26 deletions(-) diff --git a/drivers/net/ethernet/micrel/ks8851.h b/drivers/net/ethernet/micrel/ks8851.h index fecd43754cea..e5ec0a363aff 100644 --- a/drivers/net/ethernet/micrel/ks8851.h +++ b/drivers/net/ethernet/micrel/ks8851.h @@ -350,6 +350,8 @@ union ks8851_tx_hdr { * @rxd: Space for receiving SPI data, in DMA-able space. * @txd: Space for transmitting SPI data, in DMA-able space. * @msg_enable: The message flags controlling driver output (see ethtool). + * @tx_space: Free space in the hardware TX buffer (cached copy of KS_TXMIR). + * @queued_len: Space required in hardware TX buffer for queued packets in txq. * @fid: Incrementing frame id tag. * @rc_ier: Cached copy of KS_IER. * @rc_ccr: Cached copy of KS_CCR. 
@@ -399,6 +401,7 @@ struct ks8851_net { struct work_struct rxctrl_work; struct sk_buff_head txq; + unsigned int queued_len; struct eeprom_93cx6 eeprom; struct regulator *vdd_reg; diff --git a/drivers/net/ethernet/micrel/ks8851_common.c b/drivers/net/ethernet/micrel/ks8851_common.c index cfbc900d4aeb..0bf13b38b8f5 100644 --- a/drivers/net/ethernet/micrel/ks8851_common.c +++ b/drivers/net/ethernet/micrel/ks8851_common.c @@ -362,16 +362,18 @@ static irqreturn_t ks8851_irq(int irq, void *_ks) handled |= IRQ_RXPSI; if (status & IRQ_TXI) { - handled |= IRQ_TXI; - - /* no lock here, tx queue should have been stopped */ - - /* update our idea of how much tx space is available to the - * system */ - ks->tx_space = ks8851_rdreg16(ks, KS_TXMIR); + unsigned short tx_space = ks8851_rdreg16(ks, KS_TXMIR); netif_dbg(ks, intr, ks->netdev, - "%s: txspace %d\n", __func__, ks->tx_space); + "%s: txspace %d\n", __func__, tx_space); + + spin_lock(&ks->statelock); + ks->tx_space = tx_space; + if (netif_queue_stopped(ks->netdev)) + netif_wake_queue(ks->netdev); + spin_unlock(&ks->statelock); + + handled |= IRQ_TXI; } if (status & IRQ_RXI) @@ -414,9 +416,6 @@ static irqreturn_t ks8851_irq(int irq, void *_ks) if (status & IRQ_LCI) mii_check_link(&ks->mii); - if (status & IRQ_TXI) - netif_wake_queue(ks->netdev); - return IRQ_HANDLED; } @@ -500,6 +499,7 @@ static int ks8851_net_open(struct net_device *dev) ks8851_wrreg16(ks, KS_ISR, ks->rc_ier); ks8851_wrreg16(ks, KS_IER, ks->rc_ier); + ks->queued_len = 0; netif_start_queue(ks->netdev); netif_dbg(ks, ifup, ks->netdev, "network device up\n"); diff --git a/drivers/net/ethernet/micrel/ks8851_spi.c b/drivers/net/ethernet/micrel/ks8851_spi.c index 70bc7253454f..88e26c120b48 100644 --- a/drivers/net/ethernet/micrel/ks8851_spi.c +++ b/drivers/net/ethernet/micrel/ks8851_spi.c @@ -286,6 +286,18 @@ static void ks8851_wrfifo_spi(struct ks8851_net *ks, struct sk_buff *txp, netdev_err(ks->netdev, "%s: spi_sync() failed\n", __func__); } +/** + * calc_txlen 
- calculate size of message to send packet + * @len: Length of data + * + * Returns the size of the TXFIFO message needed to send + * this packet. + */ +static unsigned int calc_txlen(unsigned int len) +{ + return ALIGN(len + 4, 4); +} + /** * ks8851_rx_skb_spi - receive skbuff * @ks: The device state @@ -305,7 +317,9 @@ static void ks8851_rx_skb_spi(struct ks8851_net *ks, struct sk_buff *skb) */ static void ks8851_tx_work(struct work_struct *work) { + unsigned int dequeued_len = 0; struct ks8851_net_spi *kss; + unsigned short tx_space; struct ks8851_net *ks; unsigned long flags; struct sk_buff *txb; @@ -322,6 +336,8 @@ static void ks8851_tx_work(struct work_struct *work) last = skb_queue_empty(&ks->txq); if (txb) { + dequeued_len += calc_txlen(txb->len); + ks8851_wrreg16_spi(ks, KS_RXQCR, ks->rc_rxqcr | RXQCR_SDA); ks8851_wrfifo_spi(ks, txb, last); @@ -332,6 +348,13 @@ static void ks8851_tx_work(struct work_struct *work) } } + tx_space = ks8851_rdreg16_spi(ks, KS_TXMIR); + + spin_lock(&ks->statelock); + ks->queued_len -= dequeued_len; + ks->tx_space = tx_space; + spin_unlock(&ks->statelock); + ks8851_unlock_spi(ks, &flags); } @@ -346,18 +369,6 @@ static void ks8851_flush_tx_work_spi(struct ks8851_net *ks) flush_work(&kss->tx_work); } -/** - * calc_txlen - calculate size of message to send packet - * @len: Length of data - * - * Returns the size of the TXFIFO message needed to send - * this packet. 
- */ -static unsigned int calc_txlen(unsigned int len) -{ - return ALIGN(len + 4, 4); -} - /** * ks8851_start_xmit_spi - transmit packet using SPI * @skb: The buffer to transmit @@ -386,16 +397,17 @@ static netdev_tx_t ks8851_start_xmit_spi(struct sk_buff *skb, spin_lock(&ks->statelock); - if (needed > ks->tx_space) { + if (ks->queued_len + needed > ks->tx_space) { netif_stop_queue(dev); ret = NETDEV_TX_BUSY; } else { - ks->tx_space -= needed; + ks->queued_len += needed; skb_queue_tail(&ks->txq, skb); } spin_unlock(&ks->statelock); - schedule_work(&kss->tx_work); + if (ret == NETDEV_TX_OK) + schedule_work(&kss->tx_work); return ret; } From 340943fbff3d8faa44d2223ca04917df28786a07 Mon Sep 17 00:00:00 2001 From: Yury Norov Date: Fri, 15 Dec 2023 12:33:53 -0800 Subject: [PATCH 60/69] net: mana: select PAGE_POOL Mana uses PAGE_POOL API. x86_64 defconfig doesn't select it: ld: vmlinux.o: in function `mana_create_page_pool.isra.0': mana_en.c:(.text+0x9ae36f): undefined reference to `page_pool_create' ld: vmlinux.o: in function `mana_get_rxfrag': mana_en.c:(.text+0x9afed1): undefined reference to `page_pool_alloc_pages' make[3]: *** [/home/yury/work/linux/scripts/Makefile.vmlinux:37: vmlinux] Error 1 make[2]: *** [/home/yury/work/linux/Makefile:1154: vmlinux] Error 2 make[1]: *** [/home/yury/work/linux/Makefile:234: __sub-make] Error 2 make[1]: Leaving directory '/home/yury/work/build-linux-x86_64' make: *** [Makefile:234: __sub-make] Error 2 So we need to select it explicitly. 
Signed-off-by: Yury Norov Reviewed-by: Simon Horman Tested-by: Simon Horman # build-tested Fixes: ca9c54d2 ("net: mana: Add a driver for Microsoft Azure Network Adapter") Link: https://lore.kernel.org/r/20231215203353.635379-1-yury.norov@gmail.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/microsoft/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/microsoft/Kconfig b/drivers/net/ethernet/microsoft/Kconfig index 090e6b983243..01eb7445ead9 100644 --- a/drivers/net/ethernet/microsoft/Kconfig +++ b/drivers/net/ethernet/microsoft/Kconfig @@ -20,6 +20,7 @@ config MICROSOFT_MANA depends on PCI_MSI && X86_64 depends on PCI_HYPERV select AUXILIARY_BUS + select PAGE_POOL help This driver supports Microsoft Azure Network Adapter (MANA). So far, the driver is only supported on X86_64. From fa94a0c8424a5e1bd184bf1f05fbcd5914ce283d Mon Sep 17 00:00:00 2001 From: Jijie Shao Date: Sat, 16 Dec 2023 15:04:13 +0800 Subject: [PATCH 61/69] net: hns3: add new maintainer for the HNS3 ethernet driver Jijie Shao will be responsible for maintaining the hns3 driver's code in the future, so add Jijie to the hns3 driver's maintainer list.
Signed-off-by: Jijie Shao Link: https://lore.kernel.org/r/20231216070413.233668-1-shaojijie@huawei.com Signed-off-by: Paolo Abeni --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index bba17f97eda7..d83607884678 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -9526,6 +9526,7 @@ F: drivers/bus/hisi_lpc.c HISILICON NETWORK SUBSYSTEM 3 DRIVER (HNS3) M: Yisen Zhuang M: Salil Mehta +M: Jijie Shao L: netdev@vger.kernel.org S: Maintained W: http://www.hisilicon.com From 01a564bab4876007ce35f312e16797dfe40e4823 Mon Sep 17 00:00:00 2001 From: Liu Jian Date: Sat, 16 Dec 2023 15:52:18 +0800 Subject: [PATCH 62/69] net: check vlan filter feature in vlan_vids_add_by_dev() and vlan_vids_del_by_dev() I got the below warning trace: WARNING: CPU: 4 PID: 4056 at net/core/dev.c:11066 unregister_netdevice_many_notify CPU: 4 PID: 4056 Comm: ip Not tainted 6.7.0-rc4+ #15 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.15.0-1 04/01/2014 RIP: 0010:unregister_netdevice_many_notify+0x9a4/0x9b0 Call Trace: rtnl_dellink rtnetlink_rcv_msg netlink_rcv_skb netlink_unicast netlink_sendmsg __sock_sendmsg ____sys_sendmsg ___sys_sendmsg __sys_sendmsg do_syscall_64 entry_SYSCALL_64_after_hwframe It can be reproduced via: ip netns add ns1 ip netns exec ns1 ip link add bond0 type bond mode 0 ip netns exec ns1 ip link add bond_slave_1 type veth peer veth2 ip netns exec ns1 ip link set bond_slave_1 master bond0 [1] ip netns exec ns1 ethtool -K bond0 rx-vlan-filter off [2] ip netns exec ns1 ip link add link bond_slave_1 name bond_slave_1.0 type vlan id 0 [3] ip netns exec ns1 ip link add link bond0 name bond0.0 type vlan id 0 [4] ip netns exec ns1 ip link set bond_slave_1 nomaster [5] ip netns exec ns1 ip link del veth2 ip netns del ns1 This is all caused by command [1] turning off the rx-vlan-filter function of bond0. The reason is the same as commit 01f4fd270870 ("bonding: Fix incorrect deletion of ETH_P_8021AD protocol vid from slaves").
Commands [2] [3] add the same vid to slave and master respectively, causing command [4] to empty slave->vlan_info. The following command [5] triggers this problem. To fix this problem, we should add VLAN_FILTER feature checks in vlan_vids_add_by_dev() and vlan_vids_del_by_dev() to prevent incorrect addition or deletion of vlan_vid information. Fixes: 348a1443cc43 ("vlan: introduce functions to do mass addition/deletion of vids by another device") Signed-off-by: Liu Jian Signed-off-by: Paolo Abeni --- net/8021q/vlan_core.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c index 0beb44f2fe1f..f00158234505 100644 --- a/net/8021q/vlan_core.c +++ b/net/8021q/vlan_core.c @@ -407,6 +407,8 @@ int vlan_vids_add_by_dev(struct net_device *dev, return 0; list_for_each_entry(vid_info, &vlan_info->vid_list, list) { + if (!vlan_hw_filter_capable(by_dev, vid_info->proto)) + continue; err = vlan_vid_add(dev, vid_info->proto, vid_info->vid); if (err) goto unwind; @@ -417,6 +419,8 @@ unwind: list_for_each_entry_continue_reverse(vid_info, &vlan_info->vid_list, list) { + if (!vlan_hw_filter_capable(by_dev, vid_info->proto)) + continue; vlan_vid_del(dev, vid_info->proto, vid_info->vid); } @@ -436,8 +440,11 @@ void vlan_vids_del_by_dev(struct net_device *dev, if (!vlan_info) return; - list_for_each_entry(vid_info, &vlan_info->vid_list, list) + list_for_each_entry(vid_info, &vlan_info->vid_list, list) { + if (!vlan_hw_filter_capable(by_dev, vid_info->proto)) + continue; vlan_vid_del(dev, vid_info->proto, vid_info->vid); + } } EXPORT_SYMBOL(vlan_vids_del_by_dev); From 2258b666482d3326aec8b72ec3e009a2aad9582c Mon Sep 17 00:00:00 2001 From: Liu Jian Date: Sat, 16 Dec 2023 15:52:19 +0800 Subject: [PATCH 63/69] selftests: add vlan hw filter tests Add one basic vlan hw filter test. 
Signed-off-by: Liu Jian Reviewed-by: Hangbin Liu Signed-off-by: Paolo Abeni --- tools/testing/selftests/net/Makefile | 1 + tools/testing/selftests/net/vlan_hw_filter.sh | 29 +++++++++++++++++++ 2 files changed, 30 insertions(+) create mode 100755 tools/testing/selftests/net/vlan_hw_filter.sh diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index 5b2aca4c5f10..9e5bf59a20bf 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -91,6 +91,7 @@ TEST_PROGS += test_bridge_neigh_suppress.sh TEST_PROGS += test_vxlan_nolocalbypass.sh TEST_PROGS += test_bridge_backup_port.sh TEST_PROGS += fdb_flush.sh +TEST_PROGS += vlan_hw_filter.sh TEST_FILES := settings diff --git a/tools/testing/selftests/net/vlan_hw_filter.sh b/tools/testing/selftests/net/vlan_hw_filter.sh new file mode 100755 index 000000000000..7bc804ffaf7c --- /dev/null +++ b/tools/testing/selftests/net/vlan_hw_filter.sh @@ -0,0 +1,29 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 + +readonly NETNS="ns-$(mktemp -u XXXXXX)" + +ret=0 + +cleanup() { + ip netns del $NETNS +} + +trap cleanup EXIT + +fail() { + echo "ERROR: ${1:-unexpected return code} (ret: $_)" >&2 + ret=1 +} + +ip netns add ${NETNS} +ip netns exec ${NETNS} ip link add bond0 type bond mode 0 +ip netns exec ${NETNS} ip link add bond_slave_1 type veth peer veth2 +ip netns exec ${NETNS} ip link set bond_slave_1 master bond0 +ip netns exec ${NETNS} ethtool -K bond0 rx-vlan-filter off +ip netns exec ${NETNS} ip link add link bond_slave_1 name bond_slave_1.0 type vlan id 0 +ip netns exec ${NETNS} ip link add link bond0 name bond0.0 type vlan id 0 +ip netns exec ${NETNS} ip link set bond_slave_1 nomaster +ip netns exec ${NETNS} ip link del veth2 || fail "Please check vlan HW filter function" + +exit $ret From bd7f77dae69532ffc027ee50ff99e3792dc30b7f Mon Sep 17 00:00:00 2001 From: Lai Peter Jun Ann Date: Mon, 18 Dec 2023 15:51:32 +0800 Subject: [PATCH 64/69] net: stmmac: fix 
incorrect flag check in timestamp interrupt The driver should continue to get the timestamp if STMMAC_FLAG_EXT_SNAPSHOT_EN flag is set. Fixes: aa5513f5d95f ("net: stmmac: replace the ext_snapshot_en field with a flag") Cc: # 6.6 Signed-off-by: Song Yoong Siang Signed-off-by: Lai Peter Jun Ann Reviewed-by: Jacob Keller Reviewed-by: Serge Semin Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c index 540f6a4ec0b8..f05bd757dfe5 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c @@ -237,7 +237,7 @@ static void timestamp_interrupt(struct stmmac_priv *priv) */ ts_status = readl(priv->ioaddr + GMAC_TIMESTAMP_STATUS); - if (priv->plat->flags & STMMAC_FLAG_EXT_SNAPSHOT_EN) + if (!(priv->plat->flags & STMMAC_FLAG_EXT_SNAPSHOT_EN)) return; num_snapshot = (ts_status & GMAC_TIMESTAMP_ATSNS_MASK) >> From 7cb8cd4daacfea646cf8b5925ca2c66c98b18480 Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Sun, 17 Dec 2023 16:37:40 +0100 Subject: [PATCH 65/69] net: ethernet: mtk_wed: fix possible NULL pointer dereference in mtk_wed_wo_queue_tx_clean() In order to avoid a NULL pointer dereference, check entry->buf pointer before running skb_free_frag in mtk_wed_wo_queue_tx_clean routine.
Fixes: 799684448e3e ("net: ethernet: mtk_wed: introduce wed wo support") Signed-off-by: Lorenzo Bianconi Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/3c1262464d215faa8acebfc08869798c81c96f4a.1702827359.git.lorenzo@kernel.org Signed-off-by: Paolo Abeni --- drivers/net/ethernet/mediatek/mtk_wed_wo.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/mediatek/mtk_wed_wo.c b/drivers/net/ethernet/mediatek/mtk_wed_wo.c index 3bd51a3d6650..ae44ad5f8ce8 100644 --- a/drivers/net/ethernet/mediatek/mtk_wed_wo.c +++ b/drivers/net/ethernet/mediatek/mtk_wed_wo.c @@ -291,6 +291,9 @@ mtk_wed_wo_queue_tx_clean(struct mtk_wed_wo *wo, struct mtk_wed_wo_queue *q) for (i = 0; i < q->n_desc; i++) { struct mtk_wed_wo_queue_entry *entry = &q->entry[i]; + if (!entry->buf) + continue; + dma_unmap_single(wo->hw->dev, entry->addr, entry->len, DMA_TO_DEVICE); skb_free_frag(entry->buf); From d6e5794b06c0fab74fe6e4fa55d508a5ceb14735 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Mon, 18 Dec 2023 18:06:54 +0100 Subject: [PATCH 66/69] net: avoid build bug in skb extension length calculation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit GCC seems to incorrectly fail to evaluate skb_ext_total_length() at compile time under certain conditions. The issue even occurs if all values in skb_ext_type_len[] are "0", ruling out the possibility of an actual overflow. As the patch has been in mainline since v6.6 without triggering the problem it seems to be a very uncommon occurrence. As the issue only occurs when -fno-tree-loop-im is specified as part of CFLAGS_GCOV, disable the BUILD_BUG_ON() only when building with coverage reporting enabled. 
Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202312171924.4FozI5FG-lkp@intel.com/ Suggested-by: Arnd Bergmann Link: https://lore.kernel.org/lkml/487cfd35-fe68-416f-9bfd-6bb417f98304@app.fastmail.com/ Fixes: 5d21d0a65b57 ("net: generalize calculation of skb extensions length") Cc: Signed-off-by: Thomas Weißschuh Acked-by: Arnd Bergmann Link: https://lore.kernel.org/r/20231218-net-skbuff-build-bug-v1-1-eefc2fb0a7d3@weissschuh.net Signed-off-by: Paolo Abeni --- net/core/skbuff.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 83af8aaeb893..94cc40a6f797 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -4825,7 +4825,9 @@ static __always_inline unsigned int skb_ext_total_length(void) static void skb_extensions_init(void) { BUILD_BUG_ON(SKB_EXT_NUM >= 8); +#if !IS_ENABLED(CONFIG_KCOV_INSTRUMENT_ALL) BUILD_BUG_ON(skb_ext_total_length() > 255); +#endif skbuff_ext_cache = kmem_cache_create("skbuff_ext_cache", SKB_EXT_ALIGN_VALUE * skb_ext_total_length(), From dade3f6a1e4e35a5ae916d5e78b3229ec34c78ec Mon Sep 17 00:00:00 2001 From: David Ahern Date: Mon, 18 Dec 2023 20:02:43 -0700 Subject: [PATCH 67/69] net/ipv6: Revert remove expired routes with a separated list of routes This reverts commit 3dec89b14d37ee635e772636dad3f09f78f1ab87. The commit has some race conditions given how expires is managed on a fib6_info in relation to gc start, adding the entry to the gc list and setting the timer value leading to UAF. Revert the commit and try again in a later release. 
Fixes: 3dec89b14d37 ("net/ipv6: Remove expired routes with a separated list of routes") Cc: Kui-Feng Lee Signed-off-by: David Ahern Reviewed-by: Eric Dumazet Link: https://lore.kernel.org/r/20231219030243.25687-1-dsahern@kernel.org Signed-off-by: Paolo Abeni --- include/net/ip6_fib.h | 64 +++++++++---------------------------------- net/ipv6/ip6_fib.c | 55 ++++--------------------------------- net/ipv6/route.c | 6 ++-- 3 files changed, 22 insertions(+), 103 deletions(-) diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h index 1ba9f4ddf2f6..9ba6413fd2e3 100644 --- a/include/net/ip6_fib.h +++ b/include/net/ip6_fib.h @@ -179,9 +179,6 @@ struct fib6_info { refcount_t fib6_ref; unsigned long expires; - - struct hlist_node gc_link; - struct dst_metrics *fib6_metrics; #define fib6_pmtu fib6_metrics->metrics[RTAX_MTU-1] @@ -250,6 +247,19 @@ static inline bool fib6_requires_src(const struct fib6_info *rt) return rt->fib6_src.plen > 0; } +static inline void fib6_clean_expires(struct fib6_info *f6i) +{ + f6i->fib6_flags &= ~RTF_EXPIRES; + f6i->expires = 0; +} + +static inline void fib6_set_expires(struct fib6_info *f6i, + unsigned long expires) +{ + f6i->expires = expires; + f6i->fib6_flags |= RTF_EXPIRES; +} + static inline bool fib6_check_expired(const struct fib6_info *f6i) { if (f6i->fib6_flags & RTF_EXPIRES) @@ -257,11 +267,6 @@ static inline bool fib6_check_expired(const struct fib6_info *f6i) return false; } -static inline bool fib6_has_expires(const struct fib6_info *f6i) -{ - return f6i->fib6_flags & RTF_EXPIRES; -} - /* Function to safely get fn->fn_sernum for passed in rt * and store result in passed in cookie. 
* Return true if we can get cookie safely @@ -383,7 +388,6 @@ struct fib6_table { struct inet_peer_base tb6_peers; unsigned int flags; unsigned int fib_seq; - struct hlist_head tb6_gc_hlist; /* GC candidates */ #define RT6_TABLE_HAS_DFLT_ROUTER BIT(0) }; @@ -500,48 +504,6 @@ void fib6_gc_cleanup(void); int fib6_init(void); -/* fib6_info must be locked by the caller, and fib6_info->fib6_table can be - * NULL. - */ -static inline void fib6_set_expires_locked(struct fib6_info *f6i, - unsigned long expires) -{ - struct fib6_table *tb6; - - tb6 = f6i->fib6_table; - f6i->expires = expires; - if (tb6 && !fib6_has_expires(f6i)) - hlist_add_head(&f6i->gc_link, &tb6->tb6_gc_hlist); - f6i->fib6_flags |= RTF_EXPIRES; -} - -/* fib6_info must be locked by the caller, and fib6_info->fib6_table can be - * NULL. If fib6_table is NULL, the fib6_info will no be inserted into the - * list of GC candidates until it is inserted into a table. - */ -static inline void fib6_set_expires(struct fib6_info *f6i, - unsigned long expires) -{ - spin_lock_bh(&f6i->fib6_table->tb6_lock); - fib6_set_expires_locked(f6i, expires); - spin_unlock_bh(&f6i->fib6_table->tb6_lock); -} - -static inline void fib6_clean_expires_locked(struct fib6_info *f6i) -{ - if (fib6_has_expires(f6i)) - hlist_del_init(&f6i->gc_link); - f6i->fib6_flags &= ~RTF_EXPIRES; - f6i->expires = 0; -} - -static inline void fib6_clean_expires(struct fib6_info *f6i) -{ - spin_lock_bh(&f6i->fib6_table->tb6_lock); - fib6_clean_expires_locked(f6i); - spin_unlock_bh(&f6i->fib6_table->tb6_lock); -} - struct ipv6_route_iter { struct seq_net_private p; struct fib6_walker w; diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 7772f42ff2b9..4fc2cae0d116 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -160,8 +160,6 @@ struct fib6_info *fib6_info_alloc(gfp_t gfp_flags, bool with_fib6_nh) INIT_LIST_HEAD(&f6i->fib6_siblings); refcount_set(&f6i->fib6_ref, 1); - INIT_HLIST_NODE(&f6i->gc_link); - return f6i; } @@ -248,7 +246,6 @@ 
static struct fib6_table *fib6_alloc_table(struct net *net, u32 id) net->ipv6.fib6_null_entry); table->tb6_root.fn_flags = RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO; inet_peer_base_init(&table->tb6_peers); - INIT_HLIST_HEAD(&table->tb6_gc_hlist); } return table; @@ -1060,8 +1057,6 @@ static void fib6_purge_rt(struct fib6_info *rt, struct fib6_node *fn, lockdep_is_held(&table->tb6_lock)); } } - - fib6_clean_expires_locked(rt); } /* @@ -1123,10 +1118,9 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct fib6_info *rt, if (!(iter->fib6_flags & RTF_EXPIRES)) return -EEXIST; if (!(rt->fib6_flags & RTF_EXPIRES)) - fib6_clean_expires_locked(iter); + fib6_clean_expires(iter); else - fib6_set_expires_locked(iter, - rt->expires); + fib6_set_expires(iter, rt->expires); if (rt->fib6_pmtu) fib6_metric_set(iter, RTAX_MTU, @@ -1485,10 +1479,6 @@ int fib6_add(struct fib6_node *root, struct fib6_info *rt, if (rt->nh) list_add(&rt->nh_list, &rt->nh->f6i_list); __fib6_update_sernum_upto_root(rt, fib6_new_sernum(info->nl_net)); - - if (fib6_has_expires(rt)) - hlist_add_head(&rt->gc_link, &table->tb6_gc_hlist); - fib6_start_gc(info->nl_net, rt); } @@ -2291,8 +2281,9 @@ static void fib6_flush_trees(struct net *net) * Garbage collection */ -static int fib6_age(struct fib6_info *rt, struct fib6_gc_args *gc_args) +static int fib6_age(struct fib6_info *rt, void *arg) { + struct fib6_gc_args *gc_args = arg; unsigned long now = jiffies; /* @@ -2300,7 +2291,7 @@ static int fib6_age(struct fib6_info *rt, struct fib6_gc_args *gc_args) * Routes are expired even if they are in use. 
*/ - if (fib6_has_expires(rt) && rt->expires) { + if (rt->fib6_flags & RTF_EXPIRES && rt->expires) { if (time_after(now, rt->expires)) { RT6_TRACE("expiring %p\n", rt); return -1; @@ -2317,40 +2308,6 @@ static int fib6_age(struct fib6_info *rt, struct fib6_gc_args *gc_args) return 0; } -static void fib6_gc_table(struct net *net, - struct fib6_table *tb6, - struct fib6_gc_args *gc_args) -{ - struct fib6_info *rt; - struct hlist_node *n; - struct nl_info info = { - .nl_net = net, - .skip_notify = false, - }; - - hlist_for_each_entry_safe(rt, n, &tb6->tb6_gc_hlist, gc_link) - if (fib6_age(rt, gc_args) == -1) - fib6_del(rt, &info); -} - -static void fib6_gc_all(struct net *net, struct fib6_gc_args *gc_args) -{ - struct fib6_table *table; - struct hlist_head *head; - unsigned int h; - - rcu_read_lock(); - for (h = 0; h < FIB6_TABLE_HASHSZ; h++) { - head = &net->ipv6.fib_table_hash[h]; - hlist_for_each_entry_rcu(table, head, tb6_hlist) { - spin_lock_bh(&table->tb6_lock); - fib6_gc_table(net, table, gc_args); - spin_unlock_bh(&table->tb6_lock); - } - } - rcu_read_unlock(); -} - void fib6_run_gc(unsigned long expires, struct net *net, bool force) { struct fib6_gc_args gc_args; @@ -2366,7 +2323,7 @@ void fib6_run_gc(unsigned long expires, struct net *net, bool force) net->ipv6.sysctl.ip6_rt_gc_interval; gc_args.more = 0; - fib6_gc_all(net, &gc_args); + fib6_clean_all(net, fib6_age, &gc_args); now = jiffies; net->ipv6.ip6_rt_last_gc = now; diff --git a/net/ipv6/route.c b/net/ipv6/route.c index b132feae3393..ea1dec8448fc 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -3763,10 +3763,10 @@ static struct fib6_info *ip6_route_info_create(struct fib6_config *cfg, rt->dst_nocount = true; if (cfg->fc_flags & RTF_EXPIRES) - fib6_set_expires_locked(rt, jiffies + - clock_t_to_jiffies(cfg->fc_expires)); + fib6_set_expires(rt, jiffies + + clock_t_to_jiffies(cfg->fc_expires)); else - fib6_clean_expires_locked(rt); + fib6_clean_expires(rt); if (cfg->fc_protocol == RTPROT_UNSPEC) 
cfg->fc_protocol = RTPROT_BOOT; From b8056f2ce07f27c43b9488dd1bc8bfbb60d0779d Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Tue, 19 Dec 2023 14:57:37 +0800 Subject: [PATCH 68/69] kselftest: rtnetlink.sh: use grep_fail when expecting the cmd fail run_cmd_grep_fail should be used when the cmd is expected to fail; otherwise ret is set to 1 and the whole test run returns 1 on exit. This would cause the result report to fail if run via run_kselftest.sh. Before fix: # ./rtnetlink.sh -t kci_test_addrlft PASS: preferred_lft addresses have expired # echo $? 1 After fix: # ./rtnetlink.sh -t kci_test_addrlft PASS: preferred_lft addresses have expired # echo $? 0 Fixes: 9c2a19f71515 ("kselftest: rtnetlink.sh: add verbose flag") Signed-off-by: Hangbin Liu Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20231219065737.1725120-1-liuhangbin@gmail.com Signed-off-by: Paolo Abeni --- tools/testing/selftests/net/rtnetlink.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/net/rtnetlink.sh b/tools/testing/selftests/net/rtnetlink.sh index 38be9706c45f..26827ea4e3e5 100755 --- a/tools/testing/selftests/net/rtnetlink.sh +++ b/tools/testing/selftests/net/rtnetlink.sh @@ -297,7 +297,7 @@ kci_test_addrlft() done sleep 5 - run_cmd_grep "10.23.11." ip addr show dev "$devdummy" + run_cmd_grep_fail "10.23.11." ip addr show dev "$devdummy" if [ $? -eq 0 ]; then check_err 1 end_test "FAIL: preferred_lft addresses remaining" From 24ab059d2ebd62fdccc43794796f6ffbabe49ebc Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 19 Dec 2023 12:53:31 +0000 Subject: [PATCH 69/69] net: check dev->gso_max_size in gso_features_check() Some drivers might misbehave if TSO packets get too big. GVE for instance uses a 16-bit field in its TX descriptor, and will do bad things if a packet is bigger than 2^16 bytes.
Linux TCP stack honors dev->gso_max_size, but there are other ways for oversized packets to reach an ndo_start_xmit() handler: virtio_net, af_packet, GRO... Add a generic check in gso_features_check() and fall back to GSO when needed. gso_max_size was added in the blamed commit. Fixes: 82cc1a7a5687 ("[NET]: Add per-connection option to set max TSO frame size") Signed-off-by: Eric Dumazet Link: https://lore.kernel.org/r/20231219125331.4127498-1-edumazet@google.com Signed-off-by: Paolo Abeni --- net/core/dev.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/core/dev.c b/net/core/dev.c index c879246be48d..ad20bebe153f 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3472,6 +3472,9 @@ static netdev_features_t gso_features_check(const struct sk_buff *skb, if (gso_segs > READ_ONCE(dev->gso_max_segs)) return features & ~NETIF_F_GSO_MASK; + if (unlikely(skb->len >= READ_ONCE(dev->gso_max_size))) + return features & ~NETIF_F_GSO_MASK; + if (!skb_shinfo(skb)->gso_type) { skb_warn_bad_offload(skb); return features & ~NETIF_F_GSO_MASK;