From 544cc3f8573bf9a82e8f348741f2f68d2a8376fb Mon Sep 17 00:00:00 2001 From: Jernej Skrabec Date: Sun, 1 Nov 2020 08:26:09 +0100 Subject: [PATCH 001/296] arm64: dts: allwinner: h6: orangepi-one-plus: Fix ethernet RX/TX delay on OrangePi One Plus board is set on PHY. Reflect that in ethernet node. Fixes: 7ee32a17e0d6 ("arm64: dts: allwinner: h6: orangepi-one-plus: Enable ethernet") Signed-off-by: Jernej Skrabec Signed-off-by: Maxime Ripard Tested-by: Marcus Cooper Link: https://lore.kernel.org/r/20201101072609.1681891-1-jernej.skrabec@siol.net --- arch/arm64/boot/dts/allwinner/sun50i-h6-orangepi-one-plus.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/allwinner/sun50i-h6-orangepi-one-plus.dts b/arch/arm64/boot/dts/allwinner/sun50i-h6-orangepi-one-plus.dts index fceb298bfd53..29a081e72a9b 100644 --- a/arch/arm64/boot/dts/allwinner/sun50i-h6-orangepi-one-plus.dts +++ b/arch/arm64/boot/dts/allwinner/sun50i-h6-orangepi-one-plus.dts @@ -27,7 +27,7 @@ &emac { pinctrl-names = "default"; pinctrl-0 = <&ext_rgmii_pins>; - phy-mode = "rgmii"; + phy-mode = "rgmii-id"; phy-handle = <&ext_rgmii_phy>; phy-supply = <&reg_gmac_3v3>; allwinner,rx-delay-ps = <200>; From ad2091f893bd5dfe2824f0d6819600d120698e9f Mon Sep 17 00:00:00 2001 From: Paul Kocialkowski Date: Sat, 31 Oct 2020 19:21:29 +0100 Subject: [PATCH 002/296] ARM: sunxi: Add machine match for the Allwinner V3 SoC The Allwinner V3 SoC shares the same base as the V3s but comes with extra pins and features available. As a result, it has its dedicated compatible string (already used in device trees), which is added here. 
Signed-off-by: Paul Kocialkowski Signed-off-by: Maxime Ripard Link: https://lore.kernel.org/r/20201031182137.1879521-2-contact@paulk.fr --- arch/arm/mach-sunxi/sunxi.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/mach-sunxi/sunxi.c b/arch/arm/mach-sunxi/sunxi.c index 06da2747a90b..19635721013d 100644 --- a/arch/arm/mach-sunxi/sunxi.c +++ b/arch/arm/mach-sunxi/sunxi.c @@ -66,6 +66,7 @@ static const char * const sun8i_board_dt_compat[] = { "allwinner,sun8i-h2-plus", "allwinner,sun8i-h3", "allwinner,sun8i-r40", + "allwinner,sun8i-v3", "allwinner,sun8i-v3s", NULL, }; From 6ab48105aae79b9d8062e9bc922baaeff80918d7 Mon Sep 17 00:00:00 2001 From: Matteo Scordino Date: Fri, 30 Oct 2020 23:43:25 +0000 Subject: [PATCH 003/296] ARM: dts: s3: pinecube: align compatible property to other S3 boards The compatible string in the Pine64 Pinecube dts diverges from the ones used in other S3 based boards, like the LicheePi and the Elimo Impetus and Initium. Discussion on LKML decided the PineCube should align to the others. 
Signed-off-by: Matteo Scordino Signed-off-by: Maxime Ripard Link: https://lore.kernel.org/r/20201030234325.5865-7-matteo.scordino@gmail.com --- arch/arm/boot/dts/sun8i-s3-pinecube.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/sun8i-s3-pinecube.dts b/arch/arm/boot/dts/sun8i-s3-pinecube.dts index 9bab6b7f4014..4aa0ee897a0a 100644 --- a/arch/arm/boot/dts/sun8i-s3-pinecube.dts +++ b/arch/arm/boot/dts/sun8i-s3-pinecube.dts @@ -10,7 +10,7 @@ / { model = "PineCube IP Camera"; - compatible = "pine64,pinecube", "allwinner,sun8i-s3"; + compatible = "pine64,pinecube", "sochip,s3", "allwinner,sun8i-v3"; aliases { serial0 = &uart2; From 8c9cb4094ccf242eddd140efba13872c55f68a87 Mon Sep 17 00:00:00 2001 From: Pablo Greco Date: Mon, 2 Nov 2020 11:16:40 -0300 Subject: [PATCH 004/296] ARM: dts: sun7i: bananapi: Enable RGMII RX/TX delay on Ethernet PHY The Ethernet PHY on the Bananapi M1 has the RX and TX delays enabled on the PHY, using pull-ups on the RXDLY and TXDLY pins. Fix the phy-mode description to correctly reflect this so that the implementation doesn't reconfigure the delays incorrectly. This happened with commit bbc4d71d6354 ("net: phy: realtek: fix rtl8211e rx/tx delay config"). 
Fixes: 8a5b272fbf44 ("ARM: dts: sun7i: Add Banana Pi board") Signed-off-by: Pablo Greco Signed-off-by: Maxime Ripard Link: https://lore.kernel.org/r/1604326600-39544-1-git-send-email-pgreco@centosproject.org --- arch/arm/boot/dts/sun7i-a20-bananapi.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/sun7i-a20-bananapi.dts b/arch/arm/boot/dts/sun7i-a20-bananapi.dts index bb3987e101c2..0b3d9ae75650 100644 --- a/arch/arm/boot/dts/sun7i-a20-bananapi.dts +++ b/arch/arm/boot/dts/sun7i-a20-bananapi.dts @@ -132,7 +132,7 @@ pinctrl-names = "default"; pinctrl-0 = <&gmac_rgmii_pins>; phy-handle = <&phy1>; - phy-mode = "rgmii"; + phy-mode = "rgmii-id"; phy-supply = <&reg_gmac_3v3>; status = "okay"; }; From bd5cdcdc66e1f7179ff6d172d1e5f55e43403aa8 Mon Sep 17 00:00:00 2001 From: Pablo Greco Date: Mon, 2 Nov 2020 11:19:14 -0300 Subject: [PATCH 005/296] ARM: dts: sun8i: r40: bananapi-m2-berry: Fix dcdc1 regulator DCDC1 regulator powers many different subsystems. While some of them can work at 3.0 V, some of them can not. For example, VCC-HDMI can only work between 3.24 V and 3.36 V. According to OS images provided by the board manufacturer this regulator should be set to 3.3 V. Set DCDC1 and DCDC1SW to 3.3 V in order to fix this. 
Fixes: 23edc168bd98 ("ARM: dts: sun8i: Add board dts file for Banana Pi M2 Berry") Fixes: 27e81e1970a8 ("ARM: dts: sun8i: v40: bananapi-m2-berry: Enable GMAC ethernet controller") Signed-off-by: Pablo Greco Signed-off-by: Maxime Ripard Link: https://lore.kernel.org/r/1604326755-39742-1-git-send-email-pgreco@centosproject.org --- arch/arm/boot/dts/sun8i-v40-bananapi-m2-berry.dts | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/arm/boot/dts/sun8i-v40-bananapi-m2-berry.dts b/arch/arm/boot/dts/sun8i-v40-bananapi-m2-berry.dts index 15c22b06fc4b..84eb08295718 100644 --- a/arch/arm/boot/dts/sun8i-v40-bananapi-m2-berry.dts +++ b/arch/arm/boot/dts/sun8i-v40-bananapi-m2-berry.dts @@ -198,16 +198,16 @@ }; &reg_dc1sw { - regulator-min-microvolt = <3000000>; - regulator-max-microvolt = <3000000>; + regulator-min-microvolt = <3300000>; + regulator-max-microvolt = <3300000>; regulator-name = "vcc-gmac-phy"; }; &reg_dcdc1 { regulator-always-on; - regulator-min-microvolt = <3000000>; - regulator-max-microvolt = <3000000>; - regulator-name = "vcc-3v0"; + regulator-min-microvolt = <3300000>; + regulator-max-microvolt = <3300000>; + regulator-name = "vcc-3v3"; }; &reg_dcdc2 { From 8a82d91fa275aaea49be06d7f5b1407ce1c0dd4b Mon Sep 17 00:00:00 2001 From: Pablo Greco Date: Mon, 2 Nov 2020 11:19:29 -0300 Subject: [PATCH 006/296] ARM: dts: sun8i: v40: bananapi-m2-berry: Fix ethernet node Ethernet PHY on BananaPi M2 Berry provides RX and TX delays. Fix ethernet node to reflect that fact. 
Fixes: 27e81e1970a8 ("ARM: dts: sun8i: v40: bananapi-m2-berry: Enable GMAC ethernet controller") Signed-off-by: Pablo Greco Signed-off-by: Maxime Ripard Link: https://lore.kernel.org/r/1604326769-39802-1-git-send-email-pgreco@centosproject.org --- arch/arm/boot/dts/sun8i-v40-bananapi-m2-berry.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/sun8i-v40-bananapi-m2-berry.dts b/arch/arm/boot/dts/sun8i-v40-bananapi-m2-berry.dts index 84eb08295718..47954551f573 100644 --- a/arch/arm/boot/dts/sun8i-v40-bananapi-m2-berry.dts +++ b/arch/arm/boot/dts/sun8i-v40-bananapi-m2-berry.dts @@ -120,7 +120,7 @@ pinctrl-names = "default"; pinctrl-0 = <&gmac_rgmii_pins>; phy-handle = <&phy1>; - phy-mode = "rgmii"; + phy-mode = "rgmii-id"; phy-supply = <&reg_dc1sw>; status = "okay"; }; From dbd7ae5154d5fff7e84a9f3010bb06499017ef29 Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Mon, 2 Nov 2020 16:14:45 +0000 Subject: [PATCH 007/296] xfrm/compat: Translate by copying XFRMA_UNSPEC attribute xfrm_xlate32() translates 64-bit message provided by kernel to be sent for 32-bit listener (acknowledge or monitor). Translator code doesn't expect XFRMA_UNSPEC attribute as it doesn't know its payload. Kernel never attaches such attribute, but a user can. I've searched if any opensource does it and the answer is no. Nothing on github and google finds only tfcproject that has such code commented-out. What will happen if a user sends a netlink message with XFRMA_UNSPEC attribute? Ipsec code ignores this attribute. But if there is a monitor-process or 32-bit user requested ack - kernel will try to translate such message and will hit WARN_ONCE() in xfrm_xlate64_attr(). Deal with XFRMA_UNSPEC by copying the attribute payload with xfrm_nla_cpy(). In result, the default switch-case in xfrm_xlate64_attr() becomes an unused code. Leave those 3 lines in case a new xfrm attribute will be added. 
Fixes: 5461fc0c8d9f ("xfrm/compat: Add 64=>32-bit messages translator") Reported-by: syzbot+a7e701c8385bd8543074@syzkaller.appspotmail.com Signed-off-by: Dmitry Safonov Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_compat.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/xfrm/xfrm_compat.c b/net/xfrm/xfrm_compat.c index e28f0c9ecd6a..17edbf935e35 100644 --- a/net/xfrm/xfrm_compat.c +++ b/net/xfrm/xfrm_compat.c @@ -234,6 +234,7 @@ static int xfrm_xlate64_attr(struct sk_buff *dst, const struct nlattr *src) case XFRMA_PAD: /* Ignore */ return 0; + case XFRMA_UNSPEC: case XFRMA_ALG_AUTH: case XFRMA_ALG_CRYPT: case XFRMA_ALG_COMP: From d1949d045fd67eab8a32a579a8c1ab1681330854 Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Mon, 2 Nov 2020 16:14:46 +0000 Subject: [PATCH 008/296] xfrm/compat: memset(0) 64-bit padding at right place 32-bit messages translated by xfrm_compat can have attributes attached. For all, but XFRMA_SA, XFRMA_POLICY the size of payload is the same in 32-bit UABI and 64-bit UABI. For XFRMA_SA (struct xfrm_usersa_info) and XFRMA_POLICY (struct xfrm_userpolicy_info) it's only tail-padding that is present in 64-bit payload, but not in 32-bit. The proper size for destination nlattr is already calculated by xfrm_user_rcv_calculate_len64() and allocated with kvmalloc(). xfrm_attr_cpy32() copies 32-bit copy_len into 64-bit attribute translated payload, zero-filling possible padding for SA/POLICY. Due to a typo, *pos already has 64-bit payload size, in a result next memset(0) is called on the memory after the translated attribute, not on the tail-padding of it. 
Fixes: 5106f4a8acff ("xfrm/compat: Add 32=>64-bit messages translator") Reported-by: syzbot+c43831072e7df506a646@syzkaller.appspotmail.com Signed-off-by: Dmitry Safonov Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_compat.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/xfrm/xfrm_compat.c b/net/xfrm/xfrm_compat.c index 17edbf935e35..556e9f33b815 100644 --- a/net/xfrm/xfrm_compat.c +++ b/net/xfrm/xfrm_compat.c @@ -388,7 +388,7 @@ static int xfrm_attr_cpy32(void *dst, size_t *pos, const struct nlattr *src, memcpy(nla, src, nla_attr_size(copy_len)); nla->nla_len = nla_attr_size(payload); - *pos += nla_attr_size(payload); + *pos += nla_attr_size(copy_len); nlmsg->nlmsg_len += nla->nla_len; memset(dst + *pos, 0, payload - copy_len); From ad37f77fd3659e87fd9833a83692e0e4eba0f5cd Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Mon, 2 Nov 2020 16:14:47 +0000 Subject: [PATCH 009/296] xfrm/compat: Don't allocate memory with __GFP_ZERO 32-bit to 64-bit messages translator zero-fills the needed padding in the translation, the rest is the actual payload. Don't allocate zero pages as they are not needed. 
Fixes: 5106f4a8acff ("xfrm/compat: Add 32=>64-bit messages translator") Signed-off-by: Dmitry Safonov Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_compat.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/xfrm/xfrm_compat.c b/net/xfrm/xfrm_compat.c index 556e9f33b815..d8e8a11ca845 100644 --- a/net/xfrm/xfrm_compat.c +++ b/net/xfrm/xfrm_compat.c @@ -564,7 +564,7 @@ static struct nlmsghdr *xfrm_user_rcv_msg_compat(const struct nlmsghdr *h32, return NULL; len += NLMSG_HDRLEN; - h64 = kvmalloc(len, GFP_KERNEL | __GFP_ZERO); + h64 = kvmalloc(len, GFP_KERNEL); if (!h64) return ERR_PTR(-ENOMEM); From cb7ff314e1d9f3d6c62fa2c392e41174721ed0b3 Mon Sep 17 00:00:00 2001 From: Jon Hunter Date: Fri, 6 Nov 2020 09:14:20 +0000 Subject: [PATCH 010/296] drm/tegra: sor: Don't warn on probe deferral Deferred probe is an expected return value for tegra_output_probe(). Given that the driver deals with it properly, there's no need to output a warning that may potentially confuse users. 
Signed-off-by: Jon Hunter Signed-off-by: Thierry Reding --- drivers/gpu/drm/tegra/sor.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/tegra/sor.c b/drivers/gpu/drm/tegra/sor.c index e88a17c2937f..a5b944dacb35 100644 --- a/drivers/gpu/drm/tegra/sor.c +++ b/drivers/gpu/drm/tegra/sor.c @@ -3764,10 +3764,9 @@ static int tegra_sor_probe(struct platform_device *pdev) return err; err = tegra_output_probe(&sor->output); - if (err < 0) { - dev_err(&pdev->dev, "failed to probe output: %d\n", err); - return err; - } + if (err < 0) + return dev_err_probe(&pdev->dev, err, + "failed to probe output\n"); if (sor->ops && sor->ops->probe) { err = sor->ops->probe(sor); From 48f486e13ffdb49fbb9b38c21d0e108ed60ab1a2 Mon Sep 17 00:00:00 2001 From: Yu Kuai Date: Tue, 10 Nov 2020 09:14:43 +0800 Subject: [PATCH 011/296] net: xfrm: fix memory leak in xfrm_user_policy() if xfrm_get_translator() failed, xfrm_user_policy() return without freeing 'data', which is allocated in memdup_sockptr(). 
Fixes: 96392ee5a13b ("xfrm/compat: Translate 32-bit user_policy from sockptr") Reported-by: Hulk Robot Signed-off-by: Yu Kuai Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_state.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index a77da7aae6fe..2f1517827995 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -2382,8 +2382,10 @@ int xfrm_user_policy(struct sock *sk, int optname, sockptr_t optval, int optlen) if (in_compat_syscall()) { struct xfrm_translator *xtr = xfrm_get_translator(); - if (!xtr) + if (!xtr) { + kfree(data); return -EOPNOTSUPP; + } err = xtr->xlate_user_policy_sockptr(&data, optlen); xfrm_put_translator(xtr); From 5c1d644c09dbc13b2dc652435786e42b05ac1bb7 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 13 Oct 2020 10:51:58 +0100 Subject: [PATCH 012/296] drm/tegra: sor: Ensure regulators are disabled on teardown The Tegra SOR driver uses the devm infrastructure to request regulators, but enables them without registering them with the infrastructure. 
This results in the following splat if probing fails for any odd reason (such as dependencies not being available): [ 8.974187] tegra-sor 15580000.sor: cannot get HDMI supply: -517 [ 9.414403] tegra-sor 15580000.sor: failed to probe HDMI: -517 [ 9.421240] ------------[ cut here ]------------ [ 9.425879] WARNING: CPU: 1 PID: 164 at drivers/regulator/core.c:2089 _regulator_put.part.0+0x16c/0x174 [ 9.435259] Modules linked in: tegra_drm(E+) cec(E) ahci_tegra(E) drm_kms_helper(E) drm(E) libahci_platform(E) libahci(E) max77620_regulator(E) xhci_tegra(E+) sdhci_tegra(E) xhci_hcd(E) libata(E) sdhci_pltfm(E) cqhci(E) fixed(E) usbcore(E) scsi_mod(E) sdhci(E) host1x(E) [ 9.459211] CPU: 1 PID: 164 Comm: systemd-udevd Tainted: G S D W E 5.9.0-rc7-00298-gf6337624c4fe #1980 [ 9.469285] Hardware name: NVIDIA Jetson TX2 Developer Kit (DT) [ 9.475202] pstate: 80000005 (Nzcv daif -PAN -UAO BTYPE=--) [ 9.480784] pc : _regulator_put.part.0+0x16c/0x174 [ 9.485581] lr : regulator_put+0x44/0x60 [ 9.489501] sp : ffffffc011d837b0 [ 9.492814] x29: ffffffc011d837b0 x28: ffffff81dd085900 [ 9.498141] x27: ffffff81de1c8ec0 x26: ffffff81de1c8c10 [ 9.503464] x25: ffffff81dd085800 x24: ffffffc008f2c6b0 [ 9.508790] x23: ffffffc0117373f0 x22: 0000000000000005 [ 9.514101] x21: ffffff81dd085900 x20: ffffffc01172b098 [ 9.515822] ata1: SATA link down (SStatus 0 SControl 300) [ 9.519426] x19: ffffff81dd085100 x18: 0000000000000030 [ 9.530122] x17: 0000000000000000 x16: 0000000000000000 [ 9.535453] x15: 0000000000000000 x14: 000000000000038f [ 9.540777] x13: 0000000000000003 x12: 0000000000000040 [ 9.546105] x11: ffffff81eb800000 x10: 0000000000000ae0 [ 9.551417] x9 : ffffffc0106fea24 x8 : ffffff81de83e6c0 [ 9.556728] x7 : 0000000000000018 x6 : 00000000000003c3 [ 9.562064] x5 : 0000000000005660 x4 : 0000000000000000 [ 9.567392] x3 : ffffffc01172b388 x2 : ffffff81de83db80 [ 9.572702] x1 : 0000000000000000 x0 : 0000000000000001 [ 9.578034] Call trace: [ 9.580494] _regulator_put.part.0+0x16c/0x174 [ 
9.584940] regulator_put+0x44/0x60 [ 9.588522] devm_regulator_release+0x20/0x2c [ 9.592885] release_nodes+0x1c8/0x2c0 [ 9.596636] devres_release_all+0x44/0x6c [ 9.600649] really_probe+0x1ec/0x504 [ 9.604316] driver_probe_device+0x100/0x170 [ 9.608589] device_driver_attach+0xcc/0xd4 [ 9.612774] __driver_attach+0xb0/0x17c [ 9.616614] bus_for_each_dev+0x7c/0xd4 [ 9.620450] driver_attach+0x30/0x3c [ 9.624027] bus_add_driver+0x154/0x250 [ 9.627867] driver_register+0x84/0x140 [ 9.631719] __platform_register_drivers+0xa0/0x180 [ 9.636660] host1x_drm_init+0x60/0x1000 [tegra_drm] [ 9.641629] do_one_initcall+0x54/0x2d0 [ 9.645490] do_init_module+0x68/0x29c [ 9.649244] load_module+0x2178/0x26c0 [ 9.652997] __do_sys_finit_module+0xb0/0x120 [ 9.657356] __arm64_sys_finit_module+0x2c/0x40 [ 9.661902] el0_svc_common.constprop.0+0x80/0x240 [ 9.666701] do_el0_svc+0x30/0xa0 [ 9.670022] el0_svc+0x18/0x50 [ 9.673081] el0_sync_handler+0x90/0x318 [ 9.677006] el0_sync+0x158/0x180 [ 9.680324] ---[ end trace 90f6c89d62d85ff6 ]--- Instead, let's register a callback that will disable the regulators on teardown. This allows for the removal of the .remove callbacks, which are not needed anymore. 
Signed-off-by: Marc Zyngier Signed-off-by: Thierry Reding --- drivers/gpu/drm/tegra/sor.c | 59 +++++++++++++++---------------------- 1 file changed, 24 insertions(+), 35 deletions(-) diff --git a/drivers/gpu/drm/tegra/sor.c b/drivers/gpu/drm/tegra/sor.c index a5b944dacb35..00656d8b98e2 100644 --- a/drivers/gpu/drm/tegra/sor.c +++ b/drivers/gpu/drm/tegra/sor.c @@ -397,7 +397,6 @@ struct tegra_sor; struct tegra_sor_ops { const char *name; int (*probe)(struct tegra_sor *sor); - int (*remove)(struct tegra_sor *sor); void (*audio_enable)(struct tegra_sor *sor); void (*audio_disable)(struct tegra_sor *sor); }; @@ -2942,6 +2941,24 @@ static const struct drm_encoder_helper_funcs tegra_sor_dp_helpers = { .atomic_check = tegra_sor_encoder_atomic_check, }; +static void tegra_sor_disable_regulator(void *data) +{ + struct regulator *reg = data; + + regulator_disable(reg); +} + +static int tegra_sor_enable_regulator(struct tegra_sor *sor, struct regulator *reg) +{ + int err; + + err = regulator_enable(reg); + if (err) + return err; + + return devm_add_action_or_reset(sor->dev, tegra_sor_disable_regulator, reg); +} + static int tegra_sor_hdmi_probe(struct tegra_sor *sor) { int err; @@ -2953,7 +2970,7 @@ static int tegra_sor_hdmi_probe(struct tegra_sor *sor) return PTR_ERR(sor->avdd_io_supply); } - err = regulator_enable(sor->avdd_io_supply); + err = tegra_sor_enable_regulator(sor, sor->avdd_io_supply); if (err < 0) { dev_err(sor->dev, "failed to enable AVDD I/O supply: %d\n", err); @@ -2967,7 +2984,7 @@ static int tegra_sor_hdmi_probe(struct tegra_sor *sor) return PTR_ERR(sor->vdd_pll_supply); } - err = regulator_enable(sor->vdd_pll_supply); + err = tegra_sor_enable_regulator(sor, sor->vdd_pll_supply); if (err < 0) { dev_err(sor->dev, "failed to enable VDD PLL supply: %d\n", err); @@ -2981,7 +2998,7 @@ static int tegra_sor_hdmi_probe(struct tegra_sor *sor) return PTR_ERR(sor->hdmi_supply); } - err = regulator_enable(sor->hdmi_supply); + err = tegra_sor_enable_regulator(sor, 
sor->hdmi_supply); if (err < 0) { dev_err(sor->dev, "failed to enable HDMI supply: %d\n", err); return err; @@ -2992,19 +3009,9 @@ static int tegra_sor_hdmi_probe(struct tegra_sor *sor) return 0; } -static int tegra_sor_hdmi_remove(struct tegra_sor *sor) -{ - regulator_disable(sor->hdmi_supply); - regulator_disable(sor->vdd_pll_supply); - regulator_disable(sor->avdd_io_supply); - - return 0; -} - static const struct tegra_sor_ops tegra_sor_hdmi_ops = { .name = "HDMI", .probe = tegra_sor_hdmi_probe, - .remove = tegra_sor_hdmi_remove, .audio_enable = tegra_sor_hdmi_audio_enable, .audio_disable = tegra_sor_hdmi_audio_disable, }; @@ -3017,7 +3024,7 @@ static int tegra_sor_dp_probe(struct tegra_sor *sor) if (IS_ERR(sor->avdd_io_supply)) return PTR_ERR(sor->avdd_io_supply); - err = regulator_enable(sor->avdd_io_supply); + err = tegra_sor_enable_regulator(sor, sor->avdd_io_supply); if (err < 0) return err; @@ -3025,25 +3032,16 @@ static int tegra_sor_dp_probe(struct tegra_sor *sor) if (IS_ERR(sor->vdd_pll_supply)) return PTR_ERR(sor->vdd_pll_supply); - err = regulator_enable(sor->vdd_pll_supply); + err = tegra_sor_enable_regulator(sor, sor->vdd_pll_supply); if (err < 0) return err; return 0; } -static int tegra_sor_dp_remove(struct tegra_sor *sor) -{ - regulator_disable(sor->vdd_pll_supply); - regulator_disable(sor->avdd_io_supply); - - return 0; -} - static const struct tegra_sor_ops tegra_sor_dp_ops = { .name = "DP", .probe = tegra_sor_dp_probe, - .remove = tegra_sor_dp_remove, }; static int tegra_sor_init(struct host1x_client *client) @@ -3773,7 +3771,7 @@ static int tegra_sor_probe(struct platform_device *pdev) if (err < 0) { dev_err(&pdev->dev, "failed to probe %s: %d\n", sor->ops->name, err); - goto output; + goto remove; } } @@ -3954,9 +3952,6 @@ unregister: rpm_disable: pm_runtime_disable(&pdev->dev); remove: - if (sor->ops && sor->ops->remove) - sor->ops->remove(sor); -output: tegra_output_remove(&sor->output); return err; } @@ -3975,12 +3970,6 @@ static int 
tegra_sor_remove(struct platform_device *pdev) pm_runtime_disable(&pdev->dev); - if (sor->ops && sor->ops->remove) { - err = sor->ops->remove(sor); - if (err < 0) - dev_err(&pdev->dev, "failed to remove SOR: %d\n", err); - } - tegra_output_remove(&sor->output); return 0; From 41f71629b4c432f8dd47d70ace813be5f79d4d75 Mon Sep 17 00:00:00 2001 From: Deepak R Varma Date: Thu, 5 Nov 2020 23:29:28 +0530 Subject: [PATCH 013/296] drm/tegra: replace idr_init() by idr_init_base() idr_init() uses base 0 which is an invalid identifier for this driver. The new function idr_init_base allows IDR to set the ID lookup from base 1. This avoids all lookups that otherwise starts from 0 since 0 is always unused. References: commit 6ce711f27500 ("idr: Make 1-based IDRs more efficient") Signed-off-by: Deepak R Varma Signed-off-by: Thierry Reding --- drivers/gpu/drm/tegra/drm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/tegra/drm.c b/drivers/gpu/drm/tegra/drm.c index ba9d1c3e7cac..e4baf07992a4 100644 --- a/drivers/gpu/drm/tegra/drm.c +++ b/drivers/gpu/drm/tegra/drm.c @@ -90,7 +90,7 @@ static int tegra_drm_open(struct drm_device *drm, struct drm_file *filp) if (!fpriv) return -ENOMEM; - idr_init(&fpriv->contexts); + idr_init_base(&fpriv->contexts, 1); mutex_init(&fpriv->lock); filp->driver_priv = fpriv; From 123f01a0c989905a1cef6c1397a022eb321474d8 Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Fri, 13 Nov 2020 21:38:35 +0100 Subject: [PATCH 014/296] drm/tegra: output: Do not put OF node twice The original patch for commit 3d2e7aec7013 ("drm/tegra: output: Don't leak OF node on error") contained this hunk, but it was accidentally dropped during conflict resolution. This causes use-after-free errors on devices that use an I2C controller for HDMI DDC/CI on Tegra210 and later. 
Fixes: 3d2e7aec7013 ("drm/tegra: output: Don't leak OF node on error") Signed-off-by: Thierry Reding --- drivers/gpu/drm/tegra/output.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/gpu/drm/tegra/output.c b/drivers/gpu/drm/tegra/output.c index 5a4fd0dbf4cf..47d26b5d9945 100644 --- a/drivers/gpu/drm/tegra/output.c +++ b/drivers/gpu/drm/tegra/output.c @@ -129,7 +129,6 @@ int tegra_output_probe(struct tegra_output *output) if (!output->ddc) { err = -EPROBE_DEFER; - of_node_put(ddc); return err; } } From 4bbf439b09c5ac3f8b3e9584fe080375d8d0ad2d Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 12 Nov 2020 14:40:37 -0500 Subject: [PATCH 015/296] fix return values of seq_read_iter() Unlike ->read(), ->read_iter() instances *must* return the amount of data they'd left in iterator. For ->read() returning less than it has actually copied is a QoI issue; read(fd, unmapped_page - 5, 8) is allowed to fill all 5 bytes of destination and return 4; it's not nice to caller, but POSIX allows pretty much anything in such situation, up to and including a SIGSEGV. generic_file_splice_read() uses pipe-backed iterator as destination; there a short copy comes from pipe being full, not from running into an un{mapped,writable} page in the middle of destination as we have for iovec-backed iterators read(2) uses. And there we rely upon the ->read_iter() reporting the actual amount it has left in destination. Conversion of a ->read() instance into ->read_iter() has to watch out for that. If you really need an "all or nothing" kind of behaviour somewhere, you need to do iov_iter_revert() to prune the partial copy. In case of seq_read_iter() we can handle short copy just fine; the data is in m->buf and next call will fetch it from there. 
Fixes: d4d50710a8b4 (seq_file: add seq_read_iter) Tested-by: Nathan Chancellor Signed-off-by: Al Viro --- fs/seq_file.c | 57 ++++++++++++++++++++++++--------------------------- 1 file changed, 27 insertions(+), 30 deletions(-) diff --git a/fs/seq_file.c b/fs/seq_file.c index 3b20e21604e7..03a369ccd28c 100644 --- a/fs/seq_file.c +++ b/fs/seq_file.c @@ -168,12 +168,14 @@ EXPORT_SYMBOL(seq_read); ssize_t seq_read_iter(struct kiocb *iocb, struct iov_iter *iter) { struct seq_file *m = iocb->ki_filp->private_data; - size_t size = iov_iter_count(iter); size_t copied = 0; size_t n; void *p; int err = 0; + if (!iov_iter_count(iter)) + return 0; + mutex_lock(&m->lock); /* @@ -206,36 +208,34 @@ ssize_t seq_read_iter(struct kiocb *iocb, struct iov_iter *iter) if (!m->buf) goto Enomem; } - /* if not empty - flush it first */ + // something left in the buffer - copy it out first if (m->count) { - n = min(m->count, size); - if (copy_to_iter(m->buf + m->from, n, iter) != n) - goto Efault; + n = copy_to_iter(m->buf + m->from, m->count, iter); m->count -= n; m->from += n; - size -= n; copied += n; - if (!size) + if (m->count) // hadn't managed to copy everything goto Done; } - /* we need at least one record in buffer */ + // get a non-empty record in the buffer m->from = 0; p = m->op->start(m, &m->index); while (1) { err = PTR_ERR(p); - if (!p || IS_ERR(p)) + if (!p || IS_ERR(p)) // EOF or an error break; err = m->op->show(m, p); - if (err < 0) + if (err < 0) // hard error break; - if (unlikely(err)) + if (unlikely(err)) // ->show() says "skip it" m->count = 0; - if (unlikely(!m->count)) { + if (unlikely(!m->count)) { // empty record p = m->op->next(m, p, &m->index); continue; } - if (m->count < m->size) + if (!seq_has_overflowed(m)) // got it goto Fill; + // need a bigger buffer m->op->stop(m, p); kvfree(m->buf); m->count = 0; @@ -244,11 +244,14 @@ ssize_t seq_read_iter(struct kiocb *iocb, struct iov_iter *iter) goto Enomem; p = m->op->start(m, &m->index); } + // EOF or an error 
m->op->stop(m, p); m->count = 0; goto Done; Fill: - /* they want more? let's try to get some more */ + // one non-empty record is in the buffer; if they want more, + // try to fit more in, but in any case we need to advance + // the iterator once for every record shown. while (1) { size_t offs = m->count; loff_t pos = m->index; @@ -259,30 +262,27 @@ Fill: m->op->next); m->index++; } - if (!p || IS_ERR(p)) { - err = PTR_ERR(p); + if (!p || IS_ERR(p)) // no next record for us break; - } - if (m->count >= size) + if (m->count >= iov_iter_count(iter)) break; err = m->op->show(m, p); - if (seq_has_overflowed(m) || err) { + if (err > 0) { // ->show() says "skip it" m->count = offs; - if (likely(err <= 0)) - break; + } else if (err || seq_has_overflowed(m)) { + m->count = offs; + break; } } m->op->stop(m, p); - n = min(m->count, size); - if (copy_to_iter(m->buf, n, iter) != n) - goto Efault; + n = copy_to_iter(m->buf, m->count, iter); copied += n; m->count -= n; m->from = n; Done: - if (!copied) - copied = err; - else { + if (unlikely(!copied)) { + copied = m->count ? -EFAULT : err; + } else { iocb->ki_pos += copied; m->read_pos += copied; } @@ -291,9 +291,6 @@ Done: Enomem: err = -ENOMEM; goto Done; -Efault: - err = -EFAULT; - goto Done; } EXPORT_SYMBOL(seq_read_iter); From 59024c936561090d766370e839a880dd42232c33 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Tue, 10 Nov 2020 21:59:23 +0200 Subject: [PATCH 016/296] pinctrl: jasperlake: Unhide SPI group of pins If the group of pins is hidden in the pin list it affects the register offset calculation despite fixed GPIO base. Hence, the offsets of all pins after the hidden group are broken. Instead we have to unhide the group and use a flag to exclude it from GPIO number space. 
Fixes: e278dcb7048b ("pinctrl: intel: Add Intel Jasper Lake pin controller support") Reported-by: Divagar Mohandass Signed-off-by: Andy Shevchenko Acked-by: Mika Westerberg --- drivers/pinctrl/intel/pinctrl-jasperlake.c | 450 +++++++++++---------- 1 file changed, 230 insertions(+), 220 deletions(-) diff --git a/drivers/pinctrl/intel/pinctrl-jasperlake.c b/drivers/pinctrl/intel/pinctrl-jasperlake.c index 9bd0e8e6310c..c5e204c8da9c 100644 --- a/drivers/pinctrl/intel/pinctrl-jasperlake.c +++ b/drivers/pinctrl/intel/pinctrl-jasperlake.c @@ -65,252 +65,263 @@ static const struct pinctrl_pin_desc jsl_pins[] = { PINCTRL_PIN(17, "EMMC_CLK"), PINCTRL_PIN(18, "EMMC_RESETB"), PINCTRL_PIN(19, "A4WP_PRESENT"), + /* SPI */ + PINCTRL_PIN(20, "SPI0_IO_2"), + PINCTRL_PIN(21, "SPI0_IO_3"), + PINCTRL_PIN(22, "SPI0_MOSI_IO_0"), + PINCTRL_PIN(23, "SPI0_MISO_IO_1"), + PINCTRL_PIN(24, "SPI0_TPM_CSB"), + PINCTRL_PIN(25, "SPI0_FLASH_0_CSB"), + PINCTRL_PIN(26, "SPI0_FLASH_1_CSB"), + PINCTRL_PIN(27, "SPI0_CLK"), + PINCTRL_PIN(28, "SPI0_CLK_LOOPBK"), /* GPP_B */ - PINCTRL_PIN(20, "CORE_VID_0"), - PINCTRL_PIN(21, "CORE_VID_1"), - PINCTRL_PIN(22, "VRALERTB"), - PINCTRL_PIN(23, "CPU_GP_2"), - PINCTRL_PIN(24, "CPU_GP_3"), - PINCTRL_PIN(25, "SRCCLKREQB_0"), - PINCTRL_PIN(26, "SRCCLKREQB_1"), - PINCTRL_PIN(27, "SRCCLKREQB_2"), - PINCTRL_PIN(28, "SRCCLKREQB_3"), - PINCTRL_PIN(29, "SRCCLKREQB_4"), - PINCTRL_PIN(30, "SRCCLKREQB_5"), - PINCTRL_PIN(31, "PMCALERTB"), - PINCTRL_PIN(32, "SLP_S0B"), - PINCTRL_PIN(33, "PLTRSTB"), - PINCTRL_PIN(34, "SPKR"), - PINCTRL_PIN(35, "GSPI0_CS0B"), - PINCTRL_PIN(36, "GSPI0_CLK"), - PINCTRL_PIN(37, "GSPI0_MISO"), - PINCTRL_PIN(38, "GSPI0_MOSI"), - PINCTRL_PIN(39, "GSPI1_CS0B"), - PINCTRL_PIN(40, "GSPI1_CLK"), - PINCTRL_PIN(41, "GSPI1_MISO"), - PINCTRL_PIN(42, "GSPI1_MOSI"), - PINCTRL_PIN(43, "DDSP_HPD_A"), - PINCTRL_PIN(44, "GSPI0_CLK_LOOPBK"), - PINCTRL_PIN(45, "GSPI1_CLK_LOOPBK"), + PINCTRL_PIN(29, "CORE_VID_0"), + PINCTRL_PIN(30, "CORE_VID_1"), + PINCTRL_PIN(31, 
"VRALERTB"), + PINCTRL_PIN(32, "CPU_GP_2"), + PINCTRL_PIN(33, "CPU_GP_3"), + PINCTRL_PIN(34, "SRCCLKREQB_0"), + PINCTRL_PIN(35, "SRCCLKREQB_1"), + PINCTRL_PIN(36, "SRCCLKREQB_2"), + PINCTRL_PIN(37, "SRCCLKREQB_3"), + PINCTRL_PIN(38, "SRCCLKREQB_4"), + PINCTRL_PIN(39, "SRCCLKREQB_5"), + PINCTRL_PIN(40, "PMCALERTB"), + PINCTRL_PIN(41, "SLP_S0B"), + PINCTRL_PIN(42, "PLTRSTB"), + PINCTRL_PIN(43, "SPKR"), + PINCTRL_PIN(44, "GSPI0_CS0B"), + PINCTRL_PIN(45, "GSPI0_CLK"), + PINCTRL_PIN(46, "GSPI0_MISO"), + PINCTRL_PIN(47, "GSPI0_MOSI"), + PINCTRL_PIN(48, "GSPI1_CS0B"), + PINCTRL_PIN(49, "GSPI1_CLK"), + PINCTRL_PIN(50, "GSPI1_MISO"), + PINCTRL_PIN(51, "GSPI1_MOSI"), + PINCTRL_PIN(52, "DDSP_HPD_A"), + PINCTRL_PIN(53, "GSPI0_CLK_LOOPBK"), + PINCTRL_PIN(54, "GSPI1_CLK_LOOPBK"), /* GPP_A */ - PINCTRL_PIN(46, "ESPI_IO_0"), - PINCTRL_PIN(47, "ESPI_IO_1"), - PINCTRL_PIN(48, "ESPI_IO_2"), - PINCTRL_PIN(49, "ESPI_IO_3"), - PINCTRL_PIN(50, "ESPI_CSB"), - PINCTRL_PIN(51, "ESPI_CLK"), - PINCTRL_PIN(52, "ESPI_RESETB"), - PINCTRL_PIN(53, "SMBCLK"), - PINCTRL_PIN(54, "SMBDATA"), - PINCTRL_PIN(55, "SMBALERTB"), - PINCTRL_PIN(56, "CPU_GP_0"), - PINCTRL_PIN(57, "CPU_GP_1"), - PINCTRL_PIN(58, "USB2_OCB_1"), - PINCTRL_PIN(59, "USB2_OCB_2"), - PINCTRL_PIN(60, "USB2_OCB_3"), - PINCTRL_PIN(61, "DDSP_HPD_A_TIME_SYNC_0"), - PINCTRL_PIN(62, "DDSP_HPD_B"), - PINCTRL_PIN(63, "DDSP_HPD_C"), - PINCTRL_PIN(64, "USB2_OCB_0"), - PINCTRL_PIN(65, "PCHHOTB"), - PINCTRL_PIN(66, "ESPI_CLK_LOOPBK"), + PINCTRL_PIN(55, "ESPI_IO_0"), + PINCTRL_PIN(56, "ESPI_IO_1"), + PINCTRL_PIN(57, "ESPI_IO_2"), + PINCTRL_PIN(58, "ESPI_IO_3"), + PINCTRL_PIN(59, "ESPI_CSB"), + PINCTRL_PIN(60, "ESPI_CLK"), + PINCTRL_PIN(61, "ESPI_RESETB"), + PINCTRL_PIN(62, "SMBCLK"), + PINCTRL_PIN(63, "SMBDATA"), + PINCTRL_PIN(64, "SMBALERTB"), + PINCTRL_PIN(65, "CPU_GP_0"), + PINCTRL_PIN(66, "CPU_GP_1"), + PINCTRL_PIN(67, "USB2_OCB_1"), + PINCTRL_PIN(68, "USB2_OCB_2"), + PINCTRL_PIN(69, "USB2_OCB_3"), + PINCTRL_PIN(70, "DDSP_HPD_A_TIME_SYNC_0"), + 
PINCTRL_PIN(71, "DDSP_HPD_B"), + PINCTRL_PIN(72, "DDSP_HPD_C"), + PINCTRL_PIN(73, "USB2_OCB_0"), + PINCTRL_PIN(74, "PCHHOTB"), + PINCTRL_PIN(75, "ESPI_CLK_LOOPBK"), /* GPP_S */ - PINCTRL_PIN(67, "SNDW1_CLK"), - PINCTRL_PIN(68, "SNDW1_DATA"), - PINCTRL_PIN(69, "SNDW2_CLK"), - PINCTRL_PIN(70, "SNDW2_DATA"), - PINCTRL_PIN(71, "SNDW1_CLK"), - PINCTRL_PIN(72, "SNDW1_DATA"), - PINCTRL_PIN(73, "SNDW4_CLK_DMIC_CLK_0"), - PINCTRL_PIN(74, "SNDW4_DATA_DMIC_DATA_0"), + PINCTRL_PIN(76, "SNDW1_CLK"), + PINCTRL_PIN(77, "SNDW1_DATA"), + PINCTRL_PIN(78, "SNDW2_CLK"), + PINCTRL_PIN(79, "SNDW2_DATA"), + PINCTRL_PIN(80, "SNDW1_CLK"), + PINCTRL_PIN(81, "SNDW1_DATA"), + PINCTRL_PIN(82, "SNDW4_CLK_DMIC_CLK_0"), + PINCTRL_PIN(83, "SNDW4_DATA_DMIC_DATA_0"), /* GPP_R */ - PINCTRL_PIN(75, "HDA_BCLK"), - PINCTRL_PIN(76, "HDA_SYNC"), - PINCTRL_PIN(77, "HDA_SDO"), - PINCTRL_PIN(78, "HDA_SDI_0"), - PINCTRL_PIN(79, "HDA_RSTB"), - PINCTRL_PIN(80, "HDA_SDI_1"), - PINCTRL_PIN(81, "I2S1_SFRM"), - PINCTRL_PIN(82, "I2S1_TXD"), + PINCTRL_PIN(84, "HDA_BCLK"), + PINCTRL_PIN(85, "HDA_SYNC"), + PINCTRL_PIN(86, "HDA_SDO"), + PINCTRL_PIN(87, "HDA_SDI_0"), + PINCTRL_PIN(88, "HDA_RSTB"), + PINCTRL_PIN(89, "HDA_SDI_1"), + PINCTRL_PIN(90, "I2S1_SFRM"), + PINCTRL_PIN(91, "I2S1_TXD"), /* GPP_H */ - PINCTRL_PIN(83, "GPPC_H_0"), - PINCTRL_PIN(84, "SD_PWR_EN_B"), - PINCTRL_PIN(85, "MODEM_CLKREQ"), - PINCTRL_PIN(86, "SX_EXIT_HOLDOFFB"), - PINCTRL_PIN(87, "I2C2_SDA"), - PINCTRL_PIN(88, "I2C2_SCL"), - PINCTRL_PIN(89, "I2C3_SDA"), - PINCTRL_PIN(90, "I2C3_SCL"), - PINCTRL_PIN(91, "I2C4_SDA"), - PINCTRL_PIN(92, "I2C4_SCL"), - PINCTRL_PIN(93, "CPU_VCCIO_PWR_GATEB"), - PINCTRL_PIN(94, "I2S2_SCLK"), - PINCTRL_PIN(95, "I2S2_SFRM"), - PINCTRL_PIN(96, "I2S2_TXD"), - PINCTRL_PIN(97, "I2S2_RXD"), - PINCTRL_PIN(98, "I2S1_SCLK"), - PINCTRL_PIN(99, "GPPC_H_16"), - PINCTRL_PIN(100, "GPPC_H_17"), - PINCTRL_PIN(101, "GPPC_H_18"), - PINCTRL_PIN(102, "GPPC_H_19"), - PINCTRL_PIN(103, "GPPC_H_20"), - PINCTRL_PIN(104, "GPPC_H_21"), - 
PINCTRL_PIN(105, "GPPC_H_22"), - PINCTRL_PIN(106, "GPPC_H_23"), + PINCTRL_PIN(92, "GPPC_H_0"), + PINCTRL_PIN(93, "SD_PWR_EN_B"), + PINCTRL_PIN(94, "MODEM_CLKREQ"), + PINCTRL_PIN(95, "SX_EXIT_HOLDOFFB"), + PINCTRL_PIN(96, "I2C2_SDA"), + PINCTRL_PIN(97, "I2C2_SCL"), + PINCTRL_PIN(98, "I2C3_SDA"), + PINCTRL_PIN(99, "I2C3_SCL"), + PINCTRL_PIN(100, "I2C4_SDA"), + PINCTRL_PIN(101, "I2C4_SCL"), + PINCTRL_PIN(102, "CPU_VCCIO_PWR_GATEB"), + PINCTRL_PIN(103, "I2S2_SCLK"), + PINCTRL_PIN(104, "I2S2_SFRM"), + PINCTRL_PIN(105, "I2S2_TXD"), + PINCTRL_PIN(106, "I2S2_RXD"), + PINCTRL_PIN(107, "I2S1_SCLK"), + PINCTRL_PIN(108, "GPPC_H_16"), + PINCTRL_PIN(109, "GPPC_H_17"), + PINCTRL_PIN(110, "GPPC_H_18"), + PINCTRL_PIN(111, "GPPC_H_19"), + PINCTRL_PIN(112, "GPPC_H_20"), + PINCTRL_PIN(113, "GPPC_H_21"), + PINCTRL_PIN(114, "GPPC_H_22"), + PINCTRL_PIN(115, "GPPC_H_23"), /* GPP_D */ - PINCTRL_PIN(107, "SPI1_CSB"), - PINCTRL_PIN(108, "SPI1_CLK"), - PINCTRL_PIN(109, "SPI1_MISO_IO_1"), - PINCTRL_PIN(110, "SPI1_MOSI_IO_0"), - PINCTRL_PIN(111, "ISH_I2C0_SDA"), - PINCTRL_PIN(112, "ISH_I2C0_SCL"), - PINCTRL_PIN(113, "ISH_I2C1_SDA"), - PINCTRL_PIN(114, "ISH_I2C1_SCL"), - PINCTRL_PIN(115, "ISH_SPI_CSB"), - PINCTRL_PIN(116, "ISH_SPI_CLK"), - PINCTRL_PIN(117, "ISH_SPI_MISO"), - PINCTRL_PIN(118, "ISH_SPI_MOSI"), - PINCTRL_PIN(119, "ISH_UART0_RXD"), - PINCTRL_PIN(120, "ISH_UART0_TXD"), - PINCTRL_PIN(121, "ISH_UART0_RTSB"), - PINCTRL_PIN(122, "ISH_UART0_CTSB"), - PINCTRL_PIN(123, "SPI1_IO_2"), - PINCTRL_PIN(124, "SPI1_IO_3"), - PINCTRL_PIN(125, "I2S_MCLK"), - PINCTRL_PIN(126, "CNV_MFUART2_RXD"), - PINCTRL_PIN(127, "CNV_MFUART2_TXD"), - PINCTRL_PIN(128, "CNV_PA_BLANKING"), - PINCTRL_PIN(129, "I2C5_SDA"), - PINCTRL_PIN(130, "I2C5_SCL"), - PINCTRL_PIN(131, "GSPI2_CLK_LOOPBK"), - PINCTRL_PIN(132, "SPI1_CLK_LOOPBK"), + PINCTRL_PIN(116, "SPI1_CSB"), + PINCTRL_PIN(117, "SPI1_CLK"), + PINCTRL_PIN(118, "SPI1_MISO_IO_1"), + PINCTRL_PIN(119, "SPI1_MOSI_IO_0"), + PINCTRL_PIN(120, "ISH_I2C0_SDA"), + 
PINCTRL_PIN(121, "ISH_I2C0_SCL"), + PINCTRL_PIN(122, "ISH_I2C1_SDA"), + PINCTRL_PIN(123, "ISH_I2C1_SCL"), + PINCTRL_PIN(124, "ISH_SPI_CSB"), + PINCTRL_PIN(125, "ISH_SPI_CLK"), + PINCTRL_PIN(126, "ISH_SPI_MISO"), + PINCTRL_PIN(127, "ISH_SPI_MOSI"), + PINCTRL_PIN(128, "ISH_UART0_RXD"), + PINCTRL_PIN(129, "ISH_UART0_TXD"), + PINCTRL_PIN(130, "ISH_UART0_RTSB"), + PINCTRL_PIN(131, "ISH_UART0_CTSB"), + PINCTRL_PIN(132, "SPI1_IO_2"), + PINCTRL_PIN(133, "SPI1_IO_3"), + PINCTRL_PIN(134, "I2S_MCLK"), + PINCTRL_PIN(135, "CNV_MFUART2_RXD"), + PINCTRL_PIN(136, "CNV_MFUART2_TXD"), + PINCTRL_PIN(137, "CNV_PA_BLANKING"), + PINCTRL_PIN(138, "I2C5_SDA"), + PINCTRL_PIN(139, "I2C5_SCL"), + PINCTRL_PIN(140, "GSPI2_CLK_LOOPBK"), + PINCTRL_PIN(141, "SPI1_CLK_LOOPBK"), /* vGPIO */ - PINCTRL_PIN(133, "CNV_BTEN"), - PINCTRL_PIN(134, "CNV_WCEN"), - PINCTRL_PIN(135, "CNV_BT_HOST_WAKEB"), - PINCTRL_PIN(136, "CNV_BT_IF_SELECT"), - PINCTRL_PIN(137, "vCNV_BT_UART_TXD"), - PINCTRL_PIN(138, "vCNV_BT_UART_RXD"), - PINCTRL_PIN(139, "vCNV_BT_UART_CTS_B"), - PINCTRL_PIN(140, "vCNV_BT_UART_RTS_B"), - PINCTRL_PIN(141, "vCNV_MFUART1_TXD"), - PINCTRL_PIN(142, "vCNV_MFUART1_RXD"), - PINCTRL_PIN(143, "vCNV_MFUART1_CTS_B"), - PINCTRL_PIN(144, "vCNV_MFUART1_RTS_B"), - PINCTRL_PIN(145, "vUART0_TXD"), - PINCTRL_PIN(146, "vUART0_RXD"), - PINCTRL_PIN(147, "vUART0_CTS_B"), - PINCTRL_PIN(148, "vUART0_RTS_B"), - PINCTRL_PIN(149, "vISH_UART0_TXD"), - PINCTRL_PIN(150, "vISH_UART0_RXD"), - PINCTRL_PIN(151, "vISH_UART0_CTS_B"), - PINCTRL_PIN(152, "vISH_UART0_RTS_B"), - PINCTRL_PIN(153, "vCNV_BT_I2S_BCLK"), - PINCTRL_PIN(154, "vCNV_BT_I2S_WS_SYNC"), - PINCTRL_PIN(155, "vCNV_BT_I2S_SDO"), - PINCTRL_PIN(156, "vCNV_BT_I2S_SDI"), - PINCTRL_PIN(157, "vI2S2_SCLK"), - PINCTRL_PIN(158, "vI2S2_SFRM"), - PINCTRL_PIN(159, "vI2S2_TXD"), - PINCTRL_PIN(160, "vI2S2_RXD"), - PINCTRL_PIN(161, "vSD3_CD_B"), + PINCTRL_PIN(142, "CNV_BTEN"), + PINCTRL_PIN(143, "CNV_WCEN"), + PINCTRL_PIN(144, "CNV_BT_HOST_WAKEB"), + PINCTRL_PIN(145, 
"CNV_BT_IF_SELECT"), + PINCTRL_PIN(146, "vCNV_BT_UART_TXD"), + PINCTRL_PIN(147, "vCNV_BT_UART_RXD"), + PINCTRL_PIN(148, "vCNV_BT_UART_CTS_B"), + PINCTRL_PIN(149, "vCNV_BT_UART_RTS_B"), + PINCTRL_PIN(150, "vCNV_MFUART1_TXD"), + PINCTRL_PIN(151, "vCNV_MFUART1_RXD"), + PINCTRL_PIN(152, "vCNV_MFUART1_CTS_B"), + PINCTRL_PIN(153, "vCNV_MFUART1_RTS_B"), + PINCTRL_PIN(154, "vUART0_TXD"), + PINCTRL_PIN(155, "vUART0_RXD"), + PINCTRL_PIN(156, "vUART0_CTS_B"), + PINCTRL_PIN(157, "vUART0_RTS_B"), + PINCTRL_PIN(158, "vISH_UART0_TXD"), + PINCTRL_PIN(159, "vISH_UART0_RXD"), + PINCTRL_PIN(160, "vISH_UART0_CTS_B"), + PINCTRL_PIN(161, "vISH_UART0_RTS_B"), + PINCTRL_PIN(162, "vCNV_BT_I2S_BCLK"), + PINCTRL_PIN(163, "vCNV_BT_I2S_WS_SYNC"), + PINCTRL_PIN(164, "vCNV_BT_I2S_SDO"), + PINCTRL_PIN(165, "vCNV_BT_I2S_SDI"), + PINCTRL_PIN(166, "vI2S2_SCLK"), + PINCTRL_PIN(167, "vI2S2_SFRM"), + PINCTRL_PIN(168, "vI2S2_TXD"), + PINCTRL_PIN(169, "vI2S2_RXD"), + PINCTRL_PIN(170, "vSD3_CD_B"), /* GPP_C */ - PINCTRL_PIN(162, "GPPC_C_0"), - PINCTRL_PIN(163, "GPPC_C_1"), - PINCTRL_PIN(164, "GPPC_C_2"), - PINCTRL_PIN(165, "GPPC_C_3"), - PINCTRL_PIN(166, "GPPC_C_4"), - PINCTRL_PIN(167, "GPPC_C_5"), - PINCTRL_PIN(168, "SUSWARNB_SUSPWRDNACK"), - PINCTRL_PIN(169, "SUSACKB"), - PINCTRL_PIN(170, "UART0_RXD"), - PINCTRL_PIN(171, "UART0_TXD"), - PINCTRL_PIN(172, "UART0_RTSB"), - PINCTRL_PIN(173, "UART0_CTSB"), - PINCTRL_PIN(174, "UART1_RXD"), - PINCTRL_PIN(175, "UART1_TXD"), - PINCTRL_PIN(176, "UART1_RTSB"), - PINCTRL_PIN(177, "UART1_CTSB"), - PINCTRL_PIN(178, "I2C0_SDA"), - PINCTRL_PIN(179, "I2C0_SCL"), - PINCTRL_PIN(180, "I2C1_SDA"), - PINCTRL_PIN(181, "I2C1_SCL"), - PINCTRL_PIN(182, "UART2_RXD"), - PINCTRL_PIN(183, "UART2_TXD"), - PINCTRL_PIN(184, "UART2_RTSB"), - PINCTRL_PIN(185, "UART2_CTSB"), + PINCTRL_PIN(171, "GPPC_C_0"), + PINCTRL_PIN(172, "GPPC_C_1"), + PINCTRL_PIN(173, "GPPC_C_2"), + PINCTRL_PIN(174, "GPPC_C_3"), + PINCTRL_PIN(175, "GPPC_C_4"), + PINCTRL_PIN(176, "GPPC_C_5"), + PINCTRL_PIN(177, 
"SUSWARNB_SUSPWRDNACK"), + PINCTRL_PIN(178, "SUSACKB"), + PINCTRL_PIN(179, "UART0_RXD"), + PINCTRL_PIN(180, "UART0_TXD"), + PINCTRL_PIN(181, "UART0_RTSB"), + PINCTRL_PIN(182, "UART0_CTSB"), + PINCTRL_PIN(183, "UART1_RXD"), + PINCTRL_PIN(184, "UART1_TXD"), + PINCTRL_PIN(185, "UART1_RTSB"), + PINCTRL_PIN(186, "UART1_CTSB"), + PINCTRL_PIN(187, "I2C0_SDA"), + PINCTRL_PIN(188, "I2C0_SCL"), + PINCTRL_PIN(189, "I2C1_SDA"), + PINCTRL_PIN(190, "I2C1_SCL"), + PINCTRL_PIN(191, "UART2_RXD"), + PINCTRL_PIN(192, "UART2_TXD"), + PINCTRL_PIN(193, "UART2_RTSB"), + PINCTRL_PIN(194, "UART2_CTSB"), /* HVCMOS */ - PINCTRL_PIN(186, "L_BKLTEN"), - PINCTRL_PIN(187, "L_BKLTCTL"), - PINCTRL_PIN(188, "L_VDDEN"), - PINCTRL_PIN(189, "SYS_PWROK"), - PINCTRL_PIN(190, "SYS_RESETB"), - PINCTRL_PIN(191, "MLK_RSTB"), + PINCTRL_PIN(195, "L_BKLTEN"), + PINCTRL_PIN(196, "L_BKLTCTL"), + PINCTRL_PIN(197, "L_VDDEN"), + PINCTRL_PIN(198, "SYS_PWROK"), + PINCTRL_PIN(199, "SYS_RESETB"), + PINCTRL_PIN(200, "MLK_RSTB"), /* GPP_E */ - PINCTRL_PIN(192, "ISH_GP_0"), - PINCTRL_PIN(193, "ISH_GP_1"), - PINCTRL_PIN(194, "IMGCLKOUT_1"), - PINCTRL_PIN(195, "ISH_GP_2"), - PINCTRL_PIN(196, "IMGCLKOUT_2"), - PINCTRL_PIN(197, "SATA_LEDB"), - PINCTRL_PIN(198, "IMGCLKOUT_3"), - PINCTRL_PIN(199, "ISH_GP_3"), - PINCTRL_PIN(200, "ISH_GP_4"), - PINCTRL_PIN(201, "ISH_GP_5"), - PINCTRL_PIN(202, "ISH_GP_6"), - PINCTRL_PIN(203, "ISH_GP_7"), - PINCTRL_PIN(204, "IMGCLKOUT_4"), - PINCTRL_PIN(205, "DDPA_CTRLCLK"), - PINCTRL_PIN(206, "DDPA_CTRLDATA"), - PINCTRL_PIN(207, "DDPB_CTRLCLK"), - PINCTRL_PIN(208, "DDPB_CTRLDATA"), - PINCTRL_PIN(209, "DDPC_CTRLCLK"), - PINCTRL_PIN(210, "DDPC_CTRLDATA"), - PINCTRL_PIN(211, "IMGCLKOUT_5"), - PINCTRL_PIN(212, "CNV_BRI_DT"), - PINCTRL_PIN(213, "CNV_BRI_RSP"), - PINCTRL_PIN(214, "CNV_RGI_DT"), - PINCTRL_PIN(215, "CNV_RGI_RSP"), + PINCTRL_PIN(201, "ISH_GP_0"), + PINCTRL_PIN(202, "ISH_GP_1"), + PINCTRL_PIN(203, "IMGCLKOUT_1"), + PINCTRL_PIN(204, "ISH_GP_2"), + PINCTRL_PIN(205, "IMGCLKOUT_2"), + 
PINCTRL_PIN(206, "SATA_LEDB"), + PINCTRL_PIN(207, "IMGCLKOUT_3"), + PINCTRL_PIN(208, "ISH_GP_3"), + PINCTRL_PIN(209, "ISH_GP_4"), + PINCTRL_PIN(210, "ISH_GP_5"), + PINCTRL_PIN(211, "ISH_GP_6"), + PINCTRL_PIN(212, "ISH_GP_7"), + PINCTRL_PIN(213, "IMGCLKOUT_4"), + PINCTRL_PIN(214, "DDPA_CTRLCLK"), + PINCTRL_PIN(215, "DDPA_CTRLDATA"), + PINCTRL_PIN(216, "DDPB_CTRLCLK"), + PINCTRL_PIN(217, "DDPB_CTRLDATA"), + PINCTRL_PIN(218, "DDPC_CTRLCLK"), + PINCTRL_PIN(219, "DDPC_CTRLDATA"), + PINCTRL_PIN(220, "IMGCLKOUT_5"), + PINCTRL_PIN(221, "CNV_BRI_DT"), + PINCTRL_PIN(222, "CNV_BRI_RSP"), + PINCTRL_PIN(223, "CNV_RGI_DT"), + PINCTRL_PIN(224, "CNV_RGI_RSP"), /* GPP_G */ - PINCTRL_PIN(216, "SD3_CMD"), - PINCTRL_PIN(217, "SD3_D0"), - PINCTRL_PIN(218, "SD3_D1"), - PINCTRL_PIN(219, "SD3_D2"), - PINCTRL_PIN(220, "SD3_D3"), - PINCTRL_PIN(221, "SD3_CDB"), - PINCTRL_PIN(222, "SD3_CLK"), - PINCTRL_PIN(223, "SD3_WP"), + PINCTRL_PIN(225, "SD3_CMD"), + PINCTRL_PIN(226, "SD3_D0"), + PINCTRL_PIN(227, "SD3_D1"), + PINCTRL_PIN(228, "SD3_D2"), + PINCTRL_PIN(229, "SD3_D3"), + PINCTRL_PIN(230, "SD3_CDB"), + PINCTRL_PIN(231, "SD3_CLK"), + PINCTRL_PIN(232, "SD3_WP"), }; static const struct intel_padgroup jsl_community0_gpps[] = { JSL_GPP(0, 0, 19, 320), /* GPP_F */ - JSL_GPP(1, 20, 45, 32), /* GPP_B */ - JSL_GPP(2, 46, 66, 64), /* GPP_A */ - JSL_GPP(3, 67, 74, 96), /* GPP_S */ - JSL_GPP(4, 75, 82, 128), /* GPP_R */ + JSL_GPP(1, 20, 28, INTEL_GPIO_BASE_NOMAP), /* SPI */ + JSL_GPP(2, 29, 54, 32), /* GPP_B */ + JSL_GPP(3, 55, 75, 64), /* GPP_A */ + JSL_GPP(4, 76, 83, 96), /* GPP_S */ + JSL_GPP(5, 84, 91, 128), /* GPP_R */ }; static const struct intel_padgroup jsl_community1_gpps[] = { - JSL_GPP(0, 83, 106, 160), /* GPP_H */ - JSL_GPP(1, 107, 132, 192), /* GPP_D */ - JSL_GPP(2, 133, 161, 224), /* vGPIO */ - JSL_GPP(3, 162, 185, 256), /* GPP_C */ + JSL_GPP(0, 92, 115, 160), /* GPP_H */ + JSL_GPP(1, 116, 141, 192), /* GPP_D */ + JSL_GPP(2, 142, 170, 224), /* vGPIO */ + JSL_GPP(3, 171, 194, 256), /* GPP_C 
*/ }; static const struct intel_padgroup jsl_community4_gpps[] = { - JSL_GPP(0, 186, 191, INTEL_GPIO_BASE_NOMAP), /* HVCMOS */ - JSL_GPP(1, 192, 215, 288), /* GPP_E */ + JSL_GPP(0, 195, 200, INTEL_GPIO_BASE_NOMAP), /* HVCMOS */ + JSL_GPP(1, 201, 224, 288), /* GPP_E */ }; static const struct intel_padgroup jsl_community5_gpps[] = { - JSL_GPP(0, 216, 223, INTEL_GPIO_BASE_ZERO), /* GPP_G */ + JSL_GPP(0, 225, 232, INTEL_GPIO_BASE_ZERO), /* GPP_G */ }; static const struct intel_community jsl_communities[] = { - JSL_COMMUNITY(0, 0, 82, jsl_community0_gpps), - JSL_COMMUNITY(1, 83, 185, jsl_community1_gpps), - JSL_COMMUNITY(2, 186, 215, jsl_community4_gpps), - JSL_COMMUNITY(3, 216, 223, jsl_community5_gpps), + JSL_COMMUNITY(0, 0, 91, jsl_community0_gpps), + JSL_COMMUNITY(1, 92, 194, jsl_community1_gpps), + JSL_COMMUNITY(2, 195, 224, jsl_community4_gpps), + JSL_COMMUNITY(3, 225, 232, jsl_community5_gpps), }; static const struct intel_pinctrl_soc_data jsl_soc_data = { @@ -336,7 +347,6 @@ static struct platform_driver jsl_pinctrl_driver = { .pm = &jsl_pinctrl_pm_ops, }, }; - module_platform_driver(jsl_pinctrl_driver); MODULE_AUTHOR("Andy Shevchenko "); From cdd8fc2dd64e3f1b22a6636e242d0eff49c4ba22 Mon Sep 17 00:00:00 2001 From: Evan Green Date: Wed, 11 Nov 2020 15:17:28 -0800 Subject: [PATCH 017/296] pinctrl: jasperlake: Fix HOSTSW_OWN offset GPIOs that attempt to use interrupts get thwarted with a message like: "pin 161 cannot be used as IRQ" (for instance with SD_CD). This is because the HOSTSW_OWN offset is incorrect, so every GPIO looks like it's owned by ACPI. 
Fixes: e278dcb7048b1 ("pinctrl: intel: Add Intel Jasper Lake pin controller support") Cc: stable@vger.kernel.org Signed-off-by: Evan Green Acked-by: Mika Westerberg Signed-off-by: Andy Shevchenko --- drivers/pinctrl/intel/pinctrl-jasperlake.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pinctrl/intel/pinctrl-jasperlake.c b/drivers/pinctrl/intel/pinctrl-jasperlake.c index c5e204c8da9c..ec435b7ab392 100644 --- a/drivers/pinctrl/intel/pinctrl-jasperlake.c +++ b/drivers/pinctrl/intel/pinctrl-jasperlake.c @@ -16,7 +16,7 @@ #define JSL_PAD_OWN 0x020 #define JSL_PADCFGLOCK 0x080 -#define JSL_HOSTSW_OWN 0x0b0 +#define JSL_HOSTSW_OWN 0x0c0 #define JSL_GPI_IS 0x100 #define JSL_GPI_IE 0x120 From 03a61f11c00213394e3c7ac62ae416d034dc728f Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 11 Nov 2020 14:06:05 +0200 Subject: [PATCH 018/296] pinctrl: merrifield: Set default bias in case no particular value given When GPIO library asks pin control to set the bias, it doesn't pass any value of it and argument is considered boolean (and this is true for ACPI GpioIo() / GpioInt() resources, by the way). Thus, individual drivers must behave well, when they got the resistance value of 1 Ohm, i.e. transforming it to sane default. In case of Intel Merrifield pin control hardware the 20 kOhm sounds plausible because it gives a good trade off between weakness and minimization of leakage current (will be only 50 uA with the above choice). 
Fixes: 4e80c8f50574 ("pinctrl: intel: Add Intel Merrifield pin controller support") Depends-on: 2956b5d94a76 ("pinctrl / gpio: Introduce .set_config() callback for GPIO chips") Signed-off-by: Andy Shevchenko Acked-by: Mika Westerberg --- drivers/pinctrl/intel/pinctrl-merrifield.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/pinctrl/intel/pinctrl-merrifield.c b/drivers/pinctrl/intel/pinctrl-merrifield.c index e4ff8da1b894..3ae141e0b421 100644 --- a/drivers/pinctrl/intel/pinctrl-merrifield.c +++ b/drivers/pinctrl/intel/pinctrl-merrifield.c @@ -745,6 +745,10 @@ static int mrfld_config_set_pin(struct mrfld_pinctrl *mp, unsigned int pin, mask |= BUFCFG_Px_EN_MASK | BUFCFG_PUPD_VAL_MASK; bits |= BUFCFG_PU_EN; + /* Set default strength value in case none is given */ + if (arg == 1) + arg = 20000; + switch (arg) { case 50000: bits |= BUFCFG_PUPD_VAL_50K << BUFCFG_PUPD_VAL_SHIFT; @@ -765,6 +769,10 @@ static int mrfld_config_set_pin(struct mrfld_pinctrl *mp, unsigned int pin, mask |= BUFCFG_Px_EN_MASK | BUFCFG_PUPD_VAL_MASK; bits |= BUFCFG_PD_EN; + /* Set default strength value in case none is given */ + if (arg == 1) + arg = 20000; + switch (arg) { case 50000: bits |= BUFCFG_PUPD_VAL_50K << BUFCFG_PUPD_VAL_SHIFT; From 5f714771d01e0e0d410f06d4d192fb27b1ca0edd Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 12 Nov 2020 21:03:01 +0200 Subject: [PATCH 019/296] pinctrl: baytrail: Avoid clearing debounce value when turning it off Baytrail pin control has a common register to set up debounce timeout. When a pin configuration requested debounce to be disabled, the rest of the pins may still want to have debounce enabled and thus rely on the common timeout value. Avoid clearing debounce value when turning it off for one pin while others may still use it. 
Fixes: 658b476c742f ("pinctrl: baytrail: Add debounce configuration") Depends-on: 04ff5a095d66 ("pinctrl: baytrail: Rectify debounce support") Depends-on: 827e1579e1d5 ("pinctrl: baytrail: Rectify debounce support (part 2)") Signed-off-by: Andy Shevchenko Acked-by: Mika Westerberg --- drivers/pinctrl/intel/pinctrl-baytrail.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/pinctrl/intel/pinctrl-baytrail.c b/drivers/pinctrl/intel/pinctrl-baytrail.c index d49aab3cfbaa..394a421a19d5 100644 --- a/drivers/pinctrl/intel/pinctrl-baytrail.c +++ b/drivers/pinctrl/intel/pinctrl-baytrail.c @@ -1049,7 +1049,6 @@ static int byt_pin_config_set(struct pinctrl_dev *pctl_dev, break; case PIN_CONFIG_INPUT_DEBOUNCE: debounce = readl(db_reg); - debounce &= ~BYT_DEBOUNCE_PULSE_MASK; if (arg) conf |= BYT_DEBOUNCE_EN; @@ -1058,24 +1057,31 @@ static int byt_pin_config_set(struct pinctrl_dev *pctl_dev, switch (arg) { case 375: + debounce &= ~BYT_DEBOUNCE_PULSE_MASK; debounce |= BYT_DEBOUNCE_PULSE_375US; break; case 750: + debounce &= ~BYT_DEBOUNCE_PULSE_MASK; debounce |= BYT_DEBOUNCE_PULSE_750US; break; case 1500: + debounce &= ~BYT_DEBOUNCE_PULSE_MASK; debounce |= BYT_DEBOUNCE_PULSE_1500US; break; case 3000: + debounce &= ~BYT_DEBOUNCE_PULSE_MASK; debounce |= BYT_DEBOUNCE_PULSE_3MS; break; case 6000: + debounce &= ~BYT_DEBOUNCE_PULSE_MASK; debounce |= BYT_DEBOUNCE_PULSE_6MS; break; case 12000: + debounce &= ~BYT_DEBOUNCE_PULSE_MASK; debounce |= BYT_DEBOUNCE_PULSE_12MS; break; case 24000: + debounce &= ~BYT_DEBOUNCE_PULSE_MASK; debounce |= BYT_DEBOUNCE_PULSE_24MS; break; default: From a7a10bce8a04f48238a8306ec97d430b77917015 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Thu, 15 Oct 2020 13:21:44 -0400 Subject: [PATCH 020/296] dm integrity: don't use drivers that have CRYPTO_ALG_ALLOCATES_MEMORY Don't use crypto drivers that have the flag CRYPTO_ALG_ALLOCATES_MEMORY set. 
These drivers allocate memory and thus they are not suitable for block I/O processing. Signed-off-by: Mikulas Patocka Signed-off-by: Mike Snitzer --- drivers/md/dm-integrity.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c index 3fc3757def55..5a7a1b90e671 100644 --- a/drivers/md/dm-integrity.c +++ b/drivers/md/dm-integrity.c @@ -3462,7 +3462,7 @@ static int get_mac(struct crypto_shash **hash, struct alg_spec *a, char **error, int r; if (a->alg_string) { - *hash = crypto_alloc_shash(a->alg_string, 0, 0); + *hash = crypto_alloc_shash(a->alg_string, 0, CRYPTO_ALG_ALLOCATES_MEMORY); if (IS_ERR(*hash)) { *error = error_alg; r = PTR_ERR(*hash); @@ -3519,7 +3519,7 @@ static int create_journal(struct dm_integrity_c *ic, char **error) struct journal_completion comp; comp.ic = ic; - ic->journal_crypt = crypto_alloc_skcipher(ic->journal_crypt_alg.alg_string, 0, 0); + ic->journal_crypt = crypto_alloc_skcipher(ic->journal_crypt_alg.alg_string, 0, CRYPTO_ALG_ALLOCATES_MEMORY); if (IS_ERR(ic->journal_crypt)) { *error = "Invalid journal cipher"; r = PTR_ERR(ic->journal_crypt); From e5d41cbca1b2036362c9e29d705d3a175a01eff8 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Tue, 10 Nov 2020 07:44:01 -0500 Subject: [PATCH 021/296] dm writecache: advance the number of arguments when reporting max_age When reporting the "max_age" value the number of arguments must advance by two. 
Signed-off-by: Mikulas Patocka Fixes: 3923d4854e18 ("dm writecache: implement gradual cleanup") Cc: stable@vger.kernel.org # v5.7+ Signed-off-by: Mike Snitzer --- drivers/md/dm-writecache.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/md/dm-writecache.c b/drivers/md/dm-writecache.c index 9ae4ce7df95c..1ea923af47c6 100644 --- a/drivers/md/dm-writecache.c +++ b/drivers/md/dm-writecache.c @@ -2479,6 +2479,8 @@ static void writecache_status(struct dm_target *ti, status_type_t type, extra_args += 2; if (wc->autocommit_time_set) extra_args += 2; + if (wc->max_age != MAX_AGE_UNSPECIFIED) + extra_args += 2; if (wc->cleaner) extra_args++; if (wc->writeback_fua_set) From 67aa3ec3dbc43d6e34401d9b2a40040ff7bb57af Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Tue, 10 Nov 2020 07:45:13 -0500 Subject: [PATCH 022/296] dm writecache: fix the maximum number of arguments Advance the maximum number of arguments to 16. This fixes issue where certain operations, combined with table configured args, exceed 10 arguments. 
Signed-off-by: Mikulas Patocka Fixes: 48debafe4f2f ("dm: add writecache target") Cc: stable@vger.kernel.org # v4.18+ Signed-off-by: Mike Snitzer --- drivers/md/dm-writecache.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/md/dm-writecache.c b/drivers/md/dm-writecache.c index 1ea923af47c6..7d277de26b3a 100644 --- a/drivers/md/dm-writecache.c +++ b/drivers/md/dm-writecache.c @@ -2041,7 +2041,7 @@ static int writecache_ctr(struct dm_target *ti, unsigned argc, char **argv) struct wc_memory_superblock s; static struct dm_arg _args[] = { - {0, 10, "Invalid number of feature args"}, + {0, 16, "Invalid number of feature args"}, }; as.argc = argc; From 600c0849cf86b75d86352f59745226273290986a Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Wed, 18 Nov 2020 13:08:21 +0200 Subject: [PATCH 023/296] thunderbolt: Fix use-after-free in remove_unplugged_switch() Paulian reported a crash that happens when a dock is unplugged during hibernation: [78436.228217] thunderbolt 0-1: device disconnected [78436.228365] BUG: kernel NULL pointer dereference, address: 00000000000001e0 ... [78436.228397] RIP: 0010:icm_free_unplugged_children+0x109/0x1a0 ... [78436.228432] Call Trace: [78436.228439] icm_rescan_work+0x24/0x30 [78436.228444] process_one_work+0x1a3/0x3a0 [78436.228449] worker_thread+0x30/0x370 [78436.228454] ? process_one_work+0x3a0/0x3a0 [78436.228457] kthread+0x13d/0x160 [78436.228461] ? kthread_park+0x90/0x90 [78436.228465] ret_from_fork+0x1f/0x30 This happens because remove_unplugged_switch() calls tb_switch_remove() that releases the memory pointed by sw so the following lines reference to a memory that might be released already. Fix this by saving pointer to the parent device before calling tb_switch_remove(). 
Reported-by: Paulian Bogdan Marinca Fixes: 4f7c2e0d8765 ("thunderbolt: Make sure device runtime resume completes before taking domain lock") Cc: stable@vger.kernel.org Signed-off-by: Mika Westerberg Reviewed-by: Greg Kroah-Hartman --- drivers/thunderbolt/icm.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/thunderbolt/icm.c b/drivers/thunderbolt/icm.c index 977ba91f4d0e..82c46b200c34 100644 --- a/drivers/thunderbolt/icm.c +++ b/drivers/thunderbolt/icm.c @@ -1976,7 +1976,9 @@ static int complete_rpm(struct device *dev, void *data) static void remove_unplugged_switch(struct tb_switch *sw) { - pm_runtime_get_sync(sw->dev.parent); + struct device *parent = get_device(sw->dev.parent); + + pm_runtime_get_sync(parent); /* * Signal this and switches below for rpm_complete because @@ -1987,8 +1989,10 @@ static void remove_unplugged_switch(struct tb_switch *sw) bus_for_each_dev(&tb_bus_type, &sw->dev, NULL, complete_rpm); tb_switch_remove(sw); - pm_runtime_mark_last_busy(sw->dev.parent); - pm_runtime_put_autosuspend(sw->dev.parent); + pm_runtime_mark_last_busy(parent); + pm_runtime_put_autosuspend(parent); + + put_device(parent); } static void icm_free_unplugged_children(struct tb_switch *sw) From 5e4d659b10fde14403adb2e215df4a3168fe8465 Mon Sep 17 00:00:00 2001 From: Vincent Palatin Date: Fri, 20 Nov 2020 10:28:28 +0100 Subject: [PATCH 024/296] USB: serial: option: add Fibocom NL668 variants Update the USB serial option driver support for the Fibocom NL668 Cat.4 LTE modules as there are actually several different variants. Got clarifications from Fibocom, there are distinct products: - VID:PID 1508:1001, NL668 for IOT (no MBIM interface) - VID:PID 2cb7:01a0, NL668-AM and NL652-EU are laptop M.2 cards (with MBIM interfaces for Windows/Linux/Chrome OS), respectively for Americas and Europe. 
usb-devices output for the laptop M.2 cards: T: Bus=01 Lev=01 Prnt=01 Port=00 Cnt=01 Dev#= 4 Spd=480 MxCh= 0 D: Ver= 2.00 Cls=ef(misc ) Sub=00 Prot=00 MxPS=64 #Cfgs= 1 P: Vendor=2cb7 ProdID=01a0 Rev=03.18 S: Manufacturer=Fibocom Wireless Inc. S: Product=Fibocom NL652-EU Modem S: SerialNumber=0123456789ABCDEF C: #Ifs= 5 Cfg#= 1 Atr=a0 MxPwr=500mA I: If#= 0 Alt= 0 #EPs= 1 Cls=02(commc) Sub=0e Prot=00 Driver=cdc_mbim I: If#= 1 Alt= 1 #EPs= 2 Cls=0a(data ) Sub=00 Prot=02 Driver=cdc_mbim I: If#= 2 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=(none) I: If#= 3 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=(none) I: If#= 4 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=(none) Signed-off-by: Vincent Palatin Cc: stable@vger.kernel.org Signed-off-by: Johan Hovold --- drivers/usb/serial/option.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index 54ca85cc920d..2aa31173997f 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -2046,12 +2046,13 @@ static const struct usb_device_id option_ids[] = { .driver_info = RSVD(0) | RSVD(1) | RSVD(6) }, { USB_DEVICE(0x0489, 0xe0b5), /* Foxconn T77W968 ESIM */ .driver_info = RSVD(0) | RSVD(1) | RSVD(6) }, - { USB_DEVICE(0x1508, 0x1001), /* Fibocom NL668 */ + { USB_DEVICE(0x1508, 0x1001), /* Fibocom NL668 (IOT version) */ .driver_info = RSVD(4) | RSVD(5) | RSVD(6) }, { USB_DEVICE(0x2cb7, 0x0104), /* Fibocom NL678 series */ .driver_info = RSVD(4) | RSVD(5) }, { USB_DEVICE_INTERFACE_CLASS(0x2cb7, 0x0105, 0xff), /* Fibocom NL678 series */ .driver_info = RSVD(6) }, + { USB_DEVICE_INTERFACE_CLASS(0x2cb7, 0x01a0, 0xff) }, /* Fibocom NL668-AM/NL652-EU (laptop MBIM) */ { USB_DEVICE_INTERFACE_CLASS(0x305a, 0x1404, 0xff) }, /* GosunCn GM500 RNDIS */ { USB_DEVICE_INTERFACE_CLASS(0x305a, 0x1405, 0xff) }, /* GosunCn GM500 MBIM */ { USB_DEVICE_INTERFACE_CLASS(0x305a, 0x1406, 0xff) }, /* GosunCn GM500 ECM/NCM */ From 
a98fd117a2553ab1a6d2fe3c7acae88c1eca4372 Mon Sep 17 00:00:00 2001 From: Icenowy Zheng Date: Fri, 20 Nov 2020 13:08:51 +0800 Subject: [PATCH 025/296] ARM: dts: sun8i: v3s: fix GIC node memory range Currently the GIC node in V3s DTSI follows some old DT examples, and is broken. This leads to a warning at boot. Fix this. Fixes: f989086ccbc6 ("ARM: dts: sunxi: add dtsi file for V3s SoC") Signed-off-by: Icenowy Zheng Signed-off-by: Maxime Ripard Link: https://lore.kernel.org/r/20201120050851.4123759-1-icenowy@aosc.io --- arch/arm/boot/dts/sun8i-v3s.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/sun8i-v3s.dtsi b/arch/arm/boot/dts/sun8i-v3s.dtsi index 0c7341676921..89abd4cc7e23 100644 --- a/arch/arm/boot/dts/sun8i-v3s.dtsi +++ b/arch/arm/boot/dts/sun8i-v3s.dtsi @@ -539,7 +539,7 @@ gic: interrupt-controller@1c81000 { compatible = "arm,gic-400"; reg = <0x01c81000 0x1000>, - <0x01c82000 0x1000>, + <0x01c82000 0x2000>, <0x01c84000 0x2000>, <0x01c86000 0x2000>; interrupt-controller; From c0a2074ac575fff2848c8ef804bdc8590466c36c Mon Sep 17 00:00:00 2001 From: Wenbin Mei Date: Wed, 18 Nov 2020 14:34:05 +0800 Subject: [PATCH 026/296] mmc: mediatek: Fix system suspend/resume support for CQHCI Before we got these errors on MT8192 platform: [ 59.153891] Restarting tasks ... [ 59.154540] done. 
[ 59.159175] PM: suspend exit [ 59.218724] mtk-msdc 11f60000.mmc: phase: [map:fffffffe] [maxlen:31] [final:16] [ 119.776083] mmc0: cqhci: timeout for tag 9 [ 119.780196] mmc0: cqhci: ============ CQHCI REGISTER DUMP =========== [ 119.786709] mmc0: cqhci: Caps: 0x100020b6 | Version: 0x00000510 [ 119.793225] mmc0: cqhci: Config: 0x00000101 | Control: 0x00000000 [ 119.799706] mmc0: cqhci: Int stat: 0x00000000 | Int enab: 0x00000000 [ 119.806177] mmc0: cqhci: Int sig: 0x00000000 | Int Coal: 0x00000000 [ 119.812670] mmc0: cqhci: TDL base: 0x00000000 | TDL up32: 0x00000000 [ 119.819149] mmc0: cqhci: Doorbell: 0x003ffc00 | TCN: 0x00000200 [ 119.825656] mmc0: cqhci: Dev queue: 0x00000000 | Dev Pend: 0x00000000 [ 119.832155] mmc0: cqhci: Task clr: 0x00000000 | SSC1: 0x00001000 [ 119.838627] mmc0: cqhci: SSC2: 0x00000000 | DCMD rsp: 0x00000000 [ 119.845174] mmc0: cqhci: RED mask: 0xfdf9a080 | TERRI: 0x0000891c [ 119.851654] mmc0: cqhci: Resp idx: 0x00000000 | Resp arg: 0x00000000 [ 119.865773] mmc0: cqhci: : =========================================== [ 119.872358] mmc0: running CQE recovery From these logs, we found TDL base was back to the default value. After suspend, the mmc host is powered off by HW, and bring CQE register to the default value, so we add system suspend/resume interface, then bring CQE to deactivated state before suspend, it will be enabled by CQE first request after resume. 
Signed-off-by: Wenbin Mei Link: https://lore.kernel.org/r/20201118063405.24906-1-wenbin.mei@mediatek.com Fixes: 88bd652b3c74 ("mmc: mediatek: command queue support") Cc: stable@vger.kernel.org [Ulf: Renamed functions] Signed-off-by: Ulf Hansson --- drivers/mmc/host/mtk-sd.c | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/drivers/mmc/host/mtk-sd.c b/drivers/mmc/host/mtk-sd.c index a704745e5882..b40ec966288e 100644 --- a/drivers/mmc/host/mtk-sd.c +++ b/drivers/mmc/host/mtk-sd.c @@ -2681,11 +2681,29 @@ static int msdc_runtime_resume(struct device *dev) msdc_restore_reg(host); return 0; } + +static int msdc_suspend(struct device *dev) +{ + struct mmc_host *mmc = dev_get_drvdata(dev); + int ret; + + if (mmc->caps2 & MMC_CAP2_CQE) { + ret = cqhci_suspend(mmc); + if (ret) + return ret; + } + + return pm_runtime_force_suspend(dev); +} + +static int msdc_resume(struct device *dev) +{ + return pm_runtime_force_resume(dev); +} #endif static const struct dev_pm_ops msdc_dev_pm_ops = { - SET_SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend, - pm_runtime_force_resume) + SET_SYSTEM_SLEEP_PM_OPS(msdc_suspend, msdc_resume) SET_RUNTIME_PM_OPS(msdc_runtime_suspend, msdc_runtime_resume, NULL) }; From 903a72eca4abf241293dcc1385896fd428e15fe9 Mon Sep 17 00:00:00 2001 From: yong mao Date: Thu, 19 Nov 2020 11:02:37 +0800 Subject: [PATCH 027/296] mmc: mediatek: Extend recheck_sdio_irq fix to more variants The SDIO recheck fix is required for more of the supported variants. Let's add it to those that need it. 
Reported-by: Fabien Parent Reported-by: Mattijs Korpershoek Signed-off-by: Yong Mao Link: https://lore.kernel.org/r/20201119030237.9414-1-yong.mao@mediatek.com Fixes: 9e2582e57407 ("mmc: mediatek: fix SDIO irq issue") Cc: stable@vger.kernel.org [Ulf: Clarified commitmsg ] Signed-off-by: Ulf Hansson --- drivers/mmc/host/mtk-sd.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/mmc/host/mtk-sd.c b/drivers/mmc/host/mtk-sd.c index b40ec966288e..7eb99255ae3d 100644 --- a/drivers/mmc/host/mtk-sd.c +++ b/drivers/mmc/host/mtk-sd.c @@ -446,7 +446,7 @@ struct msdc_host { static const struct mtk_mmc_compatible mt8135_compat = { .clk_div_bits = 8, - .recheck_sdio_irq = false, + .recheck_sdio_irq = true, .hs400_tune = false, .pad_tune_reg = MSDC_PAD_TUNE, .async_fifo = false, @@ -485,7 +485,7 @@ static const struct mtk_mmc_compatible mt8183_compat = { static const struct mtk_mmc_compatible mt2701_compat = { .clk_div_bits = 12, - .recheck_sdio_irq = false, + .recheck_sdio_irq = true, .hs400_tune = false, .pad_tune_reg = MSDC_PAD_TUNE0, .async_fifo = true, @@ -511,7 +511,7 @@ static const struct mtk_mmc_compatible mt2712_compat = { static const struct mtk_mmc_compatible mt7622_compat = { .clk_div_bits = 12, - .recheck_sdio_irq = false, + .recheck_sdio_irq = true, .hs400_tune = false, .pad_tune_reg = MSDC_PAD_TUNE0, .async_fifo = true, @@ -524,7 +524,7 @@ static const struct mtk_mmc_compatible mt7622_compat = { static const struct mtk_mmc_compatible mt8516_compat = { .clk_div_bits = 12, - .recheck_sdio_irq = false, + .recheck_sdio_irq = true, .hs400_tune = false, .pad_tune_reg = MSDC_PAD_TUNE0, .async_fifo = true, @@ -535,7 +535,7 @@ static const struct mtk_mmc_compatible mt8516_compat = { static const struct mtk_mmc_compatible mt7620_compat = { .clk_div_bits = 8, - .recheck_sdio_irq = false, + .recheck_sdio_irq = true, .hs400_tune = false, .pad_tune_reg = MSDC_PAD_TUNE, .async_fifo = false, @@ -548,6 +548,7 @@ static const struct 
mtk_mmc_compatible mt7620_compat = { static const struct mtk_mmc_compatible mt6779_compat = { .clk_div_bits = 12, + .recheck_sdio_irq = false, .hs400_tune = false, .pad_tune_reg = MSDC_PAD_TUNE0, .async_fifo = true, From a42a7ec9bb99a17869c3b9f3d365aaf2bdb1a554 Mon Sep 17 00:00:00 2001 From: Muhammad Husaini Zulkifli Date: Wed, 18 Nov 2020 20:01:20 +0800 Subject: [PATCH 028/296] mmc: sdhci-of-arasan: Fix clock registration error for Keem Bay SOC The commit 16ada730a759 ("mmc: sdhci-of-arasan: Modify clock operations handling") introduced support for platform specific clock operations. Around the same point in time the commit 36c6aadaae86 ("mmc: sdhci-of-arasan: Add support for Intel Keem Bay") was also merged. Unfortunately, it was not really tested on top of the previously mentioned commit, which causes clock registration failures for Keem Bay SOC devices. Let's fix this by properly declaring the clock operation for Keem Bay SOC devices. Fixes: 36c6aadaae86 ("mmc: sdhci-of-arasan: Add support for Intel Keem Bay") Signed-off-by: Muhammad Husaini Zulkifli Reviewed-by: Adrian Hunter Link: https://lore.kernel.org/r/20201118120120.24908-2-muhammad.husaini.zulkifli@intel.com Cc: stable@vger.kernel.org Signed-off-by: Ulf Hansson --- drivers/mmc/host/sdhci-of-arasan.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/mmc/host/sdhci-of-arasan.c b/drivers/mmc/host/sdhci-of-arasan.c index d25a4b50c2f3..3b8d456e857d 100644 --- a/drivers/mmc/host/sdhci-of-arasan.c +++ b/drivers/mmc/host/sdhci-of-arasan.c @@ -1186,16 +1186,19 @@ static struct sdhci_arasan_of_data sdhci_arasan_versal_data = { static struct sdhci_arasan_of_data intel_keembay_emmc_data = { .soc_ctl_map = &intel_keembay_soc_ctl_map, .pdata = &sdhci_keembay_emmc_pdata, + .clk_ops = &arasan_clk_ops, }; static struct sdhci_arasan_of_data intel_keembay_sd_data = { .soc_ctl_map = &intel_keembay_soc_ctl_map, .pdata = &sdhci_keembay_sd_pdata, + .clk_ops = &arasan_clk_ops, }; static struct sdhci_arasan_of_data
intel_keembay_sdio_data = { .soc_ctl_map = &intel_keembay_soc_ctl_map, .pdata = &sdhci_keembay_sdio_pdata, + .clk_ops = &arasan_clk_ops, }; static const struct of_device_id sdhci_arasan_of_match[] = { From a7361b9c4615951f52ffd2b1afa09a1384c7b4e4 Mon Sep 17 00:00:00 2001 From: Adam Sampson Date: Mon, 23 Nov 2020 17:47:39 +0000 Subject: [PATCH 029/296] ARM: dts: sun7i: pcduino3-nano: enable RGMII RX/TX delay on PHY The RX/TX delays for the Ethernet PHY on the Linksprite pcDuino 3 Nano are configured in hardware, using resistors that are populated to pull the RTL8211E's RXDLY/TXDLY pins low or high as needed. phy-mode should be set to rgmii-id to reflect this. Previously it was set to rgmii, which used to work but now results in the delays being disabled again as a result of the bugfix in commit bbc4d71d6354 ("net: phy: realtek: fix rtl8211e rx/tx delay config"). Tested on two pcDuino 3 Nano boards purchased in 2015. Without this fix, Ethernet works unreliably on one board and doesn't work at all on the other. Fixes: 061035d456c9 ("ARM: dts: sun7i: Add dts file for pcDuino 3 Nano board") Signed-off-by: Adam Sampson Signed-off-by: Maxime Ripard Reviewed-by: Andrew Lunn Acked-by: Chen-Yu Tsai Link: https://lore.kernel.org/r/20201123174739.6809-1-ats@offog.org --- arch/arm/boot/dts/sun7i-a20-pcduino3-nano.dts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/boot/dts/sun7i-a20-pcduino3-nano.dts b/arch/arm/boot/dts/sun7i-a20-pcduino3-nano.dts index fce2f7fcd084..bf38c66c1815 100644 --- a/arch/arm/boot/dts/sun7i-a20-pcduino3-nano.dts +++ b/arch/arm/boot/dts/sun7i-a20-pcduino3-nano.dts @@ -1,5 +1,5 @@ /* - * Copyright 2015 Adam Sampson + * Copyright 2015-2020 Adam Sampson * * This file is dual-licensed: you can use it either under the terms * of the GPL or the X11 license, at your option. 
Note that this dual @@ -115,7 +115,7 @@ pinctrl-names = "default"; pinctrl-0 = <&gmac_rgmii_pins>; phy-handle = <&phy1>; - phy-mode = "rgmii"; + phy-mode = "rgmii-id"; status = "okay"; }; From 095fbca0a94930b58f977284ef1b759b98700f8b Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 26 Oct 2020 23:03:13 +0100 Subject: [PATCH 030/296] Makefile.extrawarn: move -Wcast-align to W=3 This warning behaves differently depending on the architecture and compiler. Using x86 gcc, we get no output at all because gcc knows the architecture can handle unaligned accesses. Using x86 clang, or gcc on an architecture that needs to manually deal with unaligned accesses, the build log is completely flooded with these warnings, as they are commonly invoked by inline functions of networking headers, e.g. include/linux/skbuff.h:1426:26: warning: cast increases required alignment of target type [-Wcast-align] The compiler is correct to point this out, as we are dealing with undefined behavior that does cause problems in practice, but there is also no good way to rewrite the code in commonly included headers to a safer method. 
Signed-off-by: Arnd Bergmann Reviewed-by: Nathan Chancellor Signed-off-by: Masahiro Yamada --- scripts/Makefile.extrawarn | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/Makefile.extrawarn b/scripts/Makefile.extrawarn index 95e4cdb94fe9..6baee1200615 100644 --- a/scripts/Makefile.extrawarn +++ b/scripts/Makefile.extrawarn @@ -60,7 +60,6 @@ endif # ifneq ($(findstring 2, $(KBUILD_EXTRA_WARN)),) -KBUILD_CFLAGS += -Wcast-align KBUILD_CFLAGS += -Wdisabled-optimization KBUILD_CFLAGS += -Wnested-externs KBUILD_CFLAGS += -Wshadow @@ -80,6 +79,7 @@ endif ifneq ($(findstring 3, $(KBUILD_EXTRA_WARN)),) KBUILD_CFLAGS += -Wbad-function-cast +KBUILD_CFLAGS += -Wcast-align KBUILD_CFLAGS += -Wcast-qual KBUILD_CFLAGS += -Wconversion KBUILD_CFLAGS += -Wpacked From a716bd7432106aed82a751409d7be851a23022ac Mon Sep 17 00:00:00 2001 From: Denys Zagorui Date: Mon, 2 Nov 2020 04:08:53 -0800 Subject: [PATCH 031/296] kbuild: use -fmacro-prefix-map for .S sources Follow-up to commit a73619a845d5 ("kbuild: use -fmacro-prefix-map to make __FILE__ a relative path"). Assembler sources also use __FILE__ macro so this flag should be also applied to those sources. 
Signed-off-by: Denys Zagorui Signed-off-by: Masahiro Yamada --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index ed081e3eb800..87d659d3c8de 100644 --- a/Makefile +++ b/Makefile @@ -946,7 +946,7 @@ KBUILD_CFLAGS += $(call cc-option,-Werror=incompatible-pointer-types) KBUILD_CFLAGS += $(call cc-option,-Werror=designated-init) # change __FILE__ to the relative path from the srctree -KBUILD_CFLAGS += $(call cc-option,-fmacro-prefix-map=$(srctree)/=) +KBUILD_CPPFLAGS += $(call cc-option,-fmacro-prefix-map=$(srctree)/=) # ensure -fcf-protection is disabled when using retpoline as it is # incompatible with -mindirect-branch=thunk-extern From b8a9092330da2030496ff357272f342eb970d51b Mon Sep 17 00:00:00 2001 From: Nick Desaulniers Date: Mon, 9 Nov 2020 10:35:28 -0800 Subject: [PATCH 032/296] Kbuild: do not emit debug info for assembly with LLVM_IAS=1 Clang's integrated assembler produces the warning for assembly files: warning: DWARF2 only supports one section per compilation unit If -Wa,-gdwarf-* is unspecified, then debug info is not emitted for assembly sources (it is still emitted for C sources). This will be re-enabled for newer DWARF versions in a follow up patch. Enables defconfig+CONFIG_DEBUG_INFO to build cleanly with LLVM=1 LLVM_IAS=1 for x86_64 and arm64. 
Cc: Link: https://github.com/ClangBuiltLinux/linux/issues/716 Reported-by: Dmitry Golovin Reported-by: Nathan Chancellor Suggested-by: Dmitry Golovin Suggested-by: Nathan Chancellor Suggested-by: Sedat Dilek Reviewed-by: Fangrui Song Reviewed-by: Nathan Chancellor Signed-off-by: Nick Desaulniers Signed-off-by: Masahiro Yamada --- Makefile | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Makefile b/Makefile index 87d659d3c8de..ae1592c1f5d6 100644 --- a/Makefile +++ b/Makefile @@ -828,7 +828,9 @@ else DEBUG_CFLAGS += -g endif +ifneq ($(LLVM_IAS),1) KBUILD_AFLAGS += -Wa,-gdwarf-2 +endif ifdef CONFIG_DEBUG_INFO_DWARF4 DEBUG_CFLAGS += -gdwarf-4 From fa248db082270200863d254e0f39bbb29923d6b1 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Tue, 24 Nov 2020 17:42:57 -0800 Subject: [PATCH 033/296] Input: soc_button_array - add missing include This fixes the following build errors: CC [M] drivers/input/misc/soc_button_array.o drivers/input/misc/soc_button_array.c:156:4: error: implicit declaration of function 'irq_set_irq_type' [-Werror,-Wimplicit-function-declaration] irq_set_irq_type(irq, IRQ_TYPE_LEVEL_LOW); ^ drivers/input/misc/soc_button_array.c:156:26: error: use of undeclared identifier 'IRQ_TYPE_LEVEL_LOW' irq_set_irq_type(irq, IRQ_TYPE_LEVEL_LOW); ^ 2 errors generated. 
Fixes: 78a5b53e9fb4 ("Input: soc_button_array - work around DSDTs which modify the irqflags") Reported-by: kernel test robot Reviewed-by: Hans de Goede Link: https://lore.kernel.org/r/20201123061508.GA1009828@dtor-ws Signed-off-by: Dmitry Torokhov --- drivers/input/misc/soc_button_array.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/input/misc/soc_button_array.c b/drivers/input/misc/soc_button_array.c index cae1a3fae83a..d14a65683c5e 100644 --- a/drivers/input/misc/soc_button_array.c +++ b/drivers/input/misc/soc_button_array.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include From 855b69857830f8d918d715014f05e59a3f7491a0 Mon Sep 17 00:00:00 2001 From: Luo Meng Date: Tue, 24 Nov 2020 17:45:23 -0800 Subject: [PATCH 034/296] Input: i8042 - fix error return code in i8042_setup_aux() Fix to return a negative error code from the error handling case instead of 0 in function i8042_setup_aux(), as done elsewhere in this function. Fixes: f81134163fc7 ("Input: i8042 - use platform_driver_probe") Reported-by: Hulk Robot Signed-off-by: Luo Meng Reviewed-by: Hans de Goede Link: https://lore.kernel.org/r/20201123133420.4071187-1-luomeng12@huawei.com Signed-off-by: Dmitry Torokhov --- drivers/input/serio/i8042.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/input/serio/i8042.c b/drivers/input/serio/i8042.c index 944cbb519c6d..abae23af0791 100644 --- a/drivers/input/serio/i8042.c +++ b/drivers/input/serio/i8042.c @@ -1471,7 +1471,8 @@ static int __init i8042_setup_aux(void) if (error) goto err_free_ports; - if (aux_enable()) + error = aux_enable(); + if (error) goto err_free_irq; i8042_aux_irq_registered = true; From 777ee15e88616c275ba59db88d3ece20eae0ca9a Mon Sep 17 00:00:00 2001 From: Lucas Stach Date: Fri, 20 Nov 2020 22:13:06 +0100 Subject: [PATCH 035/296] drm: mxsfb: fix fence synchronization The conversion away from the simple display pipeline helper missed to convert the prepare_fb plane callback, so 
no fences are attached to the atomic state, breaking synchronization with other devices. Fix this by plugging in the drm_gem_fb_prepare_fb helper function. Fixes: ae1ed0093281 ("drm: mxsfb: Stop using DRM simple display pipeline helper") Signed-off-by: Lucas Stach Reviewed-by: Stefan Agner Signed-off-by: Stefan Agner Link: https://patchwork.freedesktop.org/patch/msgid/20201120211306.325841-1-l.stach@pengutronix.de --- drivers/gpu/drm/mxsfb/mxsfb_kms.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/mxsfb/mxsfb_kms.c b/drivers/gpu/drm/mxsfb/mxsfb_kms.c index b721b8b262ce..4d556532281a 100644 --- a/drivers/gpu/drm/mxsfb/mxsfb_kms.c +++ b/drivers/gpu/drm/mxsfb/mxsfb_kms.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include @@ -485,11 +486,13 @@ static void mxsfb_plane_overlay_atomic_update(struct drm_plane *plane, } static const struct drm_plane_helper_funcs mxsfb_plane_primary_helper_funcs = { + .prepare_fb = drm_gem_fb_prepare_fb, .atomic_check = mxsfb_plane_atomic_check, .atomic_update = mxsfb_plane_primary_atomic_update, }; static const struct drm_plane_helper_funcs mxsfb_plane_overlay_helper_funcs = { + .prepare_fb = drm_gem_fb_prepare_fb, .atomic_check = mxsfb_plane_atomic_check, .atomic_update = mxsfb_plane_overlay_atomic_update, }; From c70582bbf299986234ecf06d128454b4b38ecd2e Mon Sep 17 00:00:00 2001 From: Daniel Abrecht Date: Sun, 8 Nov 2020 21:00:01 +0000 Subject: [PATCH 036/296] drm: mxsfb: Implement .format_mod_supported This will make sure applications which use the IN_FORMATS blob to figure out which modifiers they can use will pick up the linear modifier which is needed by mxsfb. Such applications will not work otherwise if an incompatible implicit modifier ends up being selected. Before commit ae1ed0093281 ("drm: mxsfb: Stop using DRM simple display pipeline helper"), the DRM simple display pipeline helper took care of this. 
Signed-off-by: Daniel Abrecht Fixes: ae1ed0093281 ("drm: mxsfb: Stop using DRM simple display pipeline helper") Reviewed-by: Stefan Agner Signed-off-by: Stefan Agner Link: https://patchwork.freedesktop.org/patch/msgid/2a99ffffc2378209307e0992a6e97e70@nodmarc.danielabrecht.ch --- drivers/gpu/drm/mxsfb/mxsfb_kms.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/gpu/drm/mxsfb/mxsfb_kms.c b/drivers/gpu/drm/mxsfb/mxsfb_kms.c index 4d556532281a..9e1224d54729 100644 --- a/drivers/gpu/drm/mxsfb/mxsfb_kms.c +++ b/drivers/gpu/drm/mxsfb/mxsfb_kms.c @@ -485,6 +485,13 @@ static void mxsfb_plane_overlay_atomic_update(struct drm_plane *plane, writel(ctrl, mxsfb->base + LCDC_AS_CTRL); } +static bool mxsfb_format_mod_supported(struct drm_plane *plane, + uint32_t format, + uint64_t modifier) +{ + return modifier == DRM_FORMAT_MOD_LINEAR; +} + static const struct drm_plane_helper_funcs mxsfb_plane_primary_helper_funcs = { .prepare_fb = drm_gem_fb_prepare_fb, .atomic_check = mxsfb_plane_atomic_check, @@ -498,6 +505,7 @@ static const struct drm_plane_helper_funcs mxsfb_plane_overlay_helper_funcs = { }; static const struct drm_plane_funcs mxsfb_plane_funcs = { + .format_mod_supported = mxsfb_format_mod_supported, .update_plane = drm_atomic_helper_update_plane, .disable_plane = drm_atomic_helper_disable_plane, .destroy = drm_plane_cleanup, From 5844cc25fd121074de7895181a2fa1ce100a0fdd Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Thu, 26 Nov 2020 20:25:27 +1000 Subject: [PATCH 037/296] powerpc/64s: Fix hash ISA v3.0 TLBIEL instruction generation A typo has the R field of the instruction assigned by lucky dip a la register allocator. 
Fixes: d4748276ae14c ("powerpc/64s: Improve local TLB flush for boot and MCE on POWER9") Signed-off-by: Nicholas Piggin Reviewed-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201126102530.691335-2-npiggin@gmail.com --- arch/powerpc/mm/book3s64/hash_native.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/mm/book3s64/hash_native.c b/arch/powerpc/mm/book3s64/hash_native.c index 0203cdf48c54..97fa42d7027e 100644 --- a/arch/powerpc/mm/book3s64/hash_native.c +++ b/arch/powerpc/mm/book3s64/hash_native.c @@ -68,7 +68,7 @@ static __always_inline void tlbiel_hash_set_isa300(unsigned int set, unsigned in rs = ((unsigned long)pid << PPC_BITLSHIFT(31)); asm volatile(PPC_TLBIEL(%0, %1, %2, %3, %4) - : : "r"(rb), "r"(rs), "i"(ric), "i"(prs), "r"(r) + : : "r"(rb), "r"(rs), "i"(ric), "i"(prs), "i"(r) : "memory"); } From c0b27c517acf8a2b359dd373a7e7e88b01a8308e Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Thu, 26 Nov 2020 20:25:28 +1000 Subject: [PATCH 038/296] powerpc/64s/pseries: Fix hash tlbiel_all_isa300 for guest kernels tlbiel_all() can not be usable in !HVMODE when running hash presently, remove HV privileged flushes when running in guest to make it usable. Signed-off-by: Nicholas Piggin Reviewed-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201126102530.691335-3-npiggin@gmail.com --- arch/powerpc/mm/book3s64/hash_native.c | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/arch/powerpc/mm/book3s64/hash_native.c b/arch/powerpc/mm/book3s64/hash_native.c index 97fa42d7027e..52e170bd95ae 100644 --- a/arch/powerpc/mm/book3s64/hash_native.c +++ b/arch/powerpc/mm/book3s64/hash_native.c @@ -92,16 +92,15 @@ static void tlbiel_all_isa300(unsigned int num_sets, unsigned int is) asm volatile("ptesync": : :"memory"); /* - * Flush the first set of the TLB, and any caching of partition table - * entries. 
Then flush the remaining sets of the TLB. Hash mode uses - * partition scoped TLB translations. + * Flush the partition table cache if this is HV mode. */ - tlbiel_hash_set_isa300(0, is, 0, 2, 0); - for (set = 1; set < num_sets; set++) - tlbiel_hash_set_isa300(set, is, 0, 0, 0); + if (early_cpu_has_feature(CPU_FTR_HVMODE)) + tlbiel_hash_set_isa300(0, is, 0, 2, 0); /* - * Now invalidate the process table cache. + * Now invalidate the process table cache. UPRT=0 HPT modes (what + * current hardware implements) do not use the process table, but + * add the flushes anyway. * * From ISA v3.0B p. 1078: * The following forms are invalid. @@ -110,6 +109,14 @@ static void tlbiel_all_isa300(unsigned int num_sets, unsigned int is) */ tlbiel_hash_set_isa300(0, is, 0, 2, 1); + /* + * Then flush the sets of the TLB proper. Hash mode uses + * partition scoped TLB translations, which may be flushed + * in !HV mode. + */ + for (set = 0; set < num_sets; set++) + tlbiel_hash_set_isa300(set, is, 0, 0, 0); + ppc_after_tlbiel_barrier(); asm volatile(PPC_ISA_3_0_INVALIDATE_ERAT "; isync" : : :"memory"); From 8ff00399b153440c1c83e20c43020385b416415b Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Thu, 26 Nov 2020 20:25:29 +1000 Subject: [PATCH 039/296] kernel/cpu: add arch override for clear_tasks_mm_cpumask() mm handling powerpc/64s keeps a counter in the mm which counts bits set in mm_cpumask as well as other things. This means it can't use generic code to clear bits out of the mask and doesn't adjust the arch specific counter. Add an arch override that allows powerpc/64s to use clear_tasks_mm_cpumask(). 
Signed-off-by: Nicholas Piggin Reviewed-by: Aneesh Kumar K.V Acked-by: Peter Zijlstra (Intel) Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201126102530.691335-4-npiggin@gmail.com --- kernel/cpu.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/kernel/cpu.c b/kernel/cpu.c index 6ff2578ecf17..2b8d7a5db383 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -815,6 +815,10 @@ void __init cpuhp_threads_init(void) } #ifdef CONFIG_HOTPLUG_CPU +#ifndef arch_clear_mm_cpumask_cpu +#define arch_clear_mm_cpumask_cpu(cpu, mm) cpumask_clear_cpu(cpu, mm_cpumask(mm)) +#endif + /** * clear_tasks_mm_cpumask - Safely clear tasks' mm_cpumask for a CPU * @cpu: a CPU id @@ -850,7 +854,7 @@ void clear_tasks_mm_cpumask(int cpu) t = find_lock_task_mm(p); if (!t) continue; - cpumask_clear_cpu(cpu, mm_cpumask(t->mm)); + arch_clear_mm_cpumask_cpu(cpu, t->mm); task_unlock(t); } rcu_read_unlock(); From 01b0f0eae0812e80efeee4ee17687e5386335e08 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Thu, 26 Nov 2020 20:25:30 +1000 Subject: [PATCH 040/296] powerpc/64s: Trim offlined CPUs from mm_cpumasks When offlining a CPU, powerpc/64s does not flush TLBs, rather it just leaves the CPU set in mm_cpumasks, so it continues to receive TLBIEs to manage its TLBs. However the exit_flush_lazy_tlbs() function expects that after returning, all CPUs (except self) have flushed TLBs for that mm, in which case TLBIEL can be used for this flush. This breaks for offline CPUs because they don't get the IPI to flush their TLB. This can lead to stale translations. Fix this by clearing the CPU from mm_cpumasks, then flushing all TLBs before going offline. These offlined CPU bits stuck in the cpumask also prevent the cpumask from being trimmed back to local mode, which means continual broadcast IPIs or TLBIEs are needed for TLB flushing. This patch prevents that situation too.
A cast of many were involved in working this out, but in particular Milton, Aneesh, Paul made key discoveries. Fixes: 0cef77c7798a7 ("powerpc/64s/radix: flush remote CPUs out of single-threaded mm_cpumask") Signed-off-by: Nicholas Piggin Reviewed-by: Aneesh Kumar K.V Debugged-by: Milton Miller Debugged-by: Aneesh Kumar K.V Debugged-by: Paul Mackerras Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201126102530.691335-5-npiggin@gmail.com --- arch/powerpc/include/asm/book3s/64/mmu.h | 12 ++++++++++++ arch/powerpc/mm/book3s64/mmu_context.c | 20 ++++++++++++++++++++ arch/powerpc/platforms/powermac/smp.c | 2 ++ arch/powerpc/platforms/powernv/smp.c | 3 +++ arch/powerpc/platforms/pseries/hotplug-cpu.c | 3 +++ 5 files changed, 40 insertions(+) diff --git a/arch/powerpc/include/asm/book3s/64/mmu.h b/arch/powerpc/include/asm/book3s/64/mmu.h index e0b52940e43c..750918451dd2 100644 --- a/arch/powerpc/include/asm/book3s/64/mmu.h +++ b/arch/powerpc/include/asm/book3s/64/mmu.h @@ -242,6 +242,18 @@ extern void radix_init_pseries(void); static inline void radix_init_pseries(void) { }; #endif +#ifdef CONFIG_HOTPLUG_CPU +#define arch_clear_mm_cpumask_cpu(cpu, mm) \ + do { \ + if (cpumask_test_cpu(cpu, mm_cpumask(mm))) { \ + atomic_dec(&(mm)->context.active_cpus); \ + cpumask_clear_cpu(cpu, mm_cpumask(mm)); \ + } \ + } while (0) + +void cleanup_cpu_mmu_context(void); +#endif + static inline int get_user_context(mm_context_t *ctx, unsigned long ea) { int index = ea >> MAX_EA_BITS_PER_CONTEXT; diff --git a/arch/powerpc/mm/book3s64/mmu_context.c b/arch/powerpc/mm/book3s64/mmu_context.c index 1c54821de7bf..0c8557220ae2 100644 --- a/arch/powerpc/mm/book3s64/mmu_context.c +++ b/arch/powerpc/mm/book3s64/mmu_context.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include @@ -307,3 +308,22 @@ void radix__switch_mmu_context(struct mm_struct *prev, struct mm_struct *next) isync(); } #endif + +/** + * cleanup_cpu_mmu_context - Clean up MMU details for this CPU 
(newly offlined) + * + * This clears the CPU from mm_cpumask for all processes, and then flushes the + * local TLB to ensure TLB coherency in case the CPU is onlined again. + * + * KVM guest translations are not necessarily flushed here. If KVM started + * using mm_cpumask or the Linux APIs which do, this would have to be resolved. + */ +#ifdef CONFIG_HOTPLUG_CPU +void cleanup_cpu_mmu_context(void) +{ + int cpu = smp_processor_id(); + + clear_tasks_mm_cpumask(cpu); + tlbiel_all(); +} +#endif diff --git a/arch/powerpc/platforms/powermac/smp.c b/arch/powerpc/platforms/powermac/smp.c index 74ebe664b016..adae2a6712e1 100644 --- a/arch/powerpc/platforms/powermac/smp.c +++ b/arch/powerpc/platforms/powermac/smp.c @@ -911,6 +911,8 @@ static int smp_core99_cpu_disable(void) mpic_cpu_set_priority(0xf); + cleanup_cpu_mmu_context(); + return 0; } diff --git a/arch/powerpc/platforms/powernv/smp.c b/arch/powerpc/platforms/powernv/smp.c index 54c4ba45c7ce..cbb67813cd5d 100644 --- a/arch/powerpc/platforms/powernv/smp.c +++ b/arch/powerpc/platforms/powernv/smp.c @@ -143,6 +143,9 @@ static int pnv_smp_cpu_disable(void) xive_smp_disable_cpu(); else xics_migrate_irqs_away(); + + cleanup_cpu_mmu_context(); + return 0; } diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c b/arch/powerpc/platforms/pseries/hotplug-cpu.c index f2837e33bf5d..a02012f1b04a 100644 --- a/arch/powerpc/platforms/pseries/hotplug-cpu.c +++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c @@ -90,6 +90,9 @@ static int pseries_cpu_disable(void) xive_smp_disable_cpu(); else xics_migrate_irqs_away(); + + cleanup_cpu_mmu_context(); + return 0; } From aea656b0d05ec5b8ed5beb2f94c4dd42ea834e9d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Thu, 26 Nov 2020 13:35:08 +0100 Subject: [PATCH 041/296] drm/nouveau: make sure ret is initialized in nouveau_ttm_io_mem_reserve MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This wasn't initialized for pre NV50 
hardware. Signed-off-by: Christian König Reported-and-Tested-by: Mark Hounschell Reviewed-by: Karol Herbst Link: https://patchwork.freedesktop.org/series/84298/ --- drivers/gpu/drm/nouveau/nouveau_bo.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c index 2ee75646ad6f..d02f3a58adb6 100644 --- a/drivers/gpu/drm/nouveau/nouveau_bo.c +++ b/drivers/gpu/drm/nouveau/nouveau_bo.c @@ -1215,8 +1215,8 @@ retry: } reg->bus.offset = handle; - ret = 0; } + ret = 0; break; default: ret = -EINVAL; From bf3a3cdcad40e5928a22ea0fd200d17fd6d6308d Mon Sep 17 00:00:00 2001 From: Qinglang Miao Date: Fri, 30 Oct 2020 09:34:24 +0800 Subject: [PATCH 042/296] drm/tegra: sor: Disable clocks on error in tegra_sor_init() Fix the missing clk_disable_unprepare() before return from tegra_sor_init() in the error handling case. Signed-off-by: Qinglang Miao Signed-off-by: Thierry Reding --- drivers/gpu/drm/tegra/sor.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/tegra/sor.c b/drivers/gpu/drm/tegra/sor.c index 00656d8b98e2..cc2aa2308a51 100644 --- a/drivers/gpu/drm/tegra/sor.c +++ b/drivers/gpu/drm/tegra/sor.c @@ -3143,6 +3143,7 @@ static int tegra_sor_init(struct host1x_client *client) if (err < 0) { dev_err(sor->dev, "failed to deassert SOR reset: %d\n", err); + clk_disable_unprepare(sor->clk); return err; } @@ -3150,12 +3151,17 @@ static int tegra_sor_init(struct host1x_client *client) } err = clk_prepare_enable(sor->clk_safe); - if (err < 0) + if (err < 0) { + clk_disable_unprepare(sor->clk); return err; + } err = clk_prepare_enable(sor->clk_dp); - if (err < 0) + if (err < 0) { + clk_disable_unprepare(sor->clk_safe); + clk_disable_unprepare(sor->clk); return err; + } return 0; } From 6d6556c04ebaeaf4e7fa8b791c97e2a7c41b38a3 Mon Sep 17 00:00:00 2001 From: Giacinto Cifelli Date: Wed, 25 Nov 2020 15:53:04 +0100 Subject: [PATCH 043/296] USB: serial: option: add 
support for Thales Cinterion EXS82 There is a single option port in this modem, and it is used as debug port. lsusb -v for this device: Bus 001 Device 002: ID 1e2d:006c Device Descriptor: bLength 18 bDescriptorType 1 bcdUSB 2.00 bDeviceClass 239 Miscellaneous Device bDeviceSubClass 2 ? bDeviceProtocol 1 Interface Association bMaxPacketSize0 64 idVendor 0x1e2d idProduct 0x006c bcdDevice 0.00 iManufacturer 4 iProduct 3 iSerial 5 bNumConfigurations 1 Configuration Descriptor: bLength 9 bDescriptorType 2 wTotalLength 243 bNumInterfaces 7 bConfigurationValue 1 iConfiguration 2 bmAttributes 0xe0 Self Powered Remote Wakeup MaxPower 500mA Interface Descriptor: bLength 9 bDescriptorType 4 bInterfaceNumber 0 bAlternateSetting 0 bNumEndpoints 2 bInterfaceClass 255 Vendor Specific Class bInterfaceSubClass 255 Vendor Specific Subclass bInterfaceProtocol 255 Vendor Specific Protocol iInterface 0 Endpoint Descriptor: bLength 7 bDescriptorType 5 bEndpointAddress 0x81 EP 1 IN bmAttributes 2 Transfer Type Bulk Synch Type None Usage Type Data wMaxPacketSize 0x0200 1x 512 bytes bInterval 0 Endpoint Descriptor: bLength 7 bDescriptorType 5 bEndpointAddress 0x01 EP 1 OUT bmAttributes 2 Transfer Type Bulk Synch Type None Usage Type Data wMaxPacketSize 0x0200 1x 512 bytes bInterval 0 Interface Association: bLength 8 bDescriptorType 11 bFirstInterface 1 bInterfaceCount 2 bFunctionClass 2 Communications bFunctionSubClass 2 Abstract (modem) bFunctionProtocol 1 AT-commands (v.25ter) iFunction 0 Interface Descriptor: bLength 9 bDescriptorType 4 bInterfaceNumber 1 bAlternateSetting 0 bNumEndpoints 1 bInterfaceClass 2 Communications bInterfaceSubClass 2 Abstract (modem) bInterfaceProtocol 1 AT-commands (v.25ter) iInterface 0 CDC Header: bcdCDC 1.10 CDC ACM: bmCapabilities 0x02 line coding and serial state CDC Call Management: bmCapabilities 0x03 call management use DataInterface bDataInterface 2 CDC Union: bMasterInterface 1 bSlaveInterface 2 Endpoint Descriptor: bLength 7 bDescriptorType 5 
bEndpointAddress 0x82 EP 2 IN bmAttributes 3 Transfer Type Interrupt Synch Type None Usage Type Data wMaxPacketSize 0x0040 1x 64 bytes bInterval 5 Interface Descriptor: bLength 9 bDescriptorType 4 bInterfaceNumber 2 bAlternateSetting 0 bNumEndpoints 2 bInterfaceClass 10 CDC Data bInterfaceSubClass 0 Unused bInterfaceProtocol 0 iInterface 0 Endpoint Descriptor: bLength 7 bDescriptorType 5 bEndpointAddress 0x83 EP 3 IN bmAttributes 2 Transfer Type Bulk Synch Type None Usage Type Data wMaxPacketSize 0x0200 1x 512 bytes bInterval 0 Endpoint Descriptor: bLength 7 bDescriptorType 5 bEndpointAddress 0x02 EP 2 OUT bmAttributes 2 Transfer Type Bulk Synch Type None Usage Type Data wMaxPacketSize 0x0200 1x 512 bytes bInterval 0 Interface Association: bLength 8 bDescriptorType 11 bFirstInterface 3 bInterfaceCount 2 bFunctionClass 2 Communications bFunctionSubClass 2 Abstract (modem) bFunctionProtocol 1 AT-commands (v.25ter) iFunction 0 Interface Descriptor: bLength 9 bDescriptorType 4 bInterfaceNumber 3 bAlternateSetting 0 bNumEndpoints 1 bInterfaceClass 2 Communications bInterfaceSubClass 2 Abstract (modem) bInterfaceProtocol 1 AT-commands (v.25ter) iInterface 0 CDC Header: bcdCDC 1.10 CDC ACM: bmCapabilities 0x02 line coding and serial state CDC Call Management: bmCapabilities 0x03 call management use DataInterface bDataInterface 4 CDC Union: bMasterInterface 3 bSlaveInterface 4 Endpoint Descriptor: bLength 7 bDescriptorType 5 bEndpointAddress 0x84 EP 4 IN bmAttributes 3 Transfer Type Interrupt Synch Type None Usage Type Data wMaxPacketSize 0x0040 1x 64 bytes bInterval 5 Interface Descriptor: bLength 9 bDescriptorType 4 bInterfaceNumber 4 bAlternateSetting 0 bNumEndpoints 2 bInterfaceClass 10 CDC Data bInterfaceSubClass 0 Unused bInterfaceProtocol 0 iInterface 0 Endpoint Descriptor: bLength 7 bDescriptorType 5 bEndpointAddress 0x85 EP 5 IN bmAttributes 2 Transfer Type Bulk Synch Type None Usage Type Data wMaxPacketSize 0x0200 1x 512 bytes bInterval 0 Endpoint Descriptor: 
bLength 7 bDescriptorType 5 bEndpointAddress 0x03 EP 3 OUT bmAttributes 2 Transfer Type Bulk Synch Type None Usage Type Data wMaxPacketSize 0x0200 1x 512 bytes bInterval 0 Interface Association: bLength 8 bDescriptorType 11 bFirstInterface 5 bInterfaceCount 2 bFunctionClass 2 Communications bFunctionSubClass 2 Abstract (modem) bFunctionProtocol 1 AT-commands (v.25ter) iFunction 0 Interface Descriptor: bLength 9 bDescriptorType 4 bInterfaceNumber 5 bAlternateSetting 0 bNumEndpoints 1 bInterfaceClass 2 Communications bInterfaceSubClass 6 Ethernet Networking bInterfaceProtocol 0 iInterface 0 CDC Header: bcdCDC 1.10 CDC Ethernet: iMacAddress 1 (??) bmEthernetStatistics 0x00000000 wMaxSegmentSize 16384 wNumberMCFilters 0x0001 bNumberPowerFilters 0 CDC Union: bMasterInterface 5 bSlaveInterface 6 Endpoint Descriptor: bLength 7 bDescriptorType 5 bEndpointAddress 0x86 EP 6 IN bmAttributes 3 Transfer Type Interrupt Synch Type None Usage Type Data wMaxPacketSize 0x0040 1x 64 bytes bInterval 5 Interface Descriptor: bLength 9 bDescriptorType 4 bInterfaceNumber 6 bAlternateSetting 0 bNumEndpoints 0 bInterfaceClass 10 CDC Data bInterfaceSubClass 0 Unused bInterfaceProtocol 0 iInterface 0 Interface Descriptor: bLength 9 bDescriptorType 4 bInterfaceNumber 6 bAlternateSetting 1 bNumEndpoints 2 bInterfaceClass 10 CDC Data bInterfaceSubClass 0 Unused bInterfaceProtocol 0 iInterface 0 Endpoint Descriptor: bLength 7 bDescriptorType 5 bEndpointAddress 0x87 EP 7 IN bmAttributes 2 Transfer Type Bulk Synch Type None Usage Type Data wMaxPacketSize 0x0200 1x 512 bytes bInterval 0 Endpoint Descriptor: bLength 7 bDescriptorType 5 bEndpointAddress 0x04 EP 4 OUT bmAttributes 2 Transfer Type Bulk Synch Type None Usage Type Data wMaxPacketSize 0x0200 1x 512 bytes bInterval 0 Signed-off-by: Giacinto Cifelli Cc: stable@vger.kernel.org Signed-off-by: Johan Hovold --- drivers/usb/serial/option.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/serial/option.c 
b/drivers/usb/serial/option.c index 2aa31173997f..653da1d2945c 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -419,6 +419,7 @@ static void option_instat_callback(struct urb *urb); #define CINTERION_PRODUCT_PH8 0x0053 #define CINTERION_PRODUCT_AHXX 0x0055 #define CINTERION_PRODUCT_PLXX 0x0060 +#define CINTERION_PRODUCT_EXS82 0x006c #define CINTERION_PRODUCT_PH8_2RMNET 0x0082 #define CINTERION_PRODUCT_PH8_AUDIO 0x0083 #define CINTERION_PRODUCT_AHXX_2RMNET 0x0084 @@ -1902,6 +1903,7 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE_INTERFACE_CLASS(CINTERION_VENDOR_ID, CINTERION_PRODUCT_AHXX_AUDIO, 0xff) }, { USB_DEVICE_INTERFACE_CLASS(CINTERION_VENDOR_ID, CINTERION_PRODUCT_CLS8, 0xff), .driver_info = RSVD(0) | RSVD(4) }, + { USB_DEVICE_INTERFACE_CLASS(CINTERION_VENDOR_ID, CINTERION_PRODUCT_EXS82, 0xff) }, { USB_DEVICE(CINTERION_VENDOR_ID, CINTERION_PRODUCT_HC28_MDM) }, { USB_DEVICE(CINTERION_VENDOR_ID, CINTERION_PRODUCT_HC28_MDMNET) }, { USB_DEVICE(SIEMENS_VENDOR_ID, CINTERION_PRODUCT_HC25_MDM) }, From 10f78fd0dabbc3856ddd67b09a46abdedb045913 Mon Sep 17 00:00:00 2001 From: Srikar Dronamraju Date: Fri, 27 Nov 2020 11:07:38 +0530 Subject: [PATCH 044/296] powerpc/numa: Fix a regression on memoryless node 0 Commit e75130f20b1f ("powerpc/numa: Offline memoryless cpuless node 0") offlines node 0 and expects nodes to be subsequently onlined when CPUs or nodes are detected. Commit 6398eaa26816 ("powerpc/numa: Prefer node id queried from vphn") skips onlining node 0 when CPUs are associated with node 0. On systems with node 0 having CPUs but no memory, this causes node 0 to be marked offline. This causes issues at boot time when trying to set memory node for online CPUs while building the zonelist.
0:mon> t [link register ] c000000000400354 __build_all_zonelists+0x164/0x280 [c00000000161bda0] c0000000016533c8 node_states+0x20/0xa0 (unreliable) [c00000000161bdc0] c000000000400384 __build_all_zonelists+0x194/0x280 [c00000000161be30] c000000001041800 build_all_zonelists_init+0x4c/0x118 [c00000000161be80] c0000000004020d0 build_all_zonelists+0x190/0x1b0 [c00000000161bef0] c000000001003cf8 start_kernel+0x18c/0x6a8 [c00000000161bf90] c00000000000adb4 start_here_common+0x1c/0x3e8 0:mon> r R00 = c000000000400354 R16 = 000000000b57a0e8 R01 = c00000000161bda0 R17 = 000000000b57a6b0 R02 = c00000000161ce00 R18 = 000000000b5afee8 R03 = 0000000000000000 R19 = 000000000b6448a0 R04 = 0000000000000000 R20 = fffffffffffffffd R05 = 0000000000000000 R21 = 0000000001400000 R06 = 0000000000000000 R22 = 000000001ec00000 R07 = 0000000000000001 R23 = c000000001175580 R08 = 0000000000000000 R24 = c000000001651ed8 R09 = c0000000017e84d8 R25 = c000000001652480 R10 = 0000000000000000 R26 = c000000001175584 R11 = c000000c7fac0d10 R27 = c0000000019568d0 R12 = c000000000400180 R28 = 0000000000000000 R13 = c000000002200000 R29 = c00000000164dd78 R14 = 000000000b579f78 R30 = 0000000000000000 R15 = 000000000b57a2b8 R31 = c000000001175584 pc = c000000000400194 local_memory_node+0x24/0x80 cfar= c000000000074334 mcount+0xc/0x10 lr = c000000000400354 __build_all_zonelists+0x164/0x280 msr = 8000000002001033 cr = 44002284 ctr = c000000000400180 xer = 0000000000000001 trap = 380 dar = 0000000000001388 dsisr = c00000000161bc90 0:mon> Fix this by setting node to be online while onlining CPUs that belong to node 0. 
Fixes: e75130f20b1f ("powerpc/numa: Offline memoryless cpuless node 0") Fixes: 6398eaa26816 ("powerpc/numa: Prefer node id queried from vphn") Reported-by: Milan Mohanty Signed-off-by: Srikar Dronamraju Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201127053738.10085-1-srikar@linux.vnet.ibm.com --- arch/powerpc/mm/numa.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index 63f61d8b55e5..f2bf98bdcea2 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -742,8 +742,7 @@ static int __init parse_numa_properties(void) of_node_put(cpu); } - if (likely(nid > 0)) - node_set_online(nid); + node_set_online(nid); } get_n_mem_cells(&n_mem_addr_cells, &n_mem_size_cells); From aec9fe892812ed10d0bffcf309d2a8fc380d8ce6 Mon Sep 17 00:00:00 2001 From: Paul Kocialkowski Date: Tue, 10 Nov 2020 21:04:30 +0100 Subject: [PATCH 045/296] drm/rockchip: Avoid uninitialized use of endpoint id in LVDS In the Rockchip DRM LVDS component driver, the endpoint id provided to drm_of_find_panel_or_bridge is grabbed from the endpoint's reg property. However, the property may be missing in the case of a single endpoint. Initialize the endpoint_id variable to 0 to avoid using an uninitialized variable in that case. 
Fixes: 34cc0aa25456 ("drm/rockchip: Add support for Rockchip Soc LVDS") Signed-off-by: Paul Kocialkowski Signed-off-by: Heiko Stuebner Link: https://patchwork.freedesktop.org/patch/msgid/20201110200430.1713467-1-paul.kocialkowski@bootlin.com --- drivers/gpu/drm/rockchip/rockchip_lvds.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/rockchip/rockchip_lvds.c b/drivers/gpu/drm/rockchip/rockchip_lvds.c index f292c6a6e20f..41edd0a421b2 100644 --- a/drivers/gpu/drm/rockchip/rockchip_lvds.c +++ b/drivers/gpu/drm/rockchip/rockchip_lvds.c @@ -544,7 +544,7 @@ static int rockchip_lvds_bind(struct device *dev, struct device *master, struct device_node *port, *endpoint; int ret = 0, child_count = 0; const char *name; - u32 endpoint_id; + u32 endpoint_id = 0; lvds->drm_dev = drm_dev; port = of_graph_get_port_by_id(dev->of_node, 1); From 7c4bada12d320d8648ba3ede6f9b6f9e10f1126a Mon Sep 17 00:00:00 2001 From: Sebastian Reichel Date: Fri, 27 Nov 2020 21:04:29 +0100 Subject: [PATCH 046/296] drm/panel: sony-acx565akm: Fix race condition in probe The probe routine acquires the reset GPIO using GPIOD_OUT_LOW. Directly afterwards it calls acx565akm_detect(), which sets the GPIO value to HIGH. If the bootloader initialized the GPIO to HIGH before the probe routine was called, there is only a very short time period of a few instructions where the reset signal is LOW. Exact time depends on compiler optimizations, kernel configuration and alignment of the stars, but I expect it to be always way less than 10us. There are no public datasheets for the panel, but acx565akm_power_on() has a comment with timings and reset period should be at least 10us. So this potentially brings the panel into a half-reset state. The result is, that panel may not work after boot and can get into a working state by re-enabling it (e.g. by blanking + unblanking), since that does a clean reset cycle. 
This bug has recently been hit by Ivaylo Dimitrov, but there are some older reports which are probably the same bug. At least Tony Lindgren, Peter Ujfalusi and Jarkko Nikula have experienced it in 2017 describing the blank/unblank procedure as possible workaround. Note, that the bug really goes back in time. It has originally been introduced in the predecessor of the omapfb driver in commit 3c45d05be382 ("OMAPDSS: acx565akm panel: handle gpios in panel driver") in 2012. That driver eventually got replaced by a newer one, which had the bug from the beginning in commit 84192742d9c2 ("OMAPDSS: Add Sony ACX565AKM panel driver") and still exists in fbdev world. That driver has later been copied to omapdrm and then was used as a basis for this driver. Last but not least the omapdrm specific driver has been removed in commit 45f16c82db7e ("drm/omap: displays: Remove unused panel drivers"). Reported-by: Jarkko Nikula Reported-by: Peter Ujfalusi Reported-by: Tony Lindgren Reported-by: Aaro Koskinen Reported-by: Ivaylo Dimitrov Cc: Merlijn Wajer Cc: Laurent Pinchart Cc: Tomi Valkeinen Fixes: 1c8fc3f0c5d2 ("drm/panel: Add driver for the Sony ACX565AKM panel") Signed-off-by: Sebastian Reichel Tested-by: Ivaylo Dimitrov Tested-by: Aaro Koskinen Tested-by: Jarkko Nikula Signed-off-by: Sam Ravnborg Link: https://patchwork.freedesktop.org/patch/msgid/20201127200429.129868-1-sebastian.reichel@collabora.com --- drivers/gpu/drm/panel/panel-sony-acx565akm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/panel/panel-sony-acx565akm.c b/drivers/gpu/drm/panel/panel-sony-acx565akm.c index e95fdfb16b6c..ba0b3ead150f 100644 --- a/drivers/gpu/drm/panel/panel-sony-acx565akm.c +++ b/drivers/gpu/drm/panel/panel-sony-acx565akm.c @@ -629,7 +629,7 @@ static int acx565akm_probe(struct spi_device *spi) lcd->spi = spi; mutex_init(&lcd->mutex); - lcd->reset_gpio = devm_gpiod_get(&spi->dev, "reset", GPIOD_OUT_LOW); + lcd->reset_gpio = devm_gpiod_get(&spi->dev, "reset", 
GPIOD_OUT_HIGH); if (IS_ERR(lcd->reset_gpio)) { dev_err(&spi->dev, "failed to get reset GPIO\n"); return PTR_ERR(lcd->reset_gpio); From 6703052fe30fa0d85f1fbbf50171486cb0148d2d Mon Sep 17 00:00:00 2001 From: Roger Quadros Date: Wed, 25 Nov 2020 14:49:36 +0200 Subject: [PATCH 047/296] usb: cdns3: Fix hardware based role switch Hardware based role switch is broken as the driver always skips it. Fix this by registering for SW role switch only if 'usb-role-switch' property is present in the device tree. Fixes: 50642709f659 ("usb: cdns3: core: quit if it uses role switch class") Signed-off-by: Roger Quadros Signed-off-by: Peter Chen --- drivers/usb/cdns3/core.c | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/drivers/usb/cdns3/core.c b/drivers/usb/cdns3/core.c index a0f73d4711ae..170deb3eacf0 100644 --- a/drivers/usb/cdns3/core.c +++ b/drivers/usb/cdns3/core.c @@ -427,7 +427,6 @@ static irqreturn_t cdns3_wakeup_irq(int irq, void *data) */ static int cdns3_probe(struct platform_device *pdev) { - struct usb_role_switch_desc sw_desc = { }; struct device *dev = &pdev->dev; struct resource *res; struct cdns3 *cdns; @@ -529,18 +528,21 @@ static int cdns3_probe(struct platform_device *pdev) if (ret) goto err2; - sw_desc.set = cdns3_role_set; - sw_desc.get = cdns3_role_get; - sw_desc.allow_userspace_control = true; - sw_desc.driver_data = cdns; - if (device_property_read_bool(dev, "usb-role-switch")) + if (device_property_read_bool(dev, "usb-role-switch")) { + struct usb_role_switch_desc sw_desc = { }; + + sw_desc.set = cdns3_role_set; + sw_desc.get = cdns3_role_get; + sw_desc.allow_userspace_control = true; + sw_desc.driver_data = cdns; sw_desc.fwnode = dev->fwnode; - cdns->role_sw = usb_role_switch_register(dev, &sw_desc); - if (IS_ERR(cdns->role_sw)) { - ret = PTR_ERR(cdns->role_sw); - dev_warn(dev, "Unable to register Role Switch\n"); - goto err3; + cdns->role_sw = usb_role_switch_register(dev, &sw_desc); + if 
(IS_ERR(cdns->role_sw)) { + ret = PTR_ERR(cdns->role_sw); + dev_warn(dev, "Unable to register Role Switch\n"); + goto err3; + } } if (cdns->wakeup_irq) { @@ -582,7 +584,8 @@ static int cdns3_probe(struct platform_device *pdev) return 0; err4: cdns3_drd_exit(cdns); - usb_role_switch_unregister(cdns->role_sw); + if (cdns->role_sw) + usb_role_switch_unregister(cdns->role_sw); err3: set_phy_power_off(cdns); err2: From 24fdaeeb294c14ea743ec99ada92380c060a428a Mon Sep 17 00:00:00 2001 From: Peter Chen Date: Wed, 25 Nov 2020 20:35:23 +0800 Subject: [PATCH 048/296] usb: cdns3: gadget: clear trb->length as zero after preparing every trb It clears trb->length as zero before preparing td, but if scatter buffer is used for td, there are several trbs within td, it needs to clear every trb->length as zero, otherwise, the default value for trb->length may not be zero after it begins to use the second round of trb rings. Fixes: abc6b579048e ("usb: cdns3: gadget: using correct sg operations") Signed-off-by: Peter Chen --- drivers/usb/cdns3/gadget.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/cdns3/gadget.c b/drivers/usb/cdns3/gadget.c index 365f30fb1159..0aa85cc07ff1 100644 --- a/drivers/usb/cdns3/gadget.c +++ b/drivers/usb/cdns3/gadget.c @@ -1260,6 +1260,7 @@ static int cdns3_ep_run_transfer(struct cdns3_endpoint *priv_ep, priv_req->end_trb = priv_ep->enqueue; cdns3_ep_inc_enq(priv_ep); trb = priv_ep->trb_pool + priv_ep->enqueue; + trb->length = 0; } while (sg_iter < num_trb); trb = priv_req->trb; From 6b8137517e70f6e96d0251a98930b1f29d0be161 Mon Sep 17 00:00:00 2001 From: Peter Chen Date: Thu, 26 Nov 2020 14:02:38 +0800 Subject: [PATCH 049/296] usb: cdns3: core: fix goto label for error path The usb_role_switch_register has been already called, so if the devm_request_irq has failed, it needs to call usb_role_switch_unregister. 
Fixes: b1234e3b3b26 ("usb: cdns3: add runtime PM support") Signed-off-by: Peter Chen --- drivers/usb/cdns3/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/cdns3/core.c b/drivers/usb/cdns3/core.c index 170deb3eacf0..039ab5d2435e 100644 --- a/drivers/usb/cdns3/core.c +++ b/drivers/usb/cdns3/core.c @@ -553,7 +553,7 @@ static int cdns3_probe(struct platform_device *pdev) if (ret) { dev_err(cdns->dev, "couldn't register wakeup irq handler\n"); - goto err3; + goto err4; } } From f0992098cadb4c9c6a00703b66cafe604e178fea Mon Sep 17 00:00:00 2001 From: Samuel Thibault Date: Sun, 29 Nov 2020 20:35:23 +0100 Subject: [PATCH 050/296] speakup: Reject setting the speakup line discipline outside of speakup Speakup exposing a line discipline allows userland to try to use it, while it is deemed to be useless, and thus uselessly exposes potential bugs. One of them is simply that in such a case if the line sends data, spk_ttyio_receive_buf2 is called and crashes since spk_ttyio_synth is NULL. This change restricts the use of the speakup line discipline to speakup drivers, thus avoiding this kind of issue altogether.
Cc: stable@vger.kernel.org Reported-by: Shisong Qin Signed-off-by: Samuel Thibault Tested-by: Shisong Qin Link: https://lore.kernel.org/r/20201129193523.hm3f6n5xrn6fiyyc@function Signed-off-by: Greg Kroah-Hartman --- drivers/accessibility/speakup/spk_ttyio.c | 37 ++++++++++++++--------- 1 file changed, 23 insertions(+), 14 deletions(-) diff --git a/drivers/accessibility/speakup/spk_ttyio.c b/drivers/accessibility/speakup/spk_ttyio.c index 669392f31d4e..6284aff434a1 100644 --- a/drivers/accessibility/speakup/spk_ttyio.c +++ b/drivers/accessibility/speakup/spk_ttyio.c @@ -47,27 +47,20 @@ static int spk_ttyio_ldisc_open(struct tty_struct *tty) { struct spk_ldisc_data *ldisc_data; + if (tty != speakup_tty) + /* Somebody tried to use this line discipline outside speakup */ + return -ENODEV; + if (!tty->ops->write) return -EOPNOTSUPP; - mutex_lock(&speakup_tty_mutex); - if (speakup_tty) { - mutex_unlock(&speakup_tty_mutex); - return -EBUSY; - } - speakup_tty = tty; - ldisc_data = kmalloc(sizeof(*ldisc_data), GFP_KERNEL); - if (!ldisc_data) { - speakup_tty = NULL; - mutex_unlock(&speakup_tty_mutex); + if (!ldisc_data) return -ENOMEM; - } init_completion(&ldisc_data->completion); ldisc_data->buf_free = true; - speakup_tty->disc_data = ldisc_data; - mutex_unlock(&speakup_tty_mutex); + tty->disc_data = ldisc_data; return 0; } @@ -191,9 +184,25 @@ static int spk_ttyio_initialise_ldisc(struct spk_synth *synth) tty_unlock(tty); + mutex_lock(&speakup_tty_mutex); + speakup_tty = tty; ret = tty_set_ldisc(tty, N_SPEAKUP); if (ret) - pr_err("speakup: Failed to set N_SPEAKUP on tty\n"); + speakup_tty = NULL; + mutex_unlock(&speakup_tty_mutex); + + if (!ret) + /* Success */ + return 0; + + pr_err("speakup: Failed to set N_SPEAKUP on tty\n"); + + tty_lock(tty); + if (tty->ops->close) + tty->ops->close(tty, NULL); + tty_unlock(tty); + + tty_kclose(tty); return ret; } From c8c39fbd01d42c30454e42c16bcd69c17260b90a Mon Sep 17 00:00:00 2001 From: Ofir Bitton Date: Thu, 26 Nov 2020 13:01:11 
+0200 Subject: [PATCH 051/296] habanalabs: free host huge va_range if not used If huge range is not valid, driver uses the host range also for huge page allocations, but driver never frees its allocation. This introduces a memory leak every time a user closes its context. Signed-off-by: Ofir Bitton Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/common/memory.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/misc/habanalabs/common/memory.c b/drivers/misc/habanalabs/common/memory.c index 84227819e4d1..bfe223abf142 100644 --- a/drivers/misc/habanalabs/common/memory.c +++ b/drivers/misc/habanalabs/common/memory.c @@ -1626,6 +1626,7 @@ static int vm_ctx_init_with_ranges(struct hl_ctx *ctx, goto host_hpage_range_err; } } else { + kfree(ctx->host_huge_va_range); ctx->host_huge_va_range = ctx->host_va_range; } From 5555b7c56bdec7a29c789fec27f84d40f52fbdfa Mon Sep 17 00:00:00 2001 From: Ofir Bitton Date: Sun, 29 Nov 2020 09:34:12 +0200 Subject: [PATCH 052/296] habanalabs: put devices before driver removal Driver never puts its device and control_device objects, hence a memory leak is introduced on every driver removal.
Signed-off-by: Ofir Bitton Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/common/device.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/misc/habanalabs/common/device.c b/drivers/misc/habanalabs/common/device.c index 20572224099a..783bbdcb1e61 100644 --- a/drivers/misc/habanalabs/common/device.c +++ b/drivers/misc/habanalabs/common/device.c @@ -231,16 +231,16 @@ delete_cdev_device: static void device_cdev_sysfs_del(struct hl_device *hdev) { - /* device_release() won't be called so must free devices explicitly */ - if (!hdev->cdev_sysfs_created) { - kfree(hdev->dev_ctrl); - kfree(hdev->dev); - return; - } + if (!hdev->cdev_sysfs_created) + goto put_devices; hl_sysfs_fini(hdev); cdev_device_del(&hdev->cdev_ctrl, hdev->dev_ctrl); cdev_device_del(&hdev->cdev, hdev->dev); + +put_devices: + put_device(hdev->dev); + put_device(hdev->dev_ctrl); } /* @@ -1371,9 +1371,9 @@ sw_fini: early_fini: device_early_fini(hdev); free_dev_ctrl: - kfree(hdev->dev_ctrl); + put_device(hdev->dev_ctrl); free_dev: - kfree(hdev->dev); + put_device(hdev->dev); out_disabled: hdev->disabled = true; if (add_cdev_sysfs_on_err) From 509920aee72ae23235615a009c5148cdb38794c3 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Sat, 28 Nov 2020 10:37:07 +0000 Subject: [PATCH 053/296] MAINTAINERS: Move Jason Cooper to CREDITS Jason's email address has now been bouncing for weeks, and no reply was received when trying to reach out on other addresses. We really hope he is OK. But until we hear of his whereabouts, let's move him to the CREDITS file so that people stop Cc-ing him. 
Signed-off-by: Marc Zyngier Signed-off-by: Thomas Gleixner Acked-by: Gregory CLEMENT Acked-by: Andrew Lunn Link: https://lore.kernel.org/r/20201128103707.332874-1-maz@kernel.org --- CREDITS | 5 +++++ MAINTAINERS | 4 ---- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/CREDITS b/CREDITS index 748301954ab7..e88d1a783a80 100644 --- a/CREDITS +++ b/CREDITS @@ -740,6 +740,11 @@ S: (ask for current address) S: Portland, Oregon S: USA +N: Jason Cooper +D: ARM/Marvell SOC co-maintainer +D: irqchip co-maintainer +D: MVEBU PCI DRIVER co-maintainer + N: Robin Cornelius E: robincornelius@users.sourceforge.net D: Ralink rt2x00 WLAN driver diff --git a/MAINTAINERS b/MAINTAINERS index 2daa6ee673f7..4f27f43b2e0c 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2014,7 +2014,6 @@ M: Philipp Zabel S: Maintained ARM/Marvell Dove/MV78xx0/Orion SOC support -M: Jason Cooper M: Andrew Lunn M: Sebastian Hesselbarth M: Gregory Clement @@ -2031,7 +2030,6 @@ F: arch/arm/plat-orion/ F: drivers/soc/dove/ ARM/Marvell Kirkwood and Armada 370, 375, 38x, 39x, XP, 3700, 7K/8K, CN9130 SOC support -M: Jason Cooper M: Andrew Lunn M: Gregory Clement M: Sebastian Hesselbarth @@ -9248,7 +9246,6 @@ F: kernel/irq/ IRQCHIP DRIVERS M: Thomas Gleixner -M: Jason Cooper M: Marc Zyngier L: linux-kernel@vger.kernel.org S: Maintained @@ -13394,7 +13391,6 @@ F: drivers/pci/controller/mobiveil/pcie-mobiveil* PCI DRIVER FOR MVEBU (Marvell Armada 370 and Armada XP SOC support) M: Thomas Petazzoni -M: Jason Cooper L: linux-pci@vger.kernel.org L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) S: Maintained From fd4e788e971ce763e50762d7b1a0048992949dd0 Mon Sep 17 00:00:00 2001 From: Tomi Valkeinen Date: Fri, 27 Nov 2020 10:52:41 +0200 Subject: [PATCH 054/296] drm/omap: sdi: fix bridge enable/disable When the SDI output was converted to DRM bridge, the atomic versions of enable and disable funcs were used. This was not intended, as that would require implementing other atomic funcs too. 
This leads to: WARNING: CPU: 0 PID: 18 at drivers/gpu/drm/drm_bridge.c:708 drm_atomic_helper_commit_modeset_enables+0x134/0x268 and display not working. Fix this by using the legacy enable/disable funcs. Fixes: 8bef8a6d5da81b909a190822b96805a47348146f ("drm/omap: sdi: Register a drm_bridge") Reported-by: Aaro Koskinen Signed-off-by: Tomi Valkeinen Tested-by: Ivaylo Dimitrov Tested-by: Aaro Koskinen Reviewed-by: Laurent Pinchart Cc: stable@vger.kernel.org # v5.7+ Link: https://patchwork.freedesktop.org/patch/msgid/20201127085241.848461-1-tomi.valkeinen@ti.com --- drivers/gpu/drm/omapdrm/dss/sdi.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/omapdrm/dss/sdi.c b/drivers/gpu/drm/omapdrm/dss/sdi.c index 033fd30074b0..282e4c837cd9 100644 --- a/drivers/gpu/drm/omapdrm/dss/sdi.c +++ b/drivers/gpu/drm/omapdrm/dss/sdi.c @@ -195,8 +195,7 @@ static void sdi_bridge_mode_set(struct drm_bridge *bridge, sdi->pixelclock = adjusted_mode->clock * 1000; } -static void sdi_bridge_enable(struct drm_bridge *bridge, - struct drm_bridge_state *bridge_state) +static void sdi_bridge_enable(struct drm_bridge *bridge) { struct sdi_device *sdi = drm_bridge_to_sdi(bridge); struct dispc_clock_info dispc_cinfo; @@ -259,8 +258,7 @@ err_get_dispc: regulator_disable(sdi->vdds_sdi_reg); } -static void sdi_bridge_disable(struct drm_bridge *bridge, - struct drm_bridge_state *bridge_state) +static void sdi_bridge_disable(struct drm_bridge *bridge) { struct sdi_device *sdi = drm_bridge_to_sdi(bridge); @@ -278,8 +276,8 @@ static const struct drm_bridge_funcs sdi_bridge_funcs = { .mode_valid = sdi_bridge_mode_valid, .mode_fixup = sdi_bridge_mode_fixup, .mode_set = sdi_bridge_mode_set, - .atomic_enable = sdi_bridge_enable, - .atomic_disable = sdi_bridge_disable, + .enable = sdi_bridge_enable, + .disable = sdi_bridge_disable, }; static void sdi_bridge_init(struct sdi_device *sdi) From bb4c6910c8b41623104c2e64a30615682689a54d Mon Sep 17 00:00:00 2001 From: 
Laurent Vivier Date: Thu, 26 Nov 2020 09:28:51 +0100 Subject: [PATCH 055/296] genirq/irqdomain: Add an irq_create_mapping_affinity() function There is currently no way to convey the affinity of an interrupt via irq_create_mapping(), which creates issues for devices that expect that affinity to be managed by the kernel. In order to sort this out, rename irq_create_mapping() to irq_create_mapping_affinity() with an additional affinity parameter that can be passed down to irq_domain_alloc_descs(). irq_create_mapping() is re-implemented as a wrapper around irq_create_mapping_affinity(). No functional change. Fixes: e75eafb9b039 ("genirq/msi: Switch to new irq spreading infrastructure") Signed-off-by: Laurent Vivier Signed-off-by: Thomas Gleixner Reviewed-by: Greg Kurz Cc: Michael Ellerman Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20201126082852.1178497-2-lvivier@redhat.com --- include/linux/irqdomain.h | 12 ++++++++++-- kernel/irq/irqdomain.c | 13 ++++++++----- 2 files changed, 18 insertions(+), 7 deletions(-) diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h index 71535e87109f..ea5a337e0f8b 100644 --- a/include/linux/irqdomain.h +++ b/include/linux/irqdomain.h @@ -384,11 +384,19 @@ extern void irq_domain_associate_many(struct irq_domain *domain, extern void irq_domain_disassociate(struct irq_domain *domain, unsigned int irq); -extern unsigned int irq_create_mapping(struct irq_domain *host, - irq_hw_number_t hwirq); +extern unsigned int irq_create_mapping_affinity(struct irq_domain *host, + irq_hw_number_t hwirq, + const struct irq_affinity_desc *affinity); extern unsigned int irq_create_fwspec_mapping(struct irq_fwspec *fwspec); extern void irq_dispose_mapping(unsigned int virq); +static inline unsigned int irq_create_mapping(struct irq_domain *host, + irq_hw_number_t hwirq) +{ + return irq_create_mapping_affinity(host, hwirq, NULL); +} + + /** * irq_linear_revmap() - Find a linux irq from a hw irq number. 
* @domain: domain owning this hardware interrupt diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c index cf8b374b892d..e4ca69608f3b 100644 --- a/kernel/irq/irqdomain.c +++ b/kernel/irq/irqdomain.c @@ -624,17 +624,19 @@ unsigned int irq_create_direct_mapping(struct irq_domain *domain) EXPORT_SYMBOL_GPL(irq_create_direct_mapping); /** - * irq_create_mapping() - Map a hardware interrupt into linux irq space + * irq_create_mapping_affinity() - Map a hardware interrupt into linux irq space * @domain: domain owning this hardware interrupt or NULL for default domain * @hwirq: hardware irq number in that domain space + * @affinity: irq affinity * * Only one mapping per hardware interrupt is permitted. Returns a linux * irq number. * If the sense/trigger is to be specified, set_irq_type() should be called * on the number returned from that call. */ -unsigned int irq_create_mapping(struct irq_domain *domain, - irq_hw_number_t hwirq) +unsigned int irq_create_mapping_affinity(struct irq_domain *domain, + irq_hw_number_t hwirq, + const struct irq_affinity_desc *affinity) { struct device_node *of_node; int virq; @@ -660,7 +662,8 @@ unsigned int irq_create_mapping(struct irq_domain *domain, } /* Allocate a virtual interrupt number */ - virq = irq_domain_alloc_descs(-1, 1, hwirq, of_node_to_nid(of_node), NULL); + virq = irq_domain_alloc_descs(-1, 1, hwirq, of_node_to_nid(of_node), + affinity); if (virq <= 0) { pr_debug("-> virq allocation failed\n"); return 0; @@ -676,7 +679,7 @@ unsigned int irq_create_mapping(struct irq_domain *domain, return virq; } -EXPORT_SYMBOL_GPL(irq_create_mapping); +EXPORT_SYMBOL_GPL(irq_create_mapping_affinity); /** * irq_create_strict_mappings() - Map a range of hw irqs to fixed linux irqs From 9ea69a55b3b9a71cded9726af591949c1138f235 Mon Sep 17 00:00:00 2001 From: Laurent Vivier Date: Thu, 26 Nov 2020 09:28:52 +0100 Subject: [PATCH 056/296] powerpc/pseries: Pass MSI affinity to irq_create_mapping() With virtio multiqueue, normally each 
queue IRQ is mapped to a CPU. Commit 0d9f0a52c8b9f ("virtio_scsi: use virtio IRQ affinity") exposed an existing shortcoming of the arch code by moving virtio_scsi to the automatic IRQ affinity assignment. The affinity is correctly computed in msi_desc but this is not applied to the system IRQs. It appears the affinity is correctly passed to rtas_setup_msi_irqs() but lost at this point and never passed to irq_domain_alloc_descs() (see commit 06ee6d571f0e ("genirq: Add affinity hint to irq allocation")) because irq_create_mapping() doesn't take an affinity parameter. Use the new irq_create_mapping_affinity() function, which allows to forward the affinity setting from rtas_setup_msi_irqs() to irq_domain_alloc_descs(). With this change, the virtqueues are correctly dispatched between the CPUs on pseries. Fixes: e75eafb9b039 ("genirq/msi: Switch to new irq spreading infrastructure") Signed-off-by: Laurent Vivier Signed-off-by: Thomas Gleixner Reviewed-by: Greg Kurz Acked-by: Michael Ellerman Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20201126082852.1178497-3-lvivier@redhat.com --- arch/powerpc/platforms/pseries/msi.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/platforms/pseries/msi.c b/arch/powerpc/platforms/pseries/msi.c index 133f6adcb39c..b3ac2455faad 100644 --- a/arch/powerpc/platforms/pseries/msi.c +++ b/arch/powerpc/platforms/pseries/msi.c @@ -458,7 +458,8 @@ again: return hwirq; } - virq = irq_create_mapping(NULL, hwirq); + virq = irq_create_mapping_affinity(NULL, hwirq, + entry->affinity); if (!virq) { pr_debug("rtas_msi: Failed mapping hwirq %d\n", hwirq); From 63e2fffa59a9dd91e443b08832656399fd80b7f0 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 15 Nov 2020 17:37:37 -0500 Subject: [PATCH 057/296] pNFS/flexfiles: Fix array overflow when flexfiles mirroring is enabled If the flexfiles mirroring is enabled, then the read code expects to be able to set pgio->pg_mirror_idx to point to the data server 
that is being used for this particular read. However it does not change the pg_mirror_count because we only need to send a single read. Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- fs/nfs/flexfilelayout/flexfilelayout.c | 27 ++++++++++++++----- fs/nfs/pagelist.c | 36 +++++++++++++++++++------- include/linux/nfs_page.h | 4 +++ 3 files changed, 52 insertions(+), 15 deletions(-) diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c index a163533446fa..24bf5797f88a 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.c +++ b/fs/nfs/flexfilelayout/flexfilelayout.c @@ -838,7 +838,7 @@ ff_layout_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_pgio_mirror *pgm; struct nfs4_ff_layout_mirror *mirror; struct nfs4_pnfs_ds *ds; - u32 ds_idx, i; + u32 ds_idx; retry: ff_layout_pg_check_layout(pgio, req); @@ -864,11 +864,9 @@ retry: goto retry; } - for (i = 0; i < pgio->pg_mirror_count; i++) { - mirror = FF_LAYOUT_COMP(pgio->pg_lseg, i); - pgm = &pgio->pg_mirrors[i]; - pgm->pg_bsize = mirror->mirror_ds->ds_versions[0].rsize; - } + mirror = FF_LAYOUT_COMP(pgio->pg_lseg, ds_idx); + pgm = &pgio->pg_mirrors[0]; + pgm->pg_bsize = mirror->mirror_ds->ds_versions[0].rsize; pgio->pg_mirror_idx = ds_idx; @@ -985,6 +983,21 @@ out: return 1; } +static u32 +ff_layout_pg_set_mirror_write(struct nfs_pageio_descriptor *desc, u32 idx) +{ + u32 old = desc->pg_mirror_idx; + + desc->pg_mirror_idx = idx; + return old; +} + +static struct nfs_pgio_mirror * +ff_layout_pg_get_mirror_write(struct nfs_pageio_descriptor *desc, u32 idx) +{ + return &desc->pg_mirrors[idx]; +} + static const struct nfs_pageio_ops ff_layout_pg_read_ops = { .pg_init = ff_layout_pg_init_read, .pg_test = pnfs_generic_pg_test, @@ -998,6 +1011,8 @@ static const struct nfs_pageio_ops ff_layout_pg_write_ops = { .pg_doio = pnfs_generic_pg_writepages, .pg_get_mirror_count = ff_layout_pg_get_mirror_count_write, .pg_cleanup = pnfs_generic_pg_cleanup, + .pg_get_mirror = 
ff_layout_pg_get_mirror_write, + .pg_set_mirror = ff_layout_pg_set_mirror_write, }; static void ff_layout_reset_write(struct nfs_pgio_header *hdr, bool retry_pnfs) diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index 6985cacf4700..78c9c4bdef2b 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -31,13 +31,29 @@ static struct kmem_cache *nfs_page_cachep; static const struct rpc_call_ops nfs_pgio_common_ops; +static struct nfs_pgio_mirror * +nfs_pgio_get_mirror(struct nfs_pageio_descriptor *desc, u32 idx) +{ + if (desc->pg_ops->pg_get_mirror) + return desc->pg_ops->pg_get_mirror(desc, idx); + return &desc->pg_mirrors[0]; +} + struct nfs_pgio_mirror * nfs_pgio_current_mirror(struct nfs_pageio_descriptor *desc) { - return &desc->pg_mirrors[desc->pg_mirror_idx]; + return nfs_pgio_get_mirror(desc, desc->pg_mirror_idx); } EXPORT_SYMBOL_GPL(nfs_pgio_current_mirror); +static u32 +nfs_pgio_set_current_mirror(struct nfs_pageio_descriptor *desc, u32 idx) +{ + if (desc->pg_ops->pg_set_mirror) + return desc->pg_ops->pg_set_mirror(desc, idx); + return desc->pg_mirror_idx; +} + void nfs_pgheader_init(struct nfs_pageio_descriptor *desc, struct nfs_pgio_header *hdr, void (*release)(struct nfs_pgio_header *hdr)) @@ -1259,7 +1275,7 @@ static void nfs_pageio_error_cleanup(struct nfs_pageio_descriptor *desc) return; for (midx = 0; midx < desc->pg_mirror_count; midx++) { - mirror = &desc->pg_mirrors[midx]; + mirror = nfs_pgio_get_mirror(desc, midx); desc->pg_completion_ops->error_cleanup(&mirror->pg_list, desc->pg_error); } @@ -1293,12 +1309,12 @@ int nfs_pageio_add_request(struct nfs_pageio_descriptor *desc, goto out_failed; } - desc->pg_mirror_idx = midx; + nfs_pgio_set_current_mirror(desc, midx); if (!nfs_pageio_add_request_mirror(desc, dupreq)) goto out_cleanup_subreq; } - desc->pg_mirror_idx = 0; + nfs_pgio_set_current_mirror(desc, 0); if (!nfs_pageio_add_request_mirror(desc, req)) goto out_failed; @@ -1320,10 +1336,12 @@ out_failed: static void 
nfs_pageio_complete_mirror(struct nfs_pageio_descriptor *desc, u32 mirror_idx) { - struct nfs_pgio_mirror *mirror = &desc->pg_mirrors[mirror_idx]; - u32 restore_idx = desc->pg_mirror_idx; + struct nfs_pgio_mirror *mirror; + u32 restore_idx; + + restore_idx = nfs_pgio_set_current_mirror(desc, mirror_idx); + mirror = nfs_pgio_current_mirror(desc); - desc->pg_mirror_idx = mirror_idx; for (;;) { nfs_pageio_doio(desc); if (desc->pg_error < 0 || !mirror->pg_recoalesce) @@ -1331,7 +1349,7 @@ static void nfs_pageio_complete_mirror(struct nfs_pageio_descriptor *desc, if (!nfs_do_recoalesce(desc)) break; } - desc->pg_mirror_idx = restore_idx; + nfs_pgio_set_current_mirror(desc, restore_idx); } /* @@ -1405,7 +1423,7 @@ void nfs_pageio_cond_complete(struct nfs_pageio_descriptor *desc, pgoff_t index) u32 midx; for (midx = 0; midx < desc->pg_mirror_count; midx++) { - mirror = &desc->pg_mirrors[midx]; + mirror = nfs_pgio_get_mirror(desc, midx); if (!list_empty(&mirror->pg_list)) { prev = nfs_list_entry(mirror->pg_list.prev); if (index != prev->wb_index + 1) { diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h index c32c15216da3..f0373a6cb5fb 100644 --- a/include/linux/nfs_page.h +++ b/include/linux/nfs_page.h @@ -55,6 +55,7 @@ struct nfs_page { unsigned short wb_nio; /* Number of I/O attempts */ }; +struct nfs_pgio_mirror; struct nfs_pageio_descriptor; struct nfs_pageio_ops { void (*pg_init)(struct nfs_pageio_descriptor *, struct nfs_page *); @@ -64,6 +65,9 @@ struct nfs_pageio_ops { unsigned int (*pg_get_mirror_count)(struct nfs_pageio_descriptor *, struct nfs_page *); void (*pg_cleanup)(struct nfs_pageio_descriptor *); + struct nfs_pgio_mirror * + (*pg_get_mirror)(struct nfs_pageio_descriptor *, u32); + u32 (*pg_set_mirror)(struct nfs_pageio_descriptor *, u32); }; struct nfs_rw_ops { From 2d280bc8930ba9ed1705cfd548c6c8924949eaf1 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Sun, 29 Nov 2020 18:33:32 +0000 Subject: [PATCH 058/296] io_uring: fix recvmsg setup 
with compat buf-select __io_compat_recvmsg_copy_hdr() with REQ_F_BUFFER_SELECT reads out iov len but never assigns it to iov/fast_iov, leaving sr->len with garbage. Hopefully, the following io_buffer_select() truncates it to the selected buffer size, but the value may still be under what was specified. Cc: # 5.7 Signed-off-by: Pavel Begunkov Signed-off-by: Jens Axboe --- fs/io_uring.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 1023f7b44cea..a2a7c65a77aa 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -4499,7 +4499,8 @@ static int __io_compat_recvmsg_copy_hdr(struct io_kiocb *req, return -EFAULT; if (clen < 0) return -EINVAL; - sr->len = iomsg->iov[0].iov_len; + sr->len = clen; + iomsg->iov[0].iov_len = clen; iomsg->iov = NULL; } else { ret = __import_iovec(READ, (struct iovec __user *)uiov, len, From 271e0c9dce1b02a825b3cc1a7aa1fab7c381d44b Mon Sep 17 00:00:00 2001 From: Libo Chen Date: Fri, 20 Nov 2020 18:12:43 -0800 Subject: [PATCH 059/296] ktest.pl: Fix incorrect reboot for grub2bls This issue was first noticed when I was testing different kernels on Oracle Linux 8 which, like Fedora 30+, adopts BLS as default. Even though a kernel entry was added successfully and the index of that kernel entry was retrieved correctly, ktest still wouldn't reboot the system into the user-specified kernel. The bug was spotted in subroutine reboot_to where the if-statement never checks for REBOOT_TYPE "grub2bls", therefore the desired entry will not be set for the next boot. Add a check for "grub2bls" so that $grub_reboot $grub_number can be run before a reboot if REBOOT_TYPE is "grub2bls", so that we can boot to the correct kernel. 
Link: https://lkml.kernel.org/r/20201121021243.1532477-1-libo.chen@oracle.com Cc: stable@vger.kernel.org Fixes: ac2466456eaa ("ktest: introduce grub2bls REBOOT_TYPE option") Signed-off-by: Libo Chen Signed-off-by: Steven Rostedt (VMware) --- tools/testing/ktest/ktest.pl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/ktest/ktest.pl b/tools/testing/ktest/ktest.pl index cb16d2aac51c..54188ee16c48 100755 --- a/tools/testing/ktest/ktest.pl +++ b/tools/testing/ktest/ktest.pl @@ -2040,7 +2040,7 @@ sub reboot_to { if ($reboot_type eq "grub") { run_ssh "'(echo \"savedefault --default=$grub_number --once\" | grub --batch)'"; - } elsif ($reboot_type eq "grub2") { + } elsif (($reboot_type eq "grub2") or ($reboot_type eq "grub2bls")) { run_ssh "$grub_reboot $grub_number"; } elsif ($reboot_type eq "syslinux") { run_ssh "$syslinux --once \\\"$syslinux_label\\\" $syslinux_path"; From ed1182dc004dbcc7cfe64fb0e8ac520b25431715 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20T=C3=B6pel?= Date: Fri, 27 Nov 2020 18:17:26 +0100 Subject: [PATCH 060/296] xdp: Handle MEM_TYPE_XSK_BUFF_POOL correctly in xdp_return_buff() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It turns out that there does exist a path where xdp_return_buff() is being passed an XDP buffer of type MEM_TYPE_XSK_BUFF_POOL. This path is taken when AF_XDP zero-copy mode is enabled, and a buffer is redirected to a DEVMAP with an attached XDP program that drops the buffer. This change simply puts the handling of MEM_TYPE_XSK_BUFF_POOL back into xdp_return_buff(). 
Fixes: 82c41671ca4f ("xdp: Simplify xdp_return_{frame, frame_rx_napi, buff}") Reported-by: Maxim Mikityanskiy Signed-off-by: Björn Töpel Signed-off-by: Daniel Borkmann Acked-by: Maxim Mikityanskiy Link: https://lore.kernel.org/bpf/20201127171726.123627-1-bjorn.topel@gmail.com --- net/core/xdp.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/net/core/xdp.c b/net/core/xdp.c index 48aba933a5a8..491ad569a79c 100644 --- a/net/core/xdp.c +++ b/net/core/xdp.c @@ -335,11 +335,10 @@ EXPORT_SYMBOL_GPL(xdp_rxq_info_reg_mem_model); * scenarios (e.g. queue full), it is possible to return the xdp_frame * while still leveraging this protection. The @napi_direct boolean * is used for those calls sites. Thus, allowing for faster recycling - * of xdp_frames/pages in those cases. This path is never used by the - * MEM_TYPE_XSK_BUFF_POOL memory type, so it's explicitly not part of - * the switch-statement. + * of xdp_frames/pages in those cases. */ -static void __xdp_return(void *data, struct xdp_mem_info *mem, bool napi_direct) +static void __xdp_return(void *data, struct xdp_mem_info *mem, bool napi_direct, + struct xdp_buff *xdp) { struct xdp_mem_allocator *xa; struct page *page; @@ -361,6 +360,10 @@ static void __xdp_return(void *data, struct xdp_mem_info *mem, bool napi_direct) page = virt_to_page(data); /* Assumes order0 page*/ put_page(page); break; + case MEM_TYPE_XSK_BUFF_POOL: + /* NB! Only valid from an xdp_buff! 
*/ + xsk_buff_free(xdp); + break; default: /* Not possible, checked in xdp_rxq_info_reg_mem_model() */ WARN(1, "Incorrect XDP memory type (%d) usage", mem->type); @@ -370,19 +373,19 @@ static void __xdp_return(void *data, struct xdp_mem_info *mem, bool napi_direct) void xdp_return_frame(struct xdp_frame *xdpf) { - __xdp_return(xdpf->data, &xdpf->mem, false); + __xdp_return(xdpf->data, &xdpf->mem, false, NULL); } EXPORT_SYMBOL_GPL(xdp_return_frame); void xdp_return_frame_rx_napi(struct xdp_frame *xdpf) { - __xdp_return(xdpf->data, &xdpf->mem, true); + __xdp_return(xdpf->data, &xdpf->mem, true, NULL); } EXPORT_SYMBOL_GPL(xdp_return_frame_rx_napi); void xdp_return_buff(struct xdp_buff *xdp) { - __xdp_return(xdp->data, &xdp->rxq->mem, true); + __xdp_return(xdp->data, &xdp->rxq->mem, true, xdp); } /* Only called for MEM_TYPE_PAGE_POOL see xdp.h */ From 70e734fed740939704d1b3b76d6f2e6909698586 Mon Sep 17 00:00:00 2001 From: Robert Karszniewicz Date: Fri, 20 Nov 2020 18:51:24 +0100 Subject: [PATCH 061/296] ARM: imx: Use correct SRC base address Commit 4a4fb66119eb ("ARM: imx: Add missing of_node_put()") accidentally forgot to rename a variable, which caused the wrong address to be used and, in our case, the ULL getting falsely identified as ULZ. 
Fixes: 4a4fb66119eb ("ARM: imx: Add missing of_node_put()") Signed-off-by: Robert Karszniewicz Reviewed-by: Fabio Estevam Signed-off-by: Shawn Guo --- arch/arm/mach-imx/anatop.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/mach-imx/anatop.c b/arch/arm/mach-imx/anatop.c index d841bed8664d..7bb47eb3fc07 100644 --- a/arch/arm/mach-imx/anatop.c +++ b/arch/arm/mach-imx/anatop.c @@ -136,7 +136,7 @@ void __init imx_init_revision_from_anatop(void) src_np = of_find_compatible_node(NULL, NULL, "fsl,imx6ul-src"); - src_base = of_iomap(np, 0); + src_base = of_iomap(src_np, 0); of_node_put(src_np); WARN_ON(!src_base); sbmr2 = readl_relaxed(src_base + SRC_SBMR2); From 777a7717d60ccdc9b84f35074f848d3f746fc3bf Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 26 Nov 2020 14:08:41 +0000 Subject: [PATCH 062/296] drm/i915/gt: Program mocs:63 for cache eviction on gen9 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Ville noticed that the last mocs entry is used unconditionally by the HW when it performs cache evictions, and noted that while the value is not meant to be writable by the driver, we should program it to a reasonable value nevertheless. As it turns out, we can change the value of mocs:63 and the value we were programming into it would cause hard hangs in conjunction with atomic operations. 
v2: Add details from bspec about how it is used by HW Suggested-by: Ville Syrjälä Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/2707 Fixes: 3bbaba0ceaa2 ("drm/i915: Added Programming of the MOCS") Signed-off-by: Chris Wilson Cc: Ville Syrjälä Cc: Jason Ekstrand Cc: # v4.3+ Reviewed-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20201126140841.1982-1-chris@chris-wilson.co.uk (cherry picked from commit 977933b5da7c16f39295c4c1d4259a58ece65dbe) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/gt/intel_mocs.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.c b/drivers/gpu/drm/i915/gt/intel_mocs.c index 313e51e7d4f7..4f74706967fd 100644 --- a/drivers/gpu/drm/i915/gt/intel_mocs.c +++ b/drivers/gpu/drm/i915/gt/intel_mocs.c @@ -131,7 +131,19 @@ static const struct drm_i915_mocs_entry skl_mocs_table[] = { GEN9_MOCS_ENTRIES, MOCS_ENTRY(I915_MOCS_CACHED, LE_3_WB | LE_TC_2_LLC_ELLC | LE_LRUM(3), - L3_3_WB) + L3_3_WB), + + /* + * mocs:63 + * - used by the L3 for all of its evictions. + * Thus it is expected to allow LLC cacheability to enable coherent + * flows to be maintained. + * - used to force L3 uncachable cycles. + * Thus it is expected to make the surface L3 uncacheable. + */ + MOCS_ENTRY(63, + LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), + L3_1_UC) }; /* NOTE: the LE_TGT_CACHE is not used on Broxton */ From 58d6bca5efc73235b0f84c0d53321737177c651e Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Mon, 30 Nov 2020 17:54:47 -0300 Subject: [PATCH 063/296] ARM: dts: imx6qdl-wandboard-revd1: Remove PAD_GPIO_6 from enetgrp Since commit 8ad2d1dcce54 ("ARM: dts: imx6qdl-wandboard: Add OV5645 camera support") the PAD_GPIO_6 is used for providing the camera sensor clock. 
Remove it from the enetgrp to fix the following IOMUX conflict: [ 9.972414] imx6q-pinctrl 20e0000.pinctrl: pin MX6Q_PAD_GPIO_6 already requested by 2188000.ethernet; cannot claim for 1-003c [ 9.983857] imx6q-pinctrl 20e0000.pinctrl: pin-140 (1-003c) status -22 [ 9.990514] imx6q-pinctrl 20e0000.pinctrl: could not request pin 140 (MX6Q_PAD_GPIO_6) from group ov5645grp on device 20e0000.pinctrl Fixes: 8ad2d1dcce54 ("ARM: dts: imx6qdl-wandboard: Add OV5645 camera support") Signed-off-by: Fabio Estevam Signed-off-by: Shawn Guo --- arch/arm/boot/dts/imx6qdl-wandboard-revd1.dtsi | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/arm/boot/dts/imx6qdl-wandboard-revd1.dtsi b/arch/arm/boot/dts/imx6qdl-wandboard-revd1.dtsi index 93909796885a..b9b698f72b26 100644 --- a/arch/arm/boot/dts/imx6qdl-wandboard-revd1.dtsi +++ b/arch/arm/boot/dts/imx6qdl-wandboard-revd1.dtsi @@ -166,7 +166,6 @@ MX6QDL_PAD_RGMII_RD2__RGMII_RD2 0x1b030 MX6QDL_PAD_RGMII_RD3__RGMII_RD3 0x1b030 MX6QDL_PAD_RGMII_RX_CTL__RGMII_RX_CTL 0x1b030 - MX6QDL_PAD_GPIO_6__ENET_IRQ 0x000b1 >; }; From 19ba8fb810c60b46869acc9f455613de454e0fca Mon Sep 17 00:00:00 2001 From: Bernd Bauer Date: Thu, 26 Nov 2020 18:56:28 +0100 Subject: [PATCH 064/296] ARM: dts: imx6qdl-kontron-samx6i: fix I2C_PM scl pin Use the correct pin for the i2c scl signal, else we can't access the SoM eeprom. 
Fixes: 2a51f9dae13d ("ARM: dts: imx6qdl-kontron-samx6i: Add iMX6-based Kontron SMARC-sAMX6i module") Signed-off-by: Bernd Bauer [m.felsch@pengutronix.de: Adapt commit message] Signed-off-by: Marco Felsch Signed-off-by: Shawn Guo --- arch/arm/boot/dts/imx6qdl-kontron-samx6i.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/imx6qdl-kontron-samx6i.dtsi b/arch/arm/boot/dts/imx6qdl-kontron-samx6i.dtsi index 265f5f3dbff6..24f793ca2886 100644 --- a/arch/arm/boot/dts/imx6qdl-kontron-samx6i.dtsi +++ b/arch/arm/boot/dts/imx6qdl-kontron-samx6i.dtsi @@ -551,7 +551,7 @@ pinctrl_i2c3: i2c3grp { fsl,pins = < - MX6QDL_PAD_GPIO_3__I2C3_SCL 0x4001b8b1 + MX6QDL_PAD_GPIO_5__I2C3_SCL 0x4001b8b1 MX6QDL_PAD_GPIO_16__I2C3_SDA 0x4001b8b1 >; }; From 6112ff4e8f393e7e297dff04eff0987f94d37fa1 Mon Sep 17 00:00:00 2001 From: Jing Xiangfeng Date: Fri, 27 Nov 2020 11:02:06 +0800 Subject: [PATCH 065/296] scsi: storvsc: Fix error return in storvsc_probe() Return -ENOMEM from the error handling case instead of 0. Link: https://lore.kernel.org/r/20201127030206.104616-1-jingxiangfeng@huawei.com Fixes: 436ad9413353 ("scsi: storvsc: Allow only one remove lun work item to be issued per lun") Reviewed-by: Michael Kelley Signed-off-by: Jing Xiangfeng Signed-off-by: Martin K. 
Petersen --- drivers/scsi/storvsc_drv.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/storvsc_drv.c b/drivers/scsi/storvsc_drv.c index 0c65fbd41035..ded00a89bfc4 100644 --- a/drivers/scsi/storvsc_drv.c +++ b/drivers/scsi/storvsc_drv.c @@ -1994,8 +1994,10 @@ static int storvsc_probe(struct hv_device *device, alloc_ordered_workqueue("storvsc_error_wq_%d", WQ_MEM_RECLAIM, host->host_no); - if (!host_dev->handle_error_wq) + if (!host_dev->handle_error_wq) { + ret = -ENOMEM; goto err_out2; + } INIT_WORK(&host_dev->host_scan_work, storvsc_host_scan); /* Register the HBA and start the scsi bus scan */ ret = scsi_add_host(host, &device->device); From 3b8c72d076c42bf27284cda7b2b2b522810686f8 Mon Sep 17 00:00:00 2001 From: "Andrea Parri (Microsoft)" Date: Wed, 18 Nov 2020 15:53:48 +0100 Subject: [PATCH 066/296] scsi: storvsc: Validate length of incoming packet in storvsc_on_channel_callback() Check that the packet is of the expected size at least, don't copy data past the packet. Link: https://lore.kernel.org/r/20201118145348.109879-1-parri.andrea@gmail.com Cc: "James E.J. Bottomley" Cc: "Martin K. Petersen" Cc: linux-scsi@vger.kernel.org Reported-by: Saruhan Karademir Signed-off-by: Andrea Parri (Microsoft) Signed-off-by: Martin K. 
Petersen --- drivers/scsi/storvsc_drv.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/scsi/storvsc_drv.c b/drivers/scsi/storvsc_drv.c index ded00a89bfc4..99c8ff81de74 100644 --- a/drivers/scsi/storvsc_drv.c +++ b/drivers/scsi/storvsc_drv.c @@ -1246,6 +1246,11 @@ static void storvsc_on_channel_callback(void *context) request = (struct storvsc_cmd_request *) ((unsigned long)desc->trans_id); + if (hv_pkt_datalen(desc) < sizeof(struct vstor_packet) - vmscsi_size_delta) { + dev_err(&device->device, "Invalid packet len\n"); + continue; + } + if (request == &stor_device->init_request || request == &stor_device->reset_request) { memcpy(&request->vstor_packet, packet, From 42f687038bcc34aa919e0e4c29b04e4cda3f6a79 Mon Sep 17 00:00:00 2001 From: Suganath Prabu S Date: Wed, 25 Nov 2020 15:18:38 +0530 Subject: [PATCH 067/296] scsi: mpt3sas: Fix ioctl timeout Commit c1a6c5ac4278 ("scsi: mpt3sas: For NVME device, issue a protocol level reset") modified the ioctl path 'timeout' variable type to u8 from unsigned long, limiting the maximum timeout value that the driver can support to 255 seconds. If the management application is requesting a higher value the resulting timeout will be zero. The operation times out immediately and the ioctl request fails. Change datatype back to unsigned long. Link: https://lore.kernel.org/r/20201125094838.4340-1-suganath-prabu.subramani@broadcom.com Fixes: c1a6c5ac4278 ("scsi: mpt3sas: For NVME device, issue a protocol level reset") Cc: #v4.18+ Signed-off-by: Suganath Prabu S Signed-off-by: Martin K. 
Petersen --- drivers/scsi/mpt3sas/mpt3sas_ctl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/mpt3sas/mpt3sas_ctl.c b/drivers/scsi/mpt3sas/mpt3sas_ctl.c index 0f2b681449e6..edd26a2570fa 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_ctl.c +++ b/drivers/scsi/mpt3sas/mpt3sas_ctl.c @@ -664,7 +664,7 @@ _ctl_do_mpt_command(struct MPT3SAS_ADAPTER *ioc, struct mpt3_ioctl_command karg, Mpi26NVMeEncapsulatedRequest_t *nvme_encap_request = NULL; struct _pcie_device *pcie_device = NULL; u16 smid; - u8 timeout; + unsigned long timeout; u8 issue_reset; u32 sz, sz_arg; void *psge; From 85dad327d9b58b4c9ce08189a2707167de392d23 Mon Sep 17 00:00:00 2001 From: Sreekanth Reddy Date: Mon, 30 Nov 2020 13:57:33 +0530 Subject: [PATCH 068/296] scsi: mpt3sas: Increase IOCInit request timeout to 30s Currently the IOCInit request message timeout is set to 10s. This is not sufficient in some scenarios such as during HBA FW downgrade operations. Increase the IOCInit request timeout to 30s. Link: https://lore.kernel.org/r/20201130082733.26120-1-sreekanth.reddy@broadcom.com Signed-off-by: Sreekanth Reddy Signed-off-by: Martin K. 
Petersen --- drivers/scsi/mpt3sas/mpt3sas_base.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/mpt3sas/mpt3sas_base.c b/drivers/scsi/mpt3sas/mpt3sas_base.c index e4cc92bc4d94..bb940cbcbb5d 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_base.c +++ b/drivers/scsi/mpt3sas/mpt3sas_base.c @@ -6459,7 +6459,7 @@ _base_send_ioc_init(struct MPT3SAS_ADAPTER *ioc) r = _base_handshake_req_reply_wait(ioc, sizeof(Mpi2IOCInitRequest_t), (u32 *)&mpi_request, - sizeof(Mpi2IOCInitReply_t), (u16 *)&mpi_reply, 10); + sizeof(Mpi2IOCInitReply_t), (u16 *)&mpi_reply, 30); if (r != 0) { ioc_err(ioc, "%s: handshake failed (r=%d)\n", __func__, r); From a48491c65b513e5cdc3e7a886a4db915f848a5f5 Mon Sep 17 00:00:00 2001 From: Po-Hsu Lin Date: Mon, 30 Nov 2020 22:39:40 -0800 Subject: [PATCH 069/296] Input: i8042 - add ByteSpeed touchpad to noloop table It looks like the C15B laptop got another vendor: ByteSpeed LLC. Avoid AUX loopback on this touchpad as well, thus input subsystem will be able to recognize a Synaptics touchpad in the AUX port. 
BugLink: https://bugs.launchpad.net/bugs/1906128 Signed-off-by: Po-Hsu Lin Link: https://lore.kernel.org/r/20201201054723.5939-1-po-hsu.lin@canonical.com Cc: stable@vger.kernel.org Signed-off-by: Dmitry Torokhov --- drivers/input/serio/i8042-x86ia64io.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/input/serio/i8042-x86ia64io.h b/drivers/input/serio/i8042-x86ia64io.h index a4c9b9652560..7ecb65176c1a 100644 --- a/drivers/input/serio/i8042-x86ia64io.h +++ b/drivers/input/serio/i8042-x86ia64io.h @@ -219,6 +219,10 @@ static const struct dmi_system_id __initconst i8042_dmi_noloop_table[] = { DMI_MATCH(DMI_SYS_VENDOR, "PEGATRON CORPORATION"), DMI_MATCH(DMI_PRODUCT_NAME, "C15B"), }, + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "ByteSpeed LLC"), + DMI_MATCH(DMI_PRODUCT_NAME, "ByteSpeed Laptop C15B"), + }, }, { } }; From 2aab1561439032be2e98811dd0ddbeb5b2ae4c61 Mon Sep 17 00:00:00 2001 From: Sanjay Govind Date: Mon, 30 Nov 2020 23:41:48 -0800 Subject: [PATCH 070/296] Input: xpad - support Ardwiino Controllers This commit adds support for Ardwiino Controllers Signed-off-by: Sanjay Govind Link: https://lore.kernel.org/r/20201201071922.131666-1-sanjay.govind9@gmail.com Cc: stable@vger.kernel.org Signed-off-by: Dmitry Torokhov --- drivers/input/joystick/xpad.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/input/joystick/xpad.c b/drivers/input/joystick/xpad.c index c77cdb3b62b5..8c73377ac82c 100644 --- a/drivers/input/joystick/xpad.c +++ b/drivers/input/joystick/xpad.c @@ -241,6 +241,7 @@ static const struct xpad_device { { 0x1038, 0x1430, "SteelSeries Stratus Duo", 0, XTYPE_XBOX360 }, { 0x1038, 0x1431, "SteelSeries Stratus Duo", 0, XTYPE_XBOX360 }, { 0x11c9, 0x55f0, "Nacon GC-100XF", 0, XTYPE_XBOX360 }, + { 0x1209, 0x2882, "Ardwiino Controller", 0, XTYPE_XBOX360 }, { 0x12ab, 0x0004, "Honey Bee Xbox360 dancepad", MAP_DPAD_TO_BUTTONS, XTYPE_XBOX360 }, { 0x12ab, 0x0301, "PDP AFTERGLOW AX.1", 0, XTYPE_XBOX360 }, { 0x12ab, 0x0303, "Mortal Kombat Klassic 
FightStick", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOX360 }, @@ -418,6 +419,7 @@ static const struct usb_device_id xpad_table[] = { XPAD_XBOXONE_VENDOR(0x0f0d), /* Hori Controllers */ XPAD_XBOX360_VENDOR(0x1038), /* SteelSeries Controllers */ XPAD_XBOX360_VENDOR(0x11c9), /* Nacon GC100XF */ + XPAD_XBOX360_VENDOR(0x1209), /* Ardwiino Controllers */ XPAD_XBOX360_VENDOR(0x12ab), /* X-Box 360 dance pads */ XPAD_XBOX360_VENDOR(0x1430), /* RedOctane X-Box 360 controllers */ XPAD_XBOX360_VENDOR(0x146b), /* BigBen Interactive Controllers */ From c98fff7332dbd6e028969f8c2bda3d7bc7a024d8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B8rn=20Mork?= Date: Tue, 1 Dec 2020 11:03:18 +0100 Subject: [PATCH 071/296] USB: serial: option: fix Quectel BG96 matching MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is a partial revert of commit 2bb70f0a4b23 ("USB: serial: option: support dynamic Quectel USB compositions") The Quectel BG96 is different from most other modern Quectel modems, having serial functions with 3 endpoints using ff/ff/ff and ff/fe/ff class/subclass/protocol. Including it in the change to accommodate dynamic function mapping was incorrect. Revert to interface number matching for the BG96, assuming static layout of the RMNET function on interface 4. This restores support for the serial functions on interfaces 2 and 3. 
Full lsusb output for the BG96: Bus 002 Device 003: ID 2c7c:0296 Device Descriptor: bLength 18 bDescriptorType 1 bcdUSB 2.00 bDeviceClass 0 (Defined at Interface level) bDeviceSubClass 0 bDeviceProtocol 0 bMaxPacketSize0 64 idVendor 0x2c7c idProduct 0x0296 bcdDevice 0.00 iManufacturer 3 Qualcomm, Incorporated iProduct 2 Qualcomm CDMA Technologies MSM iSerial 4 d1098243 bNumConfigurations 1 Configuration Descriptor: bLength 9 bDescriptorType 2 wTotalLength 145 bNumInterfaces 5 bConfigurationValue 1 iConfiguration 1 Qualcomm Configuration bmAttributes 0xe0 Self Powered Remote Wakeup MaxPower 500mA Interface Descriptor: bLength 9 bDescriptorType 4 bInterfaceNumber 0 bAlternateSetting 0 bNumEndpoints 2 bInterfaceClass 255 Vendor Specific Class bInterfaceSubClass 255 Vendor Specific Subclass bInterfaceProtocol 255 Vendor Specific Protocol iInterface 0 Endpoint Descriptor: bLength 7 bDescriptorType 5 bEndpointAddress 0x81 EP 1 IN bmAttributes 2 Transfer Type Bulk Synch Type None Usage Type Data wMaxPacketSize 0x0200 1x 512 bytes bInterval 0 Endpoint Descriptor: bLength 7 bDescriptorType 5 bEndpointAddress 0x01 EP 1 OUT bmAttributes 2 Transfer Type Bulk Synch Type None Usage Type Data wMaxPacketSize 0x0200 1x 512 bytes bInterval 0 Interface Descriptor: bLength 9 bDescriptorType 4 bInterfaceNumber 1 bAlternateSetting 0 bNumEndpoints 2 bInterfaceClass 255 Vendor Specific Class bInterfaceSubClass 255 Vendor Specific Subclass bInterfaceProtocol 255 Vendor Specific Protocol iInterface 0 Endpoint Descriptor: bLength 7 bDescriptorType 5 bEndpointAddress 0x82 EP 2 IN bmAttributes 2 Transfer Type Bulk Synch Type None Usage Type Data wMaxPacketSize 0x0200 1x 512 bytes bInterval 0 Endpoint Descriptor: bLength 7 bDescriptorType 5 bEndpointAddress 0x02 EP 2 OUT bmAttributes 2 Transfer Type Bulk Synch Type None Usage Type Data wMaxPacketSize 0x0200 1x 512 bytes bInterval 0 Interface Descriptor: bLength 9 bDescriptorType 4 bInterfaceNumber 2 bAlternateSetting 0 bNumEndpoints 3 
bInterfaceClass 255 Vendor Specific Class bInterfaceSubClass 255 Vendor Specific Subclass bInterfaceProtocol 255 Vendor Specific Protocol iInterface 0 Endpoint Descriptor: bLength 7 bDescriptorType 5 bEndpointAddress 0x83 EP 3 IN bmAttributes 3 Transfer Type Interrupt Synch Type None Usage Type Data wMaxPacketSize 0x0040 1x 64 bytes bInterval 5 Endpoint Descriptor: bLength 7 bDescriptorType 5 bEndpointAddress 0x84 EP 4 IN bmAttributes 2 Transfer Type Bulk Synch Type None Usage Type Data wMaxPacketSize 0x0200 1x 512 bytes bInterval 0 Endpoint Descriptor: bLength 7 bDescriptorType 5 bEndpointAddress 0x03 EP 3 OUT bmAttributes 2 Transfer Type Bulk Synch Type None Usage Type Data wMaxPacketSize 0x0200 1x 512 bytes bInterval 0 Interface Descriptor: bLength 9 bDescriptorType 4 bInterfaceNumber 3 bAlternateSetting 0 bNumEndpoints 3 bInterfaceClass 255 Vendor Specific Class bInterfaceSubClass 254 bInterfaceProtocol 255 iInterface 0 Endpoint Descriptor: bLength 7 bDescriptorType 5 bEndpointAddress 0x85 EP 5 IN bmAttributes 3 Transfer Type Interrupt Synch Type None Usage Type Data wMaxPacketSize 0x0040 1x 64 bytes bInterval 5 Endpoint Descriptor: bLength 7 bDescriptorType 5 bEndpointAddress 0x86 EP 6 IN bmAttributes 2 Transfer Type Bulk Synch Type None Usage Type Data wMaxPacketSize 0x0200 1x 512 bytes bInterval 0 Endpoint Descriptor: bLength 7 bDescriptorType 5 bEndpointAddress 0x04 EP 4 OUT bmAttributes 2 Transfer Type Bulk Synch Type None Usage Type Data wMaxPacketSize 0x0200 1x 512 bytes bInterval 0 Interface Descriptor: bLength 9 bDescriptorType 4 bInterfaceNumber 4 bAlternateSetting 0 bNumEndpoints 3 bInterfaceClass 255 Vendor Specific Class bInterfaceSubClass 255 Vendor Specific Subclass bInterfaceProtocol 255 Vendor Specific Protocol iInterface 0 Endpoint Descriptor: bLength 7 bDescriptorType 5 bEndpointAddress 0x87 EP 7 IN bmAttributes 3 Transfer Type Interrupt Synch Type None Usage Type Data wMaxPacketSize 0x0040 1x 64 bytes bInterval 5 Endpoint Descriptor: bLength 
7 bDescriptorType 5 bEndpointAddress 0x88 EP 8 IN bmAttributes 2 Transfer Type Bulk Synch Type None Usage Type Data wMaxPacketSize 0x0200 1x 512 bytes bInterval 0 Endpoint Descriptor: bLength 7 bDescriptorType 5 bEndpointAddress 0x05 EP 5 OUT bmAttributes 2 Transfer Type Bulk Synch Type None Usage Type Data wMaxPacketSize 0x0200 1x 512 bytes bInterval 0 Device Qualifier (for other device speed): bLength 10 bDescriptorType 6 bcdUSB 2.00 bDeviceClass 0 (Defined at Interface level) bDeviceSubClass 0 bDeviceProtocol 0 bMaxPacketSize0 64 bNumConfigurations 1 Device Status: 0x0000 (Bus Powered) Cc: Sebastian Sjoholm Fixes: 2bb70f0a4b23 ("USB: serial: option: support dynamic Quectel USB compositions") Signed-off-by: Bjørn Mork Cc: stable@vger.kernel.org Signed-off-by: Johan Hovold --- drivers/usb/serial/option.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index 653da1d2945c..56d6f6d83bd7 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -1106,9 +1106,8 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EG95, 0xff, 0xff, 0xff), .driver_info = NUMEP2 }, { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EG95, 0xff, 0, 0) }, - { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_BG96, 0xff, 0xff, 0xff), - .driver_info = NUMEP2 }, - { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_BG96, 0xff, 0, 0) }, + { USB_DEVICE(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_BG96), + .driver_info = RSVD(4) }, { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EP06, 0xff, 0xff, 0xff), .driver_info = RSVD(1) | RSVD(2) | RSVD(3) | RSVD(4) | NUMEP2 }, { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EP06, 0xff, 0, 0) }, From c2b111e59a7be1534bbd62b3f8f933f714c5ba71 Mon Sep 17 00:00:00 2001 From: Heinrich Schuchardt Date: Sun, 29 Nov 2020 
17:26:27 +0100 Subject: [PATCH 072/296] arm64: dts: allwinner: A64 Sopine: phy-mode rgmii-id Since commit bbc4d71d6354 ("net: phy: realtek: fix rtl8211e rx/tx delay config") iSCSI booting fails on the Pine A64 LTS. This patch changes the phy-mode to use internal delays both for RX and TX as has been done for other boards affected by the same commit. Fixes: bbc4d71d6354 ("net: phy: realtek: fix rtl8211e rx/tx delay config") Signed-off-by: Heinrich Schuchardt Signed-off-by: Maxime Ripard Link: https://lore.kernel.org/r/20201129162627.1244808-1-xypron.glpk@gmx.de --- arch/arm64/boot/dts/allwinner/sun50i-a64-sopine-baseboard.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/allwinner/sun50i-a64-sopine-baseboard.dts b/arch/arm64/boot/dts/allwinner/sun50i-a64-sopine-baseboard.dts index 9ebb9e07fae3..d4069749d721 100644 --- a/arch/arm64/boot/dts/allwinner/sun50i-a64-sopine-baseboard.dts +++ b/arch/arm64/boot/dts/allwinner/sun50i-a64-sopine-baseboard.dts @@ -79,7 +79,7 @@ &emac { pinctrl-names = "default"; pinctrl-0 = <&rgmii_pins>; - phy-mode = "rgmii"; + phy-mode = "rgmii-id"; phy-handle = <&ext_rgmii_phy>; phy-supply = <&reg_dc1sw>; status = "okay"; From d0c6707ca4235b78d06bcd62f0e24fbeac3e6d10 Mon Sep 17 00:00:00 2001 From: Heinrich Schuchardt Date: Sun, 29 Nov 2020 20:45:12 +0100 Subject: [PATCH 073/296] arm64: dts: allwinner: H5: NanoPi Neo Plus2: phy-mode rgmii-id Since commit bbc4d71d6354 ("net: phy: realtek: fix rtl8211e rx/tx delay config") network is broken on the NanoPi Neo Plus2. This patch changes the phy-mode to use internal delays both for RX and TX as has been done for other boards affected by the same commit. 
Fixes: bbc4d71d6354 ("net: phy: realtek: fix rtl8211e rx/tx delay config") Signed-off-by: Heinrich Schuchardt Signed-off-by: Maxime Ripard Reviewed-by: Andrew Lunn Link: https://lore.kernel.org/r/20201129194512.1475586-1-xypron.glpk@gmx.de --- arch/arm64/boot/dts/allwinner/sun50i-h5-nanopi-neo-plus2.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/allwinner/sun50i-h5-nanopi-neo-plus2.dts b/arch/arm64/boot/dts/allwinner/sun50i-h5-nanopi-neo-plus2.dts index 4f9ba53ffaae..9d93fe153689 100644 --- a/arch/arm64/boot/dts/allwinner/sun50i-h5-nanopi-neo-plus2.dts +++ b/arch/arm64/boot/dts/allwinner/sun50i-h5-nanopi-neo-plus2.dts @@ -96,7 +96,7 @@ pinctrl-0 = <&emac_rgmii_pins>; phy-supply = <&reg_gmac_3v3>; phy-handle = <&ext_rgmii_phy>; - phy-mode = "rgmii"; + phy-mode = "rgmii-id"; status = "okay"; }; From f54db39fbe40731c40aefdd3bc26e7d56d668c64 Mon Sep 17 00:00:00 2001 From: Greg Kurz Date: Mon, 30 Nov 2020 13:19:27 +0100 Subject: [PATCH 074/296] KVM: PPC: Book3S HV: XIVE: Fix vCPU id sanity check MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 062cfab7069f ("KVM: PPC: Book3S HV: XIVE: Make VP block size configurable") updated kvmppc_xive_vcpu_id_valid() in a way that allows userspace to trigger an assertion in skiboot and crash the host: [ 696.186248988,3] XIVE[ IC 08 ] eq_blk != vp_blk (0 vs. 1) for target 0x4300008c/0 [ 696.186314757,0] Assert fail: hw/xive.c:2370:0 [ 696.186342458,0] Aborting! 
xive-kvCPU 0043 Backtrace: S: 0000000031e2b8f0 R: 0000000030013840 .backtrace+0x48 S: 0000000031e2b990 R: 000000003001b2d0 ._abort+0x4c S: 0000000031e2ba10 R: 000000003001b34c .assert_fail+0x34 S: 0000000031e2ba90 R: 0000000030058984 .xive_eq_for_target.part.20+0xb0 S: 0000000031e2bb40 R: 0000000030059fdc .xive_setup_silent_gather+0x2c S: 0000000031e2bc20 R: 000000003005a334 .opal_xive_set_vp_info+0x124 S: 0000000031e2bd20 R: 00000000300051a4 opal_entry+0x134 --- OPAL call token: 0x8a caller R1: 0xc000001f28563850 --- XIVE maintains the interrupt context state of non-dispatched vCPUs in an internal VP structure. We allocate a bunch of those on startup to accommodate all possible vCPUs. Each VP has an id, that we derive from the vCPU id for efficiency: static inline u32 kvmppc_xive_vp(struct kvmppc_xive *xive, u32 server) { return xive->vp_base + kvmppc_pack_vcpu_id(xive->kvm, server); } The KVM XIVE device used to allocate KVM_MAX_VCPUS VPs. This was limiting the number of concurrent VMs because the VP space is limited on the HW. Since most of the time, VMs run with a lot fewer vCPUs, commit 062cfab7069f ("KVM: PPC: Book3S HV: XIVE: Make VP block size configurable") gave the possibility for userspace to tune the size of the VP block through the KVM_DEV_XIVE_NR_SERVERS attribute. The check in kvmppc_pack_vcpu_id() was changed from cpu < KVM_MAX_VCPUS * xive->kvm->arch.emul_smt_mode to cpu < xive->nr_servers * xive->kvm->arch.emul_smt_mode The previous check was based on the fact that the VP block had KVM_MAX_VCPUS entries and that kvmppc_pack_vcpu_id() guarantees that packed vCPU ids are below KVM_MAX_VCPUS. We've changed the size of the VP block, but kvmppc_pack_vcpu_id() has nothing to do with it and it certainly doesn't ensure that the packed vCPU ids are below xive->nr_servers. kvmppc_xive_vcpu_id_valid() might thus return true when the VM was configured with a non-standard VSMT mode, even if the packed vCPU id is higher than what we expect. 
We end up using an unallocated VP id, which confuses OPAL. The assert in OPAL is probably abusive and should be converted to a regular error that the kernel can handle, but we shouldn't really use broken VP ids in the first place. Fix kvmppc_xive_vcpu_id_valid() so that it checks the packed vCPU id is below xive->nr_servers, which is explicitly what we want. Fixes: 062cfab7069f ("KVM: PPC: Book3S HV: XIVE: Make VP block size configurable") Cc: stable@vger.kernel.org # v5.5+ Signed-off-by: Greg Kurz Reviewed-by: Cédric Le Goater Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/160673876747.695514.1809676603724514920.stgit@bahia.lan --- arch/powerpc/kvm/book3s_xive.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/arch/powerpc/kvm/book3s_xive.c b/arch/powerpc/kvm/book3s_xive.c index 85215e79db42..a0ebc29f30b2 100644 --- a/arch/powerpc/kvm/book3s_xive.c +++ b/arch/powerpc/kvm/book3s_xive.c @@ -1214,12 +1214,9 @@ void kvmppc_xive_cleanup_vcpu(struct kvm_vcpu *vcpu) static bool kvmppc_xive_vcpu_id_valid(struct kvmppc_xive *xive, u32 cpu) { /* We have a block of xive->nr_servers VPs. We just need to check - * raw vCPU ids are below the expected limit for this guest's - * core stride ; kvmppc_pack_vcpu_id() will pack them down to an - * index that can be safely used to compute a VP id that belongs - * to the VP block. + * packed vCPU ids are below that. 
*/ - return cpu < xive->nr_servers * xive->kvm->arch.emul_smt_mode; + return kvmppc_pack_vcpu_id(xive->kvm, cpu) < xive->nr_servers; } int kvmppc_xive_compute_vp_id(struct kvmppc_xive *xive, u32 cpu, u32 *vp) From 59612b24f78a0b61fe078ec9dff2e48e9cec52c0 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Thu, 19 Nov 2020 13:46:56 -0700 Subject: [PATCH 075/296] kbuild: Hoist '--orphan-handling' into Kconfig Currently, '--orphan-handling=warn' is spread out across four different architectures in their respective Makefiles, which makes it a little unruly to deal with in case it needs to be disabled for a specific linker version (in this case, ld.lld 10.0.1). To make it easier to control this, hoist this warning into Kconfig and the main Makefile so that disabling it is simpler, as the warning will only be enabled in a couple of places (main Makefile and a couple of compressed boot folders that blow away LDFLAGS_vmlinux) and making it conditional is easier due to Kconfig syntax. One small additional benefit of this is saving a call to ld-option on incremental builds because we will have already evaluated it for CONFIG_LD_ORPHAN_WARN. To keep the list of supported architectures the same, introduce CONFIG_ARCH_WANT_LD_ORPHAN_WARN, which an architecture can select to gain this automatically after all of the sections are specified and size asserted. A special thanks to Kees Cook for the help text on this config. 
Link: https://github.com/ClangBuiltLinux/linux/issues/1187 Acked-by: Kees Cook Acked-by: Michael Ellerman (powerpc) Reviewed-by: Nick Desaulniers Tested-by: Nick Desaulniers Signed-off-by: Nathan Chancellor Signed-off-by: Masahiro Yamada --- Makefile | 6 ++++++ arch/Kconfig | 9 +++++++++ arch/arm/Kconfig | 1 + arch/arm/Makefile | 4 ---- arch/arm/boot/compressed/Makefile | 4 +++- arch/arm64/Kconfig | 1 + arch/arm64/Makefile | 4 ---- arch/powerpc/Kconfig | 1 + arch/powerpc/Makefile | 1 - arch/x86/Kconfig | 1 + arch/x86/Makefile | 3 --- arch/x86/boot/compressed/Makefile | 4 +++- init/Kconfig | 5 +++++ 13 files changed, 30 insertions(+), 14 deletions(-) diff --git a/Makefile b/Makefile index ae1592c1f5d6..8327725e5d76 100644 --- a/Makefile +++ b/Makefile @@ -986,6 +986,12 @@ ifeq ($(CONFIG_RELR),y) LDFLAGS_vmlinux += --pack-dyn-relocs=relr endif +# We never want expected sections to be placed heuristically by the +# linker. All sections should be explicitly named in the linker script. +ifdef CONFIG_LD_ORPHAN_WARN +LDFLAGS_vmlinux += --orphan-handling=warn +endif + # Align the bit size of userspace programs with the kernel KBUILD_USERCFLAGS += $(filter -m32 -m64 --target=%, $(KBUILD_CFLAGS)) KBUILD_USERLDFLAGS += $(filter -m32 -m64 --target=%, $(KBUILD_CFLAGS)) diff --git a/arch/Kconfig b/arch/Kconfig index 56b6ccc0e32d..ba4e966484ab 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -1028,6 +1028,15 @@ config HAVE_STATIC_CALL_INLINE bool depends on HAVE_STATIC_CALL +config ARCH_WANT_LD_ORPHAN_WARN + bool + help + An arch should select this symbol once all linker sections are explicitly + included, size-asserted, or discarded in the linker scripts. This is + important because we never want expected sections to be placed heuristically + by the linker, since the locations of such sections can change between linker + versions. 
+ source "kernel/gcov/Kconfig" source "scripts/gcc-plugins/Kconfig" diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index fe2f17eb2b50..002e0cf025f5 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -35,6 +35,7 @@ config ARM select ARCH_USE_CMPXCHG_LOCKREF select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT if MMU select ARCH_WANT_IPC_PARSE_VERSION + select ARCH_WANT_LD_ORPHAN_WARN select BINFMT_FLAT_ARGVP_ENVP_ON_STACK select BUILDTIME_TABLE_SORT if MMU select CLONE_BACKWARDS diff --git a/arch/arm/Makefile b/arch/arm/Makefile index 4d76eab2b22d..e15f76ca2887 100644 --- a/arch/arm/Makefile +++ b/arch/arm/Makefile @@ -16,10 +16,6 @@ LDFLAGS_vmlinux += --be8 KBUILD_LDFLAGS_MODULE += --be8 endif -# We never want expected sections to be placed heuristically by the -# linker. All sections should be explicitly named in the linker script. -LDFLAGS_vmlinux += $(call ld-option, --orphan-handling=warn) - GZFLAGS :=-9 #KBUILD_CFLAGS +=-pipe diff --git a/arch/arm/boot/compressed/Makefile b/arch/arm/boot/compressed/Makefile index 47f001ca5499..e1567418a2b1 100644 --- a/arch/arm/boot/compressed/Makefile +++ b/arch/arm/boot/compressed/Makefile @@ -129,7 +129,9 @@ LDFLAGS_vmlinux += --no-undefined # Delete all temporary local symbols LDFLAGS_vmlinux += -X # Report orphan sections -LDFLAGS_vmlinux += $(call ld-option, --orphan-handling=warn) +ifdef CONFIG_LD_ORPHAN_WARN +LDFLAGS_vmlinux += --orphan-handling=warn +endif # Next argument is a linker script LDFLAGS_vmlinux += -T diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 1515f6f153a0..a6b5b7ef40ae 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -81,6 +81,7 @@ config ARM64 select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT select ARCH_WANT_FRAME_POINTERS select ARCH_WANT_HUGE_PMD_SHARE if ARM64_4K_PAGES || (ARM64_16K_PAGES && !ARM64_VA_BITS_36) + select ARCH_WANT_LD_ORPHAN_WARN select ARCH_HAS_UBSAN_SANITIZE_ALL select ARM_AMBA select ARM_ARCH_TIMER diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile index 
5789c2d18d43..6a87d592bd00 100644 --- a/arch/arm64/Makefile +++ b/arch/arm64/Makefile @@ -28,10 +28,6 @@ LDFLAGS_vmlinux += --fix-cortex-a53-843419 endif endif -# We never want expected sections to be placed heuristically by the -# linker. All sections should be explicitly named in the linker script. -LDFLAGS_vmlinux += $(call ld-option, --orphan-handling=warn) - ifeq ($(CONFIG_ARM64_USE_LSE_ATOMICS), y) ifneq ($(CONFIG_ARM64_LSE_ATOMICS), y) $(warning LSE atomics not supported by binutils) diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index e9f13fe08492..5181872f9452 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -152,6 +152,7 @@ config PPC select ARCH_USE_QUEUED_SPINLOCKS if PPC_QUEUED_SPINLOCKS select ARCH_WANT_IPC_PARSE_VERSION select ARCH_WANT_IRQS_OFF_ACTIVATE_MM + select ARCH_WANT_LD_ORPHAN_WARN select ARCH_WEAK_RELEASE_ACQUIRE select BINFMT_ELF select BUILDTIME_TABLE_SORT diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile index a4d56f0a41d9..d9eb0da845e1 100644 --- a/arch/powerpc/Makefile +++ b/arch/powerpc/Makefile @@ -123,7 +123,6 @@ endif LDFLAGS_vmlinux-y := -Bstatic LDFLAGS_vmlinux-$(CONFIG_RELOCATABLE) := -pie LDFLAGS_vmlinux := $(LDFLAGS_vmlinux-y) -LDFLAGS_vmlinux += $(call ld-option,--orphan-handling=warn) ifdef CONFIG_PPC64 ifeq ($(call cc-option-yn,-mcmodel=medium),y) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index f6946b81f74a..fbf26e0f7a6a 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -100,6 +100,7 @@ config X86 select ARCH_WANT_DEFAULT_BPF_JIT if X86_64 select ARCH_WANTS_DYNAMIC_TASK_STRUCT select ARCH_WANT_HUGE_PMD_SHARE + select ARCH_WANT_LD_ORPHAN_WARN select ARCH_WANTS_THP_SWAP if X86_64 select BUILDTIME_TABLE_SORT select CLKEVT_I8253 diff --git a/arch/x86/Makefile b/arch/x86/Makefile index 154259f18b8b..1bf21746f4ce 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -209,9 +209,6 @@ ifdef CONFIG_X86_64 LDFLAGS_vmlinux += -z max-page-size=0x200000 endif -# We never want expected 
sections to be placed heuristically by the -# linker. All sections should be explicitly named in the linker script. -LDFLAGS_vmlinux += $(call ld-option, --orphan-handling=warn) archscripts: scripts_basic $(Q)$(MAKE) $(build)=arch/x86/tools relocs diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile index ee249088cbfe..40b8fd375d52 100644 --- a/arch/x86/boot/compressed/Makefile +++ b/arch/x86/boot/compressed/Makefile @@ -61,7 +61,9 @@ KBUILD_LDFLAGS += $(call ld-option,--no-ld-generated-unwind-info) # Compressed kernel should be built as PIE since it may be loaded at any # address by the bootloader. LDFLAGS_vmlinux := -pie $(call ld-option, --no-dynamic-linker) -LDFLAGS_vmlinux += $(call ld-option, --orphan-handling=warn) +ifdef CONFIG_LD_ORPHAN_WARN +LDFLAGS_vmlinux += --orphan-handling=warn +endif LDFLAGS_vmlinux += -T hostprogs := mkpiggy diff --git a/init/Kconfig b/init/Kconfig index c9446911cf41..92c58b45abb8 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -1348,6 +1348,11 @@ config LD_DEAD_CODE_DATA_ELIMINATION present. This option is not well tested yet, so use at your own risk. +config LD_ORPHAN_WARN + def_bool y + depends on ARCH_WANT_LD_ORPHAN_WARN + depends on $(ld-option,--orphan-handling=warn) + config SYSCTL bool From d5750cd3c5486e9c0fa11100df01de8ca0c13fa7 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Thu, 19 Nov 2020 13:46:58 -0700 Subject: [PATCH 076/296] kbuild: Disable CONFIG_LD_ORPHAN_WARN for ld.lld 10.0.1 ld.lld 10.0.1 spews a bunch of various warnings about .rela sections, along with a few others. Newer versions of ld.lld do not have these warnings. As a result, do not add '--orphan-handling=warn' to LDFLAGS_vmlinux if ld.lld's version is not new enough. 
Link: https://github.com/ClangBuiltLinux/linux/issues/1187 Link: https://github.com/ClangBuiltLinux/linux/issues/1193 Reported-by: Arvind Sankar Reported-by: kernelci.org bot Reported-by: Mark Brown Reviewed-by: Kees Cook Signed-off-by: Nathan Chancellor Reviewed-by: Nick Desaulniers Signed-off-by: Masahiro Yamada --- MAINTAINERS | 1 + init/Kconfig | 5 +++++ scripts/lld-version.sh | 20 ++++++++++++++++++++ 3 files changed, 26 insertions(+) create mode 100755 scripts/lld-version.sh diff --git a/MAINTAINERS b/MAINTAINERS index a008b70f3c16..286ed8e6b924 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -4274,6 +4274,7 @@ B: https://github.com/ClangBuiltLinux/linux/issues C: irc://chat.freenode.net/clangbuiltlinux F: Documentation/kbuild/llvm.rst F: scripts/clang-tools/ +F: scripts/lld-version.sh K: \b(?i:clang|llvm)\b CLEANCACHE API diff --git a/init/Kconfig b/init/Kconfig index 92c58b45abb8..b9037d6c5ab3 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -47,6 +47,10 @@ config CLANG_VERSION int default $(shell,$(srctree)/scripts/clang-version.sh $(CC)) +config LLD_VERSION + int + default $(shell,$(srctree)/scripts/lld-version.sh $(LD)) + config CC_CAN_LINK bool default $(success,$(srctree)/scripts/cc-can-link.sh $(CC) $(CLANG_FLAGS) $(m64-flag)) if 64BIT @@ -1351,6 +1355,7 @@ config LD_DEAD_CODE_DATA_ELIMINATION config LD_ORPHAN_WARN def_bool y depends on ARCH_WANT_LD_ORPHAN_WARN + depends on !LD_IS_LLD || LLD_VERSION >= 110000 depends on $(ld-option,--orphan-handling=warn) config SYSCTL diff --git a/scripts/lld-version.sh b/scripts/lld-version.sh new file mode 100755 index 000000000000..d70edb4d8a4f --- /dev/null +++ b/scripts/lld-version.sh @@ -0,0 +1,20 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 +# +# Usage: $ ./scripts/lld-version.sh ld.lld +# +# Print the linker version of `ld.lld' in a 5 or 6-digit form +# such as `100001' for ld.lld 10.0.1 etc. + +linker_string="$($* --version)" + +if ! 
( echo $linker_string | grep -q LLD ); then + echo 0 + exit 1 +fi + +VERSION=$(echo $linker_string | cut -d ' ' -f 2) +MAJOR=$(echo $VERSION | cut -d . -f 1) +MINOR=$(echo $VERSION | cut -d . -f 2) +PATCHLEVEL=$(echo $VERSION | cut -d . -f 3) +printf "%d%02d%02d\\n" $MAJOR $MINOR $PATCHLEVEL From d904eb0b351fe5545d9ba5b85844342f49025923 Mon Sep 17 00:00:00 2001 From: Sean Young Date: Sun, 29 Nov 2020 18:01:26 +0100 Subject: [PATCH 077/296] media: mtk-cir: fix calculation of chk period Since commit 528222d853f9 ("media: rc: harmonize infrared durations to microseconds"), the calculation of the chk period is wrong. As a result, all reported IR will have incorrect timings. Now that the calculations are done in microseconds rather than nanoseconds, we can fold the calculations in a simpler form with less rounding error. Tested-by: Frank Wunderlich Fixes: 528222d853f9 ("media: rc: harmonize infrared durations to microseconds") Signed-off-by: Sean Young Signed-off-by: Mauro Carvalho Chehab --- drivers/media/rc/mtk-cir.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/drivers/media/rc/mtk-cir.c b/drivers/media/rc/mtk-cir.c index 5051a5e5244b..65a136c0fac2 100644 --- a/drivers/media/rc/mtk-cir.c +++ b/drivers/media/rc/mtk-cir.c @@ -151,15 +151,12 @@ static inline u32 mtk_chk_period(struct mtk_ir *ir) { u32 val; - /* Period of raw software sampling in ns */ - val = DIV_ROUND_CLOSEST(1000000000ul, - clk_get_rate(ir->bus) / ir->data->div); - /* * Period for software decoder used in the * unit of raw software sampling */ - val = DIV_ROUND_CLOSEST(MTK_IR_SAMPLE, val); + val = DIV_ROUND_CLOSEST(clk_get_rate(ir->bus), + USEC_PER_SEC * ir->data->div / MTK_IR_SAMPLE); dev_dbg(ir->dev, "@pwm clk = \t%lu\n", clk_get_rate(ir->bus) / ir->data->div); @@ -412,7 +409,7 @@ static int mtk_ir_probe(struct platform_device *pdev) mtk_irq_enable(ir, MTK_IRINT_EN); dev_info(dev, "Initialized MT7623 IR driver, sample period = %dus\n", - DIV_ROUND_CLOSEST(MTK_IR_SAMPLE, 
1000)); + MTK_IR_SAMPLE); return 0; From 024e01dead12c2b9fbe31216f2099401ebb78a4a Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Fri, 27 Nov 2020 10:36:32 +0100 Subject: [PATCH 078/296] media: pulse8-cec: fix duplicate free at disconnect or probe error Commit 601282d65b96 ("media: pulse8-cec: use adap_free callback") used the adap_free callback to clean up on disconnect. What I forgot was that in the probe it will call cec_delete_adapter() followed by kfree(pulse8) if an error occurs. But by using the adap_free callback, cec_delete_adapter() is already freeing the pulse8 struct. This wasn't noticed since normally the probe works fine, but Pulse-Eight published a new firmware version that caused a probe error, so now it hits this bug. This affects firmware version 12, but probably any version >= 10. Commit aa9eda76129c ("media: pulse8-cec: close serio in disconnect, not adap_free") made this worse by adding the line 'pulse8->serio = NULL' right after the call to cec_unregister_adapter in the disconnect() function. Unfortunately, cec_unregister_adapter will typically call cec_delete_adapter (unless a filehandle to the cec device is still open), which frees the pulse8 struct. So now it will also crash on a simple unplug of the Pulse-Eight device. With this fix both the unplug issue and a probe() error situation are handled correctly again. It will still fail to probe() with a v12 firmware, that's something to look at separately. 
Signed-off-by: Hans Verkuil Reported-by: Maxime Ripard Tested-by: Maxime Ripard Fixes: aa9eda76129c ("media: pulse8-cec: close serio in disconnect, not adap_free") Fixes: 601282d65b96 ("media: pulse8-cec: use adap_free callback") Cc: Signed-off-by: Mauro Carvalho Chehab --- drivers/media/cec/usb/pulse8/pulse8-cec.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/media/cec/usb/pulse8/pulse8-cec.c b/drivers/media/cec/usb/pulse8/pulse8-cec.c index e4d8446b87da..5d3a3f775bc8 100644 --- a/drivers/media/cec/usb/pulse8/pulse8-cec.c +++ b/drivers/media/cec/usb/pulse8/pulse8-cec.c @@ -650,7 +650,6 @@ static void pulse8_disconnect(struct serio *serio) struct pulse8 *pulse8 = serio_get_drvdata(serio); cec_unregister_adapter(pulse8->adap); - pulse8->serio = NULL; serio_set_drvdata(serio, NULL); serio_close(serio); } @@ -830,8 +829,10 @@ static int pulse8_connect(struct serio *serio, struct serio_driver *drv) pulse8->adap = cec_allocate_adapter(&pulse8_cec_adap_ops, pulse8, dev_name(&serio->dev), caps, 1); err = PTR_ERR_OR_ZERO(pulse8->adap); - if (err < 0) - goto free_device; + if (err < 0) { + kfree(pulse8); + return err; + } pulse8->dev = &serio->dev; serio_set_drvdata(serio, pulse8); @@ -874,8 +875,6 @@ close_serio: serio_close(serio); delete_adap: cec_delete_adapter(pulse8->adap); -free_device: - kfree(pulse8); return err; } From 45ba1c0ba3e589ad3ef0d0603c822eb27ea16563 Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Fri, 27 Nov 2020 12:52:30 +0100 Subject: [PATCH 079/296] media: pulse8-cec: add support for FW v10 and up Starting with firmware version 10 the GET/SET_HDMI_VERSION message was removed and GET/SET_AUTO_POWER_ON was added. The removal of GET/SET_HDMI_VERSION caused the probe of the Pulse-Eight to fail. Add a version check to handle this gracefully. Also show (but do not set) the Auto Power On value. 
Signed-off-by: Hans Verkuil Reported-by: Maxime Ripard Tested-by: Maxime Ripard Cc: Signed-off-by: Mauro Carvalho Chehab --- drivers/media/cec/usb/pulse8/pulse8-cec.c | 43 ++++++++++++++++------- 1 file changed, 30 insertions(+), 13 deletions(-) diff --git a/drivers/media/cec/usb/pulse8/pulse8-cec.c b/drivers/media/cec/usb/pulse8/pulse8-cec.c index 5d3a3f775bc8..04b13cdc38d2 100644 --- a/drivers/media/cec/usb/pulse8/pulse8-cec.c +++ b/drivers/media/cec/usb/pulse8/pulse8-cec.c @@ -88,13 +88,15 @@ enum pulse8_msgcodes { MSGCODE_SET_PHYSICAL_ADDRESS, /* 0x20 */ MSGCODE_GET_DEVICE_TYPE, MSGCODE_SET_DEVICE_TYPE, - MSGCODE_GET_HDMI_VERSION, + MSGCODE_GET_HDMI_VERSION, /* Removed in FW >= 10 */ MSGCODE_SET_HDMI_VERSION, MSGCODE_GET_OSD_NAME, MSGCODE_SET_OSD_NAME, MSGCODE_WRITE_EEPROM, MSGCODE_GET_ADAPTER_TYPE, /* 0x28 */ MSGCODE_SET_ACTIVE_SOURCE, + MSGCODE_GET_AUTO_POWER_ON, /* New for FW >= 10 */ + MSGCODE_SET_AUTO_POWER_ON, MSGCODE_FRAME_EOM = 0x80, MSGCODE_FRAME_ACK = 0x40, @@ -143,6 +145,8 @@ static const char * const pulse8_msgnames[] = { "WRITE_EEPROM", "GET_ADAPTER_TYPE", "SET_ACTIVE_SOURCE", + "GET_AUTO_POWER_ON", + "SET_AUTO_POWER_ON", }; static const char *pulse8_msgname(u8 cmd) @@ -579,12 +583,14 @@ static int pulse8_cec_adap_log_addr(struct cec_adapter *adap, u8 log_addr) if (err) goto unlock; - cmd[0] = MSGCODE_SET_HDMI_VERSION; - cmd[1] = adap->log_addrs.cec_version; - err = pulse8_send_and_wait(pulse8, cmd, 2, - MSGCODE_COMMAND_ACCEPTED, 0); - if (err) - goto unlock; + if (pulse8->vers < 10) { + cmd[0] = MSGCODE_SET_HDMI_VERSION; + cmd[1] = adap->log_addrs.cec_version; + err = pulse8_send_and_wait(pulse8, cmd, 2, + MSGCODE_COMMAND_ACCEPTED, 0); + if (err) + goto unlock; + } if (adap->log_addrs.osd_name[0]) { size_t osd_len = strlen(adap->log_addrs.osd_name); @@ -691,6 +697,14 @@ static int pulse8_setup(struct pulse8 *pulse8, struct serio *serio, dev_dbg(pulse8->dev, "Autonomous mode: %s", data[0] ? 
"on" : "off"); + if (pulse8->vers >= 10) { + cmd[0] = MSGCODE_GET_AUTO_POWER_ON; + err = pulse8_send_and_wait(pulse8, cmd, 1, cmd[0], 1); + if (!err) + dev_dbg(pulse8->dev, "Auto Power On: %s", + data[0] ? "on" : "off"); + } + cmd[0] = MSGCODE_GET_DEVICE_TYPE; err = pulse8_send_and_wait(pulse8, cmd, 1, cmd[0], 1); if (err) @@ -752,12 +766,15 @@ static int pulse8_setup(struct pulse8 *pulse8, struct serio *serio, dev_dbg(pulse8->dev, "Physical address: %x.%x.%x.%x\n", cec_phys_addr_exp(*pa)); - cmd[0] = MSGCODE_GET_HDMI_VERSION; - err = pulse8_send_and_wait(pulse8, cmd, 1, cmd[0], 1); - if (err) - return err; - log_addrs->cec_version = data[0]; - dev_dbg(pulse8->dev, "CEC version: %d\n", log_addrs->cec_version); + log_addrs->cec_version = CEC_OP_CEC_VERSION_1_4; + if (pulse8->vers < 10) { + cmd[0] = MSGCODE_GET_HDMI_VERSION; + err = pulse8_send_and_wait(pulse8, cmd, 1, cmd[0], 1); + if (err) + return err; + log_addrs->cec_version = data[0]; + dev_dbg(pulse8->dev, "CEC version: %d\n", log_addrs->cec_version); + } cmd[0] = MSGCODE_GET_OSD_NAME; err = pulse8_send_and_wait(pulse8, cmd, 1, cmd[0], 0); From fae3a13d2a3d49a89391889808428cf1e72afbd7 Mon Sep 17 00:00:00 2001 From: Babu Moger Date: Mon, 30 Nov 2020 09:57:20 -0600 Subject: [PATCH 080/296] x86/resctrl: Fix AMD L3 QOS CDP enable/disable When the AMD QoS feature CDP (code and data prioritization) is enabled or disabled, the CDP bit in MSR 0000_0C81 is written on one of the CPUs in an L3 domain (core complex). That is not correct - the CDP bit needs to be updated on all the logical CPUs in the domain. This was not spelled out clearly in the spec earlier. The specification has been updated and the updated document, "AMD64 Technology Platform Quality of Service Extensions Publication # 56375 Revision: 1.02 Issue Date: October 2020" is available now. Refer the section: Code and Data Prioritization. Fix the issue by adding a new flag arch_has_per_cpu_cfg in rdt_cache data structure. 
The documentation can be obtained at: https://developer.amd.com/wp-content/resources/56375.pdf Link: https://bugzilla.kernel.org/show_bug.cgi?id=206537 [ bp: Massage commit message. ] Fixes: 4d05bf71f157 ("x86/resctrl: Introduce AMD QOS feature") Signed-off-by: Babu Moger Signed-off-by: Borislav Petkov Reviewed-by: Reinette Chatre Link: https://lkml.kernel.org/r/160675180380.15628.3309402017215002347.stgit@bmoger-ubuntu --- arch/x86/kernel/cpu/resctrl/core.c | 4 ++++ arch/x86/kernel/cpu/resctrl/internal.h | 3 +++ arch/x86/kernel/cpu/resctrl/rdtgroup.c | 9 +++++++-- 3 files changed, 14 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c index e5f4ee8f4c3b..e8b5f1cf1ae8 100644 --- a/arch/x86/kernel/cpu/resctrl/core.c +++ b/arch/x86/kernel/cpu/resctrl/core.c @@ -570,6 +570,8 @@ static void domain_add_cpu(int cpu, struct rdt_resource *r) if (d) { cpumask_set_cpu(cpu, &d->cpu_mask); + if (r->cache.arch_has_per_cpu_cfg) + rdt_domain_reconfigure_cdp(r); return; } @@ -923,6 +925,7 @@ static __init void rdt_init_res_defs_intel(void) r->rid == RDT_RESOURCE_L2CODE) { r->cache.arch_has_sparse_bitmaps = false; r->cache.arch_has_empty_bitmaps = false; + r->cache.arch_has_per_cpu_cfg = false; } else if (r->rid == RDT_RESOURCE_MBA) { r->msr_base = MSR_IA32_MBA_THRTL_BASE; r->msr_update = mba_wrmsr_intel; @@ -943,6 +946,7 @@ static __init void rdt_init_res_defs_amd(void) r->rid == RDT_RESOURCE_L2CODE) { r->cache.arch_has_sparse_bitmaps = true; r->cache.arch_has_empty_bitmaps = true; + r->cache.arch_has_per_cpu_cfg = true; } else if (r->rid == RDT_RESOURCE_MBA) { r->msr_base = MSR_IA32_MBA_BW_BASE; r->msr_update = mba_wrmsr_amd; diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h index 80fa997fae60..f65d3c0dbc41 100644 --- a/arch/x86/kernel/cpu/resctrl/internal.h +++ b/arch/x86/kernel/cpu/resctrl/internal.h @@ -360,6 +360,8 @@ struct msr_param { * executing entities * 
@arch_has_sparse_bitmaps: True if a bitmap like f00f is valid. * @arch_has_empty_bitmaps: True if the '0' bitmap is valid. + * @arch_has_per_cpu_cfg: True if QOS_CFG register for this cache + * level has CPU scope. */ struct rdt_cache { unsigned int cbm_len; @@ -369,6 +371,7 @@ struct rdt_cache { unsigned int shareable_bits; bool arch_has_sparse_bitmaps; bool arch_has_empty_bitmaps; + bool arch_has_per_cpu_cfg; }; /** diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c index 6f4ca4bea625..f3418428682b 100644 --- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c +++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c @@ -1909,8 +1909,13 @@ static int set_cache_qos_cfg(int level, bool enable) r_l = &rdt_resources_all[level]; list_for_each_entry(d, &r_l->domains, list) { - /* Pick one CPU from each domain instance to update MSR */ - cpumask_set_cpu(cpumask_any(&d->cpu_mask), cpu_mask); + if (r_l->cache.arch_has_per_cpu_cfg) + /* Pick all the CPUs in the domain instance */ + for_each_cpu(cpu, &d->cpu_mask) + cpumask_set_cpu(cpu, cpu_mask); + else + /* Pick one CPU from each domain instance to update MSR */ + cpumask_set_cpu(cpumask_any(&d->cpu_mask), cpu_mask); } cpu = get_cpu(); /* Update QOS_CFG MSR on this cpu if it's in cpu_mask. */ From 1a16af33ba88ef25e206a13366379179cae79d23 Mon Sep 17 00:00:00 2001 From: David Gow Date: Sat, 21 Nov 2020 16:15:49 -0800 Subject: [PATCH 081/296] fpga: Specify HAS_IOMEM dependency for FPGA_DFL Because dfl.c uses the 'devm_ioremap', 'devm_iounmap', 'devm_ioremap_resource', and 'devm_platform_ioremap_resource' functions, it should depend on HAS_IOMEM. This fixes make allyesconfig under UML (ARCH=um), which doesn't provide HAS_IOMEM. 
[mdf@kernel.org: Removed "drivers: " in commit message] Fixes: 89eb35e810a8 ("fpga: dfl: map feature mmio resources in their own feature drivers") Signed-off-by: David Gow Signed-off-by: Moritz Fischer Link: https://lore.kernel.org/r/20201122001549.107023-2-mdf@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/fpga/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/fpga/Kconfig b/drivers/fpga/Kconfig index 7cd5a29fc437..5645226ca3ce 100644 --- a/drivers/fpga/Kconfig +++ b/drivers/fpga/Kconfig @@ -142,6 +142,7 @@ config FPGA_DFL tristate "FPGA Device Feature List (DFL) support" select FPGA_BRIDGE select FPGA_REGION + depends on HAS_IOMEM help Device Feature List (DFL) defines a feature list structure that creates a linked list of feature headers within the MMIO space From 8c3b55a299c325830a987de21dab6a89ecb71164 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Tue, 1 Dec 2020 09:55:29 -0800 Subject: [PATCH 082/296] Input: atmel_mxt_ts - fix lost interrupts MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit After commit 74d905d2d38a devices requiring the workaround for edge triggered interrupts stopped working. The hardware needs the quirk to be used before even proceeding to check if the quirk is needed because mxt_acquire_irq() is called before mxt_check_retrigen() is called and at this point pending IRQs need to be checked, and if the workaround is not active, all interrupts will be lost from this point. Solve this by switching the calls around. 
Reported-by: Andre Müller Tested-by: Andre Müller Suggested-by: Dmitry Torokhov Fixes: 74d905d2d38a ("Input: atmel_mxt_ts - only read messages in mxt_acquire_irq() when necessary") Signed-off-by: Linus Walleij Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20201201123026.1416743-1-linus.walleij@linaro.org Signed-off-by: Dmitry Torokhov --- drivers/input/touchscreen/atmel_mxt_ts.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/input/touchscreen/atmel_mxt_ts.c b/drivers/input/touchscreen/atmel_mxt_ts.c index 6b71b0aff115..99a33cd5e675 100644 --- a/drivers/input/touchscreen/atmel_mxt_ts.c +++ b/drivers/input/touchscreen/atmel_mxt_ts.c @@ -2183,11 +2183,11 @@ static int mxt_initialize(struct mxt_data *data) msleep(MXT_FW_RESET_TIME); } - error = mxt_acquire_irq(data); + error = mxt_check_retrigen(data); if (error) return error; - error = mxt_check_retrigen(data); + error = mxt_acquire_irq(data); if (error) return error; From 7e7986f9d3ba69a7375a41080a1f8c8012cb0923 Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Tue, 1 Dec 2020 11:07:09 -0500 Subject: [PATCH 083/296] block: use gcd() to fix chunk_sectors limit stacking commit 22ada802ede8 ("block: use lcm_not_zero() when stacking chunk_sectors") broke chunk_sectors limit stacking. chunk_sectors must reflect the most limited of all devices in the IO stack. Otherwise malformed IO may result. E.g.: prior to this fix, ->chunk_sectors = lcm_not_zero(8, 128) would result in blk_max_size_offset() splitting IO at 128 sectors rather than the required more restrictive 8 sectors. And since commit 07d098e6bbad ("block: allow 'chunk_sectors' to be non-power-of-2") care must be taken to properly stack chunk_sectors to be compatible with the possibility that a non-power-of-2 chunk_sectors may be stacked. This is why gcd() is used instead of reverting back to using min_not_zero(). 
Fixes: 22ada802ede8 ("block: use lcm_not_zero() when stacking chunk_sectors") Fixes: 07d098e6bbad ("block: allow 'chunk_sectors' to be non-power-of-2") Reported-by: John Dorminy Reported-by: Bruce Johnston Signed-off-by: Mike Snitzer Reviewed-by: John Dorminy Cc: stable@vger.kernel.org Reviewed-by: Martin K. Petersen Signed-off-by: Jens Axboe --- block/blk-settings.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/block/blk-settings.c b/block/blk-settings.c index 9741d1d83e98..659cdb8a07fe 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -547,7 +547,10 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b, t->io_min = max(t->io_min, b->io_min); t->io_opt = lcm_not_zero(t->io_opt, b->io_opt); - t->chunk_sectors = lcm_not_zero(t->chunk_sectors, b->chunk_sectors); + + /* Set non-power-of-2 compatible chunk_sectors boundary */ + if (b->chunk_sectors) + t->chunk_sectors = gcd(t->chunk_sectors, b->chunk_sectors); /* Physical block size a multiple of the logical block size? */ if (t->physical_block_size & (t->logical_block_size - 1)) { From 35d2835d2ac41dc0b3e3469f8e2b08ce9709ace8 Mon Sep 17 00:00:00 2001 From: Nick Desaulniers Date: Tue, 10 Nov 2020 18:41:40 -0800 Subject: [PATCH 084/296] Revert "dm cache: fix arm link errors with inline" This reverts commit 43aeaa29573924df76f44eda2bbd94ca36e407b5. Since commit 0bddd227f3dc ("Documentation: update for gcc 4.9 requirement") the minimum supported version of GCC is gcc-4.9. It's now safe to remove this code. 
Link: https://github.com/ClangBuiltLinux/linux/issues/427 Signed-off-by: Nick Desaulniers Acked-by: Mikulas Patocka Signed-off-by: Mike Snitzer --- drivers/md/dm-cache-target.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c index 9644424591da..4bc453f5bbaa 100644 --- a/drivers/md/dm-cache-target.c +++ b/drivers/md/dm-cache-target.c @@ -712,10 +712,6 @@ static bool block_size_is_power_of_two(struct cache *cache) return cache->sectors_per_block_shift >= 0; } -/* gcc on ARM generates spurious references to __udivdi3 and __umoddi3 */ -#if defined(CONFIG_ARM) && __GNUC__ == 4 && __GNUC_MINOR__ <= 6 -__always_inline -#endif static dm_block_t block_div(dm_block_t b, uint32_t n) { do_div(b, n); From 89478335718c98557f10470a9bc5c555b9261c4e Mon Sep 17 00:00:00 2001 From: Sergei Shtepa Date: Wed, 11 Nov 2020 15:55:46 +0300 Subject: [PATCH 085/296] dm: fix bug with RCU locking in dm_blk_report_zones The dm_get_live_table() function takes the RCU read lock, so dm_put_live_table() must be called even if the dm_table map is not found. 
Fixes: e76239a3748c9 ("block: add a report_zones method") Cc: stable@vger.kernel.org Signed-off-by: Sergei Shtepa Signed-off-by: Mike Snitzer --- drivers/md/dm.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/md/dm.c b/drivers/md/dm.c index c18fc2548518..98866e725f25 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -476,8 +476,10 @@ static int dm_blk_report_zones(struct gendisk *disk, sector_t sector, return -EAGAIN; map = dm_get_live_table(md, &srcu_idx); - if (!map) - return -EIO; + if (!map) { + ret = -EIO; + goto out; + } do { struct dm_target *tgt; From e7b624183d921b49ef0a96329f21647d38865ee9 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 13 Nov 2020 15:19:10 +0100 Subject: [PATCH 086/296] dm table: Remove BUG_ON(in_interrupt()) The BUG_ON(in_interrupt()) in dm_table_event() is a historic leftover from a rework of the dm table code which changed the calling context. Issuing a BUG for a wrong calling context is frowned upon and in_interrupt() is deprecated and only covering parts of the wrong contexts. The sanity check for the context is covered by CONFIG_DEBUG_ATOMIC_SLEEP and other debug facilities already. Signed-off-by: Thomas Gleixner Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Mike Snitzer --- drivers/md/dm-table.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index ce543b761be7..2073ee8d18f4 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -1247,12 +1247,6 @@ void dm_table_event_callback(struct dm_table *t, void dm_table_event(struct dm_table *t) { - /* - * You can no longer call dm_table_event() from interrupt - * context, use a bottom half instead. 
- */ - BUG_ON(in_interrupt()); - mutex_lock(&_event_lock); if (t->event_fn) t->event_fn(t->event_context); From 857c4c0a8b2888d806f4308c58f59a6a81a1dee9 Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Fri, 13 Nov 2020 14:52:28 -0800 Subject: [PATCH 087/296] dm writecache: remove BUG() and fail gracefully instead Building on arch/s390/ results in this build error: cc1: some warnings being treated as errors ../drivers/md/dm-writecache.c: In function 'persistent_memory_claim': ../drivers/md/dm-writecache.c:323:1: error: no return statement in function returning non-void [-Werror=return-type] Fix this by replacing the BUG() with an -EOPNOTSUPP return. Fixes: 48debafe4f2f ("dm: add writecache target") Reported-by: Randy Dunlap Signed-off-by: Mike Snitzer --- drivers/md/dm-writecache.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/md/dm-writecache.c b/drivers/md/dm-writecache.c index 7d277de26b3a..d5223a0e5cc5 100644 --- a/drivers/md/dm-writecache.c +++ b/drivers/md/dm-writecache.c @@ -319,7 +319,7 @@ err1: #else static int persistent_memory_claim(struct dm_writecache *wc) { - BUG(); + return -EOPNOTSUPP; } #endif From 0191c271debfc3d171e8b2d81875d7036982d02c Mon Sep 17 00:00:00 2001 From: Alok Prasad Date: Fri, 27 Nov 2020 16:32:51 +0000 Subject: [PATCH 088/296] RDMA/qedr: iWARP invalid(zero) doorbell address fix This patch fixes an issue introduced by a previous commit where the iWARP doorbell address wasn't initialized, causing a call trace when any RDMA application wants to use this interface: Illegal doorbell address: 0000000000000000. Legal range for doorbell addresses is [0000000011431e08..00000000ec3799d3] WARNING: CPU: 11 PID: 11990 at drivers/net/ethernet/qlogic/qed/qed_dev.c:93 qed_db_rec_sanity.isra.12+0x48/0x70 [qed] ... 
hpsa scsi_transport_sas [last unloaded: crc8] CPU: 11 PID: 11990 Comm: rping Tainted: G S 5.10.0-rc1 #29 Hardware name: HP ProLiant DL380 Gen9/ProLiant DL380 Gen9, BIOS P89 01/22/2018 RIP: 0010:qed_db_rec_sanity.isra.12+0x48/0x70 [qed] ... RSP: 0018:ffffafc28458fa88 EFLAGS: 00010286 RAX: 0000000000000000 RBX: ffff8d0d4c620000 RCX: 0000000000000000 RDX: ffff8d10afde7d50 RSI: ffff8d10afdd8b40 RDI: ffff8d10afdd8b40 RBP: ffffafc28458fe38 R08: 0000000000000003 R09: 0000000000007fff R10: 0000000000000001 R11: ffffafc28458f888 R12: 0000000000000000 R13: 0000000000000000 R14: ffff8d0d43ccbbd0 R15: ffff8d0d48dae9c0 FS: 00007fbd5267e740(0000) GS:ffff8d10afdc0000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007fbd4f258fb8 CR3: 0000000108d96003 CR4: 00000000001706e0 Call Trace: qed_db_recovery_add+0x6d/0x1f0 [qed] qedr_create_user_qp+0x57e/0xd30 [qedr] qedr_create_qp+0x5f3/0xab0 [qedr] ? lookup_get_idr_uobject.part.12+0x45/0x90 [ib_uverbs] create_qp+0x45d/0xb30 [ib_uverbs] ? ib_uverbs_cq_event_handler+0x30/0x30 [ib_uverbs] ib_uverbs_create_qp+0xb9/0xe0 [ib_uverbs] ib_uverbs_write+0x3f9/0x570 [ib_uverbs] ? security_mmap_file+0x62/0xe0 vfs_write+0xb7/0x200 ksys_write+0xaf/0xd0 ? 
syscall_trace_enter.isra.25+0x152/0x200 do_syscall_64+0x2d/0x40 entry_SYSCALL_64_after_hwframe+0x44/0xa9 Fixes: 06e8d1df46ed ("RDMA/qedr: Add support for user mode XRC-SRQ's") Link: https://lore.kernel.org/r/20201127163251.14533-1-palok@marvell.com Signed-off-by: Michal Kalderon Signed-off-by: Igor Russkikh Signed-off-by: Alok Prasad Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/qedr/verbs.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c index 019642ff24a7..511c95bb3d01 100644 --- a/drivers/infiniband/hw/qedr/verbs.c +++ b/drivers/infiniband/hw/qedr/verbs.c @@ -1936,6 +1936,15 @@ static int qedr_create_user_qp(struct qedr_dev *dev, } if (rdma_protocol_iwarp(&dev->ibdev, 1)) { + qp->urq.db_rec_db2_addr = ctx->dpi_addr + uresp.rq_db2_offset; + + /* calculate the db_rec_db2 data since it is constant so no + * need to reflect from user + */ + qp->urq.db_rec_db2_data.data.icid = cpu_to_le16(qp->icid); + qp->urq.db_rec_db2_data.data.value = + cpu_to_le16(DQ_TCM_IWARP_POST_RQ_CF_CMD); + rc = qedr_db_recovery_add(dev, qp->urq.db_rec_db2_addr, &qp->urq.db_rec_db2_data, DB_REC_WIDTH_32B, From 93416ab0f994f6cf16fa0c695577f8b19d30c533 Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Tue, 1 Dec 2020 11:17:24 +0200 Subject: [PATCH 089/296] RDMA/efa: Use the correct current and new states in modify QP The local variables cur_state and new_state hold the state that should be used for the modify QP operation instead of the ones in the ib_qp_attr struct. 
Fixes: 40909f664d27 ("RDMA/efa: Add EFA verbs implementation") Link: https://lore.kernel.org/r/20201201091724.37016-1-galpress@amazon.com Reviewed-by: Firas JahJah Reviewed-by: Yossi Leybovich Signed-off-by: Gal Pressman Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/efa/efa_verbs.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/efa/efa_verbs.c b/drivers/infiniband/hw/efa/efa_verbs.c index 191e0843f090..4e940fc50bba 100644 --- a/drivers/infiniband/hw/efa/efa_verbs.c +++ b/drivers/infiniband/hw/efa/efa_verbs.c @@ -940,8 +940,8 @@ int efa_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, 1); EFA_SET(&params.modify_mask, EFA_ADMIN_MODIFY_QP_CMD_CUR_QP_STATE, 1); - params.cur_qp_state = qp_attr->cur_qp_state; - params.qp_state = qp_attr->qp_state; + params.cur_qp_state = cur_state; + params.qp_state = new_state; } if (qp_attr_mask & IB_QP_EN_SQD_ASYNC_NOTIFY) { From f6a8250ea1e42ad1f4f3bab01c851ec5fd48f0e7 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Mon, 30 Nov 2020 14:33:35 -0800 Subject: [PATCH 090/296] libbpf: Fix ring_buffer__poll() to return number of consumed samples Fix ring_buffer__poll() to return the number of non-discarded records consumed, just like its documentation states. It's also consistent with ring_buffer__consume() return. Fix up selftests with wrong expected results. 
Fixes: bf99c936f947 ("libbpf: Add BPF ring buffer support") Fixes: cb1c9ddd5525 ("selftests/bpf: Add BPF ringbuf selftests") Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20201130223336.904192-1-andrii@kernel.org --- tools/lib/bpf/ringbuf.c | 2 +- tools/testing/selftests/bpf/prog_tests/ringbuf.c | 2 +- tools/testing/selftests/bpf/prog_tests/ringbuf_multi.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tools/lib/bpf/ringbuf.c b/tools/lib/bpf/ringbuf.c index 5c6522c89af1..98537ff2679e 100644 --- a/tools/lib/bpf/ringbuf.c +++ b/tools/lib/bpf/ringbuf.c @@ -278,7 +278,7 @@ int ring_buffer__poll(struct ring_buffer *rb, int timeout_ms) err = ringbuf_process_ring(ring); if (err < 0) return err; - res += cnt; + res += err; } return cnt < 0 ? -errno : res; } diff --git a/tools/testing/selftests/bpf/prog_tests/ringbuf.c b/tools/testing/selftests/bpf/prog_tests/ringbuf.c index c1650548433c..1a48c6f7f54e 100644 --- a/tools/testing/selftests/bpf/prog_tests/ringbuf.c +++ b/tools/testing/selftests/bpf/prog_tests/ringbuf.c @@ -217,7 +217,7 @@ void test_ringbuf(void) if (CHECK(err, "join_bg", "err %d\n", err)) goto cleanup; - if (CHECK(bg_ret != 1, "bg_ret", "epoll_wait result: %ld", bg_ret)) + if (CHECK(bg_ret <= 0, "bg_ret", "epoll_wait result: %ld", bg_ret)) goto cleanup; /* 3 rounds, 2 samples each */ diff --git a/tools/testing/selftests/bpf/prog_tests/ringbuf_multi.c b/tools/testing/selftests/bpf/prog_tests/ringbuf_multi.c index 78e450609803..d37161e59bb2 100644 --- a/tools/testing/selftests/bpf/prog_tests/ringbuf_multi.c +++ b/tools/testing/selftests/bpf/prog_tests/ringbuf_multi.c @@ -81,7 +81,7 @@ void test_ringbuf_multi(void) /* poll for samples, should get 2 ringbufs back */ err = ring_buffer__poll(ringbuf, -1); - if (CHECK(err != 4, "poll_res", "expected 4 records, got %d\n", err)) + if (CHECK(err != 2, "poll_res", "expected 2 records, got %d\n", err)) goto cleanup; /* expect extra polling to 
return nothing */ From 156c9b70dbfb83eeeff39e9202eb5f8bb6d0fd04 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Mon, 30 Nov 2020 14:33:36 -0800 Subject: [PATCH 091/296] selftests/bpf: Drain ringbuf samples at the end of test Avoid occasional test failures due to the last sample being delayed to another ring_buffer__poll() call. Instead, drain samples completely with ring_buffer__consume(). This is supposed to fix a rare and non-deterministic test failure in libbpf CI. Fixes: cb1c9ddd5525 ("selftests/bpf: Add BPF ringbuf selftests") Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20201130223336.904192-2-andrii@kernel.org --- tools/testing/selftests/bpf/prog_tests/ringbuf.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tools/testing/selftests/bpf/prog_tests/ringbuf.c b/tools/testing/selftests/bpf/prog_tests/ringbuf.c index 1a48c6f7f54e..fddbc5db5d6a 100644 --- a/tools/testing/selftests/bpf/prog_tests/ringbuf.c +++ b/tools/testing/selftests/bpf/prog_tests/ringbuf.c @@ -220,6 +220,12 @@ void test_ringbuf(void) if (CHECK(bg_ret <= 0, "bg_ret", "epoll_wait result: %ld", bg_ret)) goto cleanup; + /* due to timing variations, there could still be non-notified + * samples, so consume them here to collect all the samples + */ + err = ring_buffer__consume(ringbuf); + CHECK(err < 0, "rb_consume", "failed: %d\b", err); + /* 3 rounds, 2 samples each */ cnt = atomic_xchg(&sample_cnt, 0); CHECK(cnt != 6, "cnt", "exp %d samples, got %d\n", 6, cnt); From 0ac22098de6f9fd460ac1b1cbf6dbd324312161c Mon Sep 17 00:00:00 2001 From: Zheng Liang Date: Tue, 24 Nov 2020 11:06:06 +0800 Subject: [PATCH 092/296] gpio: arizona: disable pm_runtime in case of failure pm_runtime_enable will increase power disable depth. Thus a pairing decrement is needed on the error handling path to keep it balanced. 
Fixes:27a49ed17e224(gpio: arizona: Add support for GPIOs that) Reported-by: Hulk Robot Signed-off-by: Zheng Liang Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpio-arizona.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpio/gpio-arizona.c b/drivers/gpio/gpio-arizona.c index 5bda38e0780f..2bc173c352ce 100644 --- a/drivers/gpio/gpio-arizona.c +++ b/drivers/gpio/gpio-arizona.c @@ -192,6 +192,7 @@ static int arizona_gpio_probe(struct platform_device *pdev) ret = devm_gpiochip_add_data(&pdev->dev, &arizona_gpio->gpio_chip, arizona_gpio); if (ret < 0) { + pm_runtime_disable(&pdev->dev); dev_err(&pdev->dev, "Could not register gpiochip, %d\n", ret); return ret; From 60593df667e087b009ee0fc20d92e9c4c096a9b5 Mon Sep 17 00:00:00 2001 From: Luo Jiaxing Date: Fri, 27 Nov 2020 16:50:02 +0800 Subject: [PATCH 093/296] gpio: dwapb: fix NULL pointer dereference at dwapb_gpio_suspend() Following Calltrace is found when running echo freeze > /sys/power/state. [ 272.755506] Unable to handle kernel NULL pointer dereference at virtual address 0000000000000010 [ 272.755585] Call trace: [ 272.755587] dwapb_gpio_suspend+0x18/0x318 [ 272.755588] pm_generic_suspend+0x2c/0x48 [ 272.755595] acpi_subsys_suspend+0x60/0x70 [ 272.755599] dpm_run_callback.isra.18+0x40/0xe0 [ 272.755601] __device_suspend+0xf4/0x360 The reason is platform_set_drvdata() is deleted, and dwapb_gpio_suspend() get *gpio by dev_get_drvdata(). 
Fixes: feeaefd378ca ("gpio: dwapb: Use resource managed GPIO-chip add data method") Signed-off-by: Luo Jiaxing Acked-by: Serge Semin Reviewed-by: Andy Shevchenko Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpio-dwapb.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpio/gpio-dwapb.c b/drivers/gpio/gpio-dwapb.c index 2a9046c0fb16..4275c18a097a 100644 --- a/drivers/gpio/gpio-dwapb.c +++ b/drivers/gpio/gpio-dwapb.c @@ -724,6 +724,8 @@ static int dwapb_gpio_probe(struct platform_device *pdev) return err; } + platform_set_drvdata(pdev, gpio); + return 0; } From 6dbbf84603961d4e8eaea46e3530373c8cffee67 Mon Sep 17 00:00:00 2001 From: Edmond Chung Date: Mon, 30 Nov 2020 22:47:53 +0000 Subject: [PATCH 094/296] gpiolib: Don't free if pin ranges are not defined A similar check was added in gpiochip_generic_request, but not in free. This has caused an imbalance count of request vs. free calls to the pinctrl driver. This patch is targeted to fix that issue. Fixes: 2ab73c6d8323 ("gpio: Support GPIO controllers without pin-ranges") Signed-off-by: Edmond Chung Signed-off-by: Andrew Chant Signed-off-by: Will McVicker Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpiolib.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index 089ddcaa9bc6..6e3c4d7a7d14 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -1806,6 +1806,11 @@ EXPORT_SYMBOL_GPL(gpiochip_generic_request); */ void gpiochip_generic_free(struct gpio_chip *gc, unsigned offset) { +#ifdef CONFIG_PINCTRL + if (list_empty(&gc->gpiodev->pin_ranges)) + return; +#endif + pinctrl_gpio_free(gc->gpiodev->base + offset); } EXPORT_SYMBOL_GPL(gpiochip_generic_free); From 7f57b295f990c0fa07f96d51ca1c82c52dbf79cc Mon Sep 17 00:00:00 2001 From: Qinglang Miao Date: Fri, 27 Nov 2020 17:44:45 +0800 Subject: [PATCH 095/296] gpio: zynq: fix reference leak in zynq_gpio functions pm_runtime_get_sync will increment pm usage counter even it failed. 
Forgetting the put operation will result in a reference leak here. A new function pm_runtime_resume_and_get is introduced in [0] to keep usage counter balanced. So we fix the reference leak by replacing it with the new function. [0] dd8088d5a896 ("PM: runtime: Add pm_runtime_resume_and_get to deal with usage counter") Fixes: c2df3de0d07e ("gpio: zynq: properly support runtime PM for GPIO used as interrupts") Reported-by: Hulk Robot Signed-off-by: Qinglang Miao Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpio-zynq.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpio/gpio-zynq.c b/drivers/gpio/gpio-zynq.c index 0b5a17ab996f..3521c1dc3ac0 100644 --- a/drivers/gpio/gpio-zynq.c +++ b/drivers/gpio/gpio-zynq.c @@ -574,7 +574,7 @@ static int zynq_gpio_irq_reqres(struct irq_data *d) struct gpio_chip *chip = irq_data_get_irq_chip_data(d); int ret; - ret = pm_runtime_get_sync(chip->parent); + ret = pm_runtime_resume_and_get(chip->parent); if (ret < 0) return ret; @@ -942,7 +942,7 @@ static int zynq_gpio_probe(struct platform_device *pdev) pm_runtime_set_active(&pdev->dev); pm_runtime_enable(&pdev->dev); - ret = pm_runtime_get_sync(&pdev->dev); + ret = pm_runtime_resume_and_get(&pdev->dev); if (ret < 0) goto err_pm_dis; From 7ee1a01e47403f72b9f38839a737692f6991263e Mon Sep 17 00:00:00 2001 From: Baruch Siach Date: Wed, 2 Dec 2020 09:15:32 +0200 Subject: [PATCH 096/296] gpio: mvebu: fix potential user-after-free on probe When mvebu_pwm_probe() fails IRQ domain is not released. Move pwm probe before IRQ domain allocation. Add pwm cleanup code to the failure path. 
Fixes: 757642f9a584 ("gpio: mvebu: Add limited PWM support") Reported-by: Andrew Lunn Signed-off-by: Baruch Siach Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpio-mvebu.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/drivers/gpio/gpio-mvebu.c b/drivers/gpio/gpio-mvebu.c index 433e2c3f3fd5..2f245594a90a 100644 --- a/drivers/gpio/gpio-mvebu.c +++ b/drivers/gpio/gpio-mvebu.c @@ -1197,6 +1197,13 @@ static int mvebu_gpio_probe(struct platform_device *pdev) devm_gpiochip_add_data(&pdev->dev, &mvchip->chip, mvchip); + /* Some MVEBU SoCs have simple PWM support for GPIO lines */ + if (IS_ENABLED(CONFIG_PWM)) { + err = mvebu_pwm_probe(pdev, mvchip, id); + if (err) + return err; + } + /* Some gpio controllers do not provide irq support */ if (!have_irqs) return 0; @@ -1206,7 +1213,8 @@ static int mvebu_gpio_probe(struct platform_device *pdev) if (!mvchip->domain) { dev_err(&pdev->dev, "couldn't allocate irq domain %s (DT).\n", mvchip->chip.label); - return -ENODEV; + err = -ENODEV; + goto err_pwm; } err = irq_alloc_domain_generic_chips( @@ -1254,14 +1262,12 @@ static int mvebu_gpio_probe(struct platform_device *pdev) mvchip); } - /* Some MVEBU SoCs have simple PWM support for GPIO lines */ - if (IS_ENABLED(CONFIG_PWM)) - return mvebu_pwm_probe(pdev, mvchip, id); - return 0; err_domain: irq_domain_remove(mvchip->domain); +err_pwm: + pwmchip_remove(&mvchip->mvpwm->chip); return err; } From bac63f1239aceb092e94fee3ef112dbaea79b372 Mon Sep 17 00:00:00 2001 From: Sergey Senozhatsky Date: Mon, 30 Nov 2020 16:00:54 +0100 Subject: [PATCH 097/296] media: vb2: set cache sync hints when init buffers We need to set ->need_cache_sync_on_prepare and ->need_cache_sync_on_finish when we initialize vb2 buffer. Currently these flags are set/adjusted only in V4L2's vb2_queue_or_prepare_buf(), which means that for the code paths that don't use V4L2 vb2 will always tell videobuf2 core to skip ->prepare() and ->finish() cache syncs/flushes. 
Fix this by setting cache sync hints for new buffers; except VB2_MEMORY_DMABUF buffers, for which DMA exporter syncs caches. Fixes: f5f5fa73fbfb ("media: videobuf2: handle V4L2 buffer cache flags") Reported-by: Tomasz Figa Signed-off-by: Sergey Senozhatsky Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- drivers/media/common/videobuf2/videobuf2-core.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/media/common/videobuf2/videobuf2-core.c b/drivers/media/common/videobuf2/videobuf2-core.c index 4eab6d81cce1..89e38392509c 100644 --- a/drivers/media/common/videobuf2/videobuf2-core.c +++ b/drivers/media/common/videobuf2/videobuf2-core.c @@ -414,6 +414,17 @@ static int __vb2_queue_alloc(struct vb2_queue *q, enum vb2_memory memory, vb->index = q->num_buffers + buffer; vb->type = q->type; vb->memory = memory; + /* + * We need to set these flags here so that the videobuf2 core + * will call ->prepare()/->finish() cache sync/flush on vb2 + * buffers when appropriate. However, we can avoid explicit + * ->prepare() and ->finish() cache sync for DMABUF buffers, + * because DMA exporter takes care of it. + */ + if (q->memory != VB2_MEMORY_DMABUF) { + vb->need_cache_sync_on_prepare = 1; + vb->need_cache_sync_on_finish = 1; + } for (plane = 0; plane < num_planes; ++plane) { vb->planes[plane].length = plane_sizes[plane]; vb->planes[plane].min_length = plane_sizes[plane]; From 3c0dde35e6aee456abc8d8549ff7ee0963274214 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Fri, 27 Nov 2020 14:28:32 +0100 Subject: [PATCH 098/296] media: [next] media: vidtv: fix a read from an object after it has been freed Currently the call to vidtv_psi_pat_table_destroy frees the object m->si.pat however m->si.pat->num_pmt is being accessed after the free. Fix this by destroying m->si.pat after the m->si.pmt_secs[] objects have been freed. 
Addresses-Coverity: ("Read from pointer after free") Reported-by: Cengiz Can # sent a similar fix about the same time Fixes: 039b7caed173 ("media: vidtv: add a PID entry for the NIT table") Signed-off-by: Colin Ian King Signed-off-by: Mauro Carvalho Chehab --- drivers/media/test-drivers/vidtv/vidtv_channel.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/media/test-drivers/vidtv/vidtv_channel.c b/drivers/media/test-drivers/vidtv/vidtv_channel.c index 8ad6c0744d36..7838e6272712 100644 --- a/drivers/media/test-drivers/vidtv/vidtv_channel.c +++ b/drivers/media/test-drivers/vidtv/vidtv_channel.c @@ -504,11 +504,11 @@ void vidtv_channel_si_destroy(struct vidtv_mux *m) { u32 i; - vidtv_psi_pat_table_destroy(m->si.pat); - for (i = 0; i < m->si.pat->num_pmt; ++i) vidtv_psi_pmt_table_destroy(m->si.pmt_secs[i]); + vidtv_psi_pat_table_destroy(m->si.pat); + kfree(m->si.pmt_secs); vidtv_psi_sdt_table_destroy(m->si.sdt); vidtv_psi_nit_table_destroy(m->si.nit); From a1ee28117077c3bf24e5ab6324c835eaab629c45 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Sat, 28 Nov 2020 17:07:21 +1000 Subject: [PATCH 099/296] powerpc/64s/powernv: Fix memory corruption when saving SLB entries on MCE This can be hit by an HPT guest running on an HPT host and bring down the host, so it's quite important to fix. 
Fixes: 7290f3b3d3e6 ("powerpc/64s/powernv: machine check dump SLB contents") Cc: stable@vger.kernel.org # v5.4+ Signed-off-by: Nicholas Piggin Acked-by: Mahesh Salgaonkar Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201128070728.825934-2-npiggin@gmail.com --- arch/powerpc/platforms/powernv/setup.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c index 46115231a3b2..4426a109ec2f 100644 --- a/arch/powerpc/platforms/powernv/setup.c +++ b/arch/powerpc/platforms/powernv/setup.c @@ -211,11 +211,16 @@ static void __init pnv_init(void) add_preferred_console("hvc", 0, NULL); if (!radix_enabled()) { + size_t size = sizeof(struct slb_entry) * mmu_slb_size; int i; /* Allocate per cpu area to save old slb contents during MCE */ - for_each_possible_cpu(i) - paca_ptrs[i]->mce_faulty_slbs = memblock_alloc_node(mmu_slb_size, __alignof__(*paca_ptrs[i]->mce_faulty_slbs), cpu_to_node(i)); + for_each_possible_cpu(i) { + paca_ptrs[i]->mce_faulty_slbs = + memblock_alloc_node(size, + __alignof__(struct slb_entry), + cpu_to_node(i)); + } } } From cc2ff9730e0fe649ebf064632c9d0e04218e2f25 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Wed, 2 Dec 2020 08:51:28 +0100 Subject: [PATCH 100/296] media: vidtv: fix kernel-doc markups Some functions have different names in their prototypes and in the corresponding kernel-doc markups. 
Signed-off-by: Mauro Carvalho Chehab --- drivers/media/test-drivers/vidtv/vidtv_psi.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/media/test-drivers/vidtv/vidtv_psi.h b/drivers/media/test-drivers/vidtv/vidtv_psi.h index 340c9fb8d583..6651cc91bda1 100644 --- a/drivers/media/test-drivers/vidtv/vidtv_psi.h +++ b/drivers/media/test-drivers/vidtv/vidtv_psi.h @@ -420,7 +420,7 @@ void vidtv_psi_desc_assign(struct vidtv_psi_desc **to, struct vidtv_psi_desc *desc); /** - * vidtv_psi_pmt_desc_assign - Assigns a descriptor loop at some point in a PMT section. + * vidtv_pmt_desc_assign - Assigns a descriptor loop at some point in a PMT section. * @pmt: The PMT section that will contain the descriptor loop * @to: Where in the PMT to assign this descriptor loop to * @desc: The descriptor loop that will be assigned. @@ -434,7 +434,7 @@ void vidtv_pmt_desc_assign(struct vidtv_psi_table_pmt *pmt, struct vidtv_psi_desc *desc); /** - * vidtv_psi_sdt_desc_assign - Assigns a descriptor loop at some point in a SDT. + * vidtv_sdt_desc_assign - Assigns a descriptor loop at some point in a SDT. * @sdt: The SDT that will contain the descriptor loop * @to: Where in the PMT to assign this descriptor loop to * @desc: The descriptor loop that will be assigned. @@ -474,7 +474,7 @@ void vidtv_psi_pmt_stream_assign(struct vidtv_psi_table_pmt *pmt, struct vidtv_psi_desc *vidtv_psi_desc_clone(struct vidtv_psi_desc *desc); /** - * vidtv_psi_create_sec_for_each_pat_entry - Create a PMT section for each + * vidtv_psi_pmt_create_sec_for_each_pat_entry - Create a PMT section for each * program found in the PAT * @pat: The PAT to look for programs. 
 * @pcr_pid: packet ID for the PCR to be used for the program described in this From e9acf0298c664f825e6f1158f2a97341bf9e03ca Mon Sep 17 00:00:00 2001 From: Zhihao Cheng Date: Mon, 16 Nov 2020 22:10:58 +0800 Subject: [PATCH 101/296] i2c: qup: Fix error return code in qup_i2c_bam_schedule_desc() Fix to return the error code from qup_i2c_change_state() instead of 0 in qup_i2c_bam_schedule_desc(). Fixes: fbf9921f8b35d9b2 ("i2c: qup: Fix error handling") Reported-by: Hulk Robot Signed-off-by: Zhihao Cheng Reviewed-by: Bjorn Andersson Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-qup.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/i2c/busses/i2c-qup.c b/drivers/i2c/busses/i2c-qup.c index fbc04b60cfd1..5a47915869ae 100644 --- a/drivers/i2c/busses/i2c-qup.c +++ b/drivers/i2c/busses/i2c-qup.c @@ -801,7 +801,8 @@ static int qup_i2c_bam_schedule_desc(struct qup_i2c_dev *qup) if (ret || qup->bus_err || qup->qup_err) { reinit_completion(&qup->xfer); - if (qup_i2c_change_state(qup, QUP_RUN_STATE)) { + ret = qup_i2c_change_state(qup, QUP_RUN_STATE); + if (ret) { dev_err(qup->dev, "change to run state timed out"); goto desc_err; } From 14718b3e129b058cb716a60c6faf40ef68661c54 Mon Sep 17 00:00:00 2001 From: Robert Foss Date: Mon, 30 Nov 2020 11:04:45 +0100 Subject: [PATCH 102/296] i2c: qcom: Fix IRQ error misassignement During cci_isr() errors read from register fields belonging to i2c master1 are currently assigned to the status field belonging to i2c master0. This patch corrects this error, and always assigns master1 errors to the status field of master1. 
Fixes: e517526195de ("i2c: Add Qualcomm CCI I2C driver") Reported-by: Loic Poulain Suggested-by: Loic Poulain Signed-off-by: Robert Foss Reviewed-by: Manivannan Sadhasivam Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-qcom-cci.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/i2c/busses/i2c-qcom-cci.c b/drivers/i2c/busses/i2c-qcom-cci.c index f13735beca58..1c259b5188de 100644 --- a/drivers/i2c/busses/i2c-qcom-cci.c +++ b/drivers/i2c/busses/i2c-qcom-cci.c @@ -194,9 +194,9 @@ static irqreturn_t cci_isr(int irq, void *dev) if (unlikely(val & CCI_IRQ_STATUS_0_I2C_M1_ERROR)) { if (val & CCI_IRQ_STATUS_0_I2C_M1_Q0_NACK_ERR || val & CCI_IRQ_STATUS_0_I2C_M1_Q1_NACK_ERR) - cci->master[0].status = -ENXIO; + cci->master[1].status = -ENXIO; else - cci->master[0].status = -EIO; + cci->master[1].status = -EIO; writel(CCI_HALT_REQ_I2C_M1_Q0Q1, cci->base + CCI_HALT_REQ); ret = IRQ_HANDLED; From 384a9565f70a876c2e78e58c5ca0bbf0547e4f6d Mon Sep 17 00:00:00 2001 From: Christian Eggers Date: Fri, 9 Oct 2020 13:03:18 +0200 Subject: [PATCH 103/296] i2c: imx: Fix reset of I2SR_IAL flag MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit According to the "VFxxx Controller Reference Manual" (and the comment block starting at line 97), Vybrid requires writing a one for clearing an interrupt flag. Syncing the method for clearing I2SR_IIF in i2c_imx_isr(). 
Signed-off-by: Christian Eggers Fixes: 4b775022f6fd ("i2c: imx: add struct to hold more configurable quirks") Reviewed-by: Uwe Kleine-König Acked-by: Oleksij Rempel Cc: stable@vger.kernel.org Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-imx.c | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/drivers/i2c/busses/i2c-imx.c b/drivers/i2c/busses/i2c-imx.c index c98529c76348..39e98d216016 100644 --- a/drivers/i2c/busses/i2c-imx.c +++ b/drivers/i2c/busses/i2c-imx.c @@ -412,6 +412,19 @@ static void i2c_imx_dma_free(struct imx_i2c_struct *i2c_imx) dma->chan_using = NULL; } +static void i2c_imx_clear_irq(struct imx_i2c_struct *i2c_imx, unsigned int bits) +{ + unsigned int temp; + + /* + * i2sr_clr_opcode is the value to clear all interrupts. Here we want to + * clear only <bits>, so we write ~i2sr_clr_opcode with just <bits> + * toggled. This is required because i.MX needs W0C and Vybrid uses W1C. + */ + temp = ~i2c_imx->hwdata->i2sr_clr_opcode ^ bits; + imx_i2c_write_reg(temp, i2c_imx, IMX_I2C_I2SR); +} + static int i2c_imx_bus_busy(struct imx_i2c_struct *i2c_imx, int for_busy, bool atomic) { unsigned long orig_jiffies = jiffies; @@ -424,8 +437,7 @@ static int i2c_imx_bus_busy(struct imx_i2c_struct *i2c_imx, int for_busy, bool a /* check for arbitration lost */ if (temp & I2SR_IAL) { - temp &= ~I2SR_IAL; - imx_i2c_write_reg(temp, i2c_imx, IMX_I2C_I2SR); + i2c_imx_clear_irq(i2c_imx, I2SR_IAL); return -EAGAIN; } @@ -469,7 +481,7 @@ static int i2c_imx_trx_complete(struct imx_i2c_struct *i2c_imx, bool atomic) */ readb_poll_timeout_atomic(addr, regval, regval & I2SR_IIF, 5, 1000 + 100); i2c_imx->i2csr = regval; - imx_i2c_write_reg(0, i2c_imx, IMX_I2C_I2SR); + i2c_imx_clear_irq(i2c_imx, I2SR_IIF | I2SR_IAL); } else { wait_event_timeout(i2c_imx->queue, i2c_imx->i2csr & I2SR_IIF, HZ / 10); } @@ -623,9 +635,7 @@ static irqreturn_t i2c_imx_isr(int irq, void *dev_id) if (temp & I2SR_IIF) { /* save status register */ i2c_imx->i2csr = temp; - temp &= 
~I2SR_IIF; - temp |= (i2c_imx->hwdata->i2sr_clr_opcode & I2SR_IIF); - imx_i2c_write_reg(temp, i2c_imx, IMX_I2C_I2SR); + i2c_imx_clear_irq(i2c_imx, I2SR_IIF); wake_up(&i2c_imx->queue); return IRQ_HANDLED; } From 1de67a3dee7a279ebe4d892b359fe3696938ec15 Mon Sep 17 00:00:00 2001 From: Christian Eggers Date: Fri, 9 Oct 2020 13:03:19 +0200 Subject: [PATCH 104/296] i2c: imx: Check for I2SR_IAL after every byte Arbitration Lost (IAL) can happen after every single byte transfer. If arbitration is lost, the I2C hardware will autonomously switch from master mode to slave. If a transfer is not aborted in this state, consecutive transfers will not be executed by the hardware and will timeout. Signed-off-by: Christian Eggers Tested (not extensively) on Vybrid VF500 (Toradex VF50): Tested-by: Krzysztof Kozlowski Acked-by: Oleksij Rempel Cc: stable@vger.kernel.org Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-imx.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/i2c/busses/i2c-imx.c b/drivers/i2c/busses/i2c-imx.c index 39e98d216016..a2abae124342 100644 --- a/drivers/i2c/busses/i2c-imx.c +++ b/drivers/i2c/busses/i2c-imx.c @@ -490,6 +490,16 @@ static int i2c_imx_trx_complete(struct imx_i2c_struct *i2c_imx, bool atomic) dev_dbg(&i2c_imx->adapter.dev, "<%s> Timeout\n", __func__); return -ETIMEDOUT; } + + /* check for arbitration lost */ + if (i2c_imx->i2csr & I2SR_IAL) { + dev_dbg(&i2c_imx->adapter.dev, "<%s> Arbitration lost\n", __func__); + i2c_imx_clear_irq(i2c_imx, I2SR_IAL); + + i2c_imx->i2csr = 0; + return -EAGAIN; + } + dev_dbg(&i2c_imx->adapter.dev, "<%s> TRX complete\n", __func__); i2c_imx->i2csr = 0; return 0; From 61e6fe59ede155881a622f5901551b1cc8748f6a Mon Sep 17 00:00:00 2001 From: Christian Eggers Date: Fri, 9 Oct 2020 13:03:20 +0200 Subject: [PATCH 105/296] i2c: imx: Don't generate STOP condition if arbitration has been lost If arbitration is lost, the master automatically changes to slave mode. 
I2SR_IBB may or may not be reset by hardware. Raising a STOP condition by resetting I2CR_MSTA has no effect and will not clear I2SR_IBB. So calling i2c_imx_bus_busy() is not required and would busy-wait until timeout. Signed-off-by: Christian Eggers Tested (not extensively) on Vybrid VF500 (Toradex VF50): Tested-by: Krzysztof Kozlowski Acked-by: Oleksij Rempel Cc: stable@vger.kernel.org # Requires trivial backporting, simple remove # the 3rd argument from the calls to # i2c_imx_bus_busy(). Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-imx.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/i2c/busses/i2c-imx.c b/drivers/i2c/busses/i2c-imx.c index a2abae124342..e6f8d6e45a15 100644 --- a/drivers/i2c/busses/i2c-imx.c +++ b/drivers/i2c/busses/i2c-imx.c @@ -615,6 +615,8 @@ static void i2c_imx_stop(struct imx_i2c_struct *i2c_imx, bool atomic) /* Stop I2C transaction */ dev_dbg(&i2c_imx->adapter.dev, "<%s>\n", __func__); temp = imx_i2c_read_reg(i2c_imx, IMX_I2C_I2CR); + if (!(temp & I2CR_MSTA)) + i2c_imx->stopped = 1; temp &= ~(I2CR_MSTA | I2CR_MTX); if (i2c_imx->dma) temp &= ~I2CR_DMAEN; @@ -778,9 +780,12 @@ static int i2c_imx_dma_read(struct imx_i2c_struct *i2c_imx, */ dev_dbg(dev, "<%s> clear MSTA\n", __func__); temp = imx_i2c_read_reg(i2c_imx, IMX_I2C_I2CR); + if (!(temp & I2CR_MSTA)) + i2c_imx->stopped = 1; temp &= ~(I2CR_MSTA | I2CR_MTX); imx_i2c_write_reg(temp, i2c_imx, IMX_I2C_I2CR); - i2c_imx_bus_busy(i2c_imx, 0, false); + if (!i2c_imx->stopped) + i2c_imx_bus_busy(i2c_imx, 0, false); } else { /* * For i2c master receiver repeat restart operation like: @@ -905,9 +910,12 @@ static int i2c_imx_read(struct imx_i2c_struct *i2c_imx, struct i2c_msg *msgs, dev_dbg(&i2c_imx->adapter.dev, "<%s> clear MSTA\n", __func__); temp = imx_i2c_read_reg(i2c_imx, IMX_I2C_I2CR); + if (!(temp & I2CR_MSTA)) + i2c_imx->stopped = 1; temp &= ~(I2CR_MSTA | I2CR_MTX); imx_i2c_write_reg(temp, i2c_imx, IMX_I2C_I2CR); - i2c_imx_bus_busy(i2c_imx, 0, 
atomic); + if (!i2c_imx->stopped) + i2c_imx_bus_busy(i2c_imx, 0, atomic); } else { /* * For i2c master receiver repeat restart operation like: From f5da54187e33dce9bea63170667dbb0ca8d98194 Mon Sep 17 00:00:00 2001 From: Xuan Zhuo Date: Tue, 1 Dec 2020 21:56:57 +0800 Subject: [PATCH 106/296] xsk: Replace datagram_poll by sock_poll_wait datagram_poll will judge the current socket status (EPOLLIN, EPOLLOUT) based on the traditional socket information (eg: sk_wmem_alloc), but this does not apply to xsk. So this patch uses sock_poll_wait instead of datagram_poll, and the mask is calculated by xsk_poll. Fixes: c497176cb2e4 ("xsk: add Rx receive functions and poll support") Signed-off-by: Xuan Zhuo Signed-off-by: Daniel Borkmann Acked-by: Magnus Karlsson Link: https://lore.kernel.org/bpf/e82f4697438cd63edbf271ebe1918db8261b7c09.1606555939.git.xuanzhuo@linux.alibaba.com --- net/xdp/xsk.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c index b7b039bd9d03..9bbfd8adbb73 100644 --- a/net/xdp/xsk.c +++ b/net/xdp/xsk.c @@ -471,11 +471,13 @@ static int xsk_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len) static __poll_t xsk_poll(struct file *file, struct socket *sock, struct poll_table_struct *wait) { - __poll_t mask = datagram_poll(file, sock, wait); + __poll_t mask = 0; struct sock *sk = sock->sk; struct xdp_sock *xs = xdp_sk(sk); struct xsk_buff_pool *pool; + sock_poll_wait(file, sock, wait); + if (unlikely(!xsk_is_bound(xs))) return mask; From 3413f04141aa440c71da187755e8e22f5093ce83 Mon Sep 17 00:00:00 2001 From: Xuan Zhuo Date: Tue, 1 Dec 2020 21:56:58 +0800 Subject: [PATCH 107/296] xsk: Change the tx writeable condition Modify the tx writeable condition from the queue is not full to the number of present tx queues is less than the half of the total number of queues. 
Because a full tx queue becomes not-full again after a very short time, the old not-full condition causes a large number of EPOLLOUT events and, in turn, a large number of process wakeups. Fixes: 35fcde7f8deb ("xsk: support for Tx") Signed-off-by: Xuan Zhuo Signed-off-by: Daniel Borkmann Acked-by: Magnus Karlsson Link: https://lore.kernel.org/bpf/508fef55188d4e1160747ead64c6dcda36735880.1606555939.git.xuanzhuo@linux.alibaba.com --- net/xdp/xsk.c | 16 +++++++++++++--- net/xdp/xsk_queue.h | 6 ++++++ 2 files changed, 19 insertions(+), 3 deletions(-) diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c index 9bbfd8adbb73..62504471fd20 100644 --- a/net/xdp/xsk.c +++ b/net/xdp/xsk.c @@ -211,6 +211,14 @@ static int __xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp, u32 len, return 0; } +static bool xsk_tx_writeable(struct xdp_sock *xs) +{ + if (xskq_cons_present_entries(xs->tx) > xs->tx->nentries / 2) + return false; + + return true; +} + static bool xsk_is_bound(struct xdp_sock *xs) { if (READ_ONCE(xs->state) == XSK_BOUND) { @@ -296,7 +304,8 @@ void xsk_tx_release(struct xsk_buff_pool *pool) rcu_read_lock(); list_for_each_entry_rcu(xs, &pool->xsk_tx_list, tx_list) { __xskq_cons_release(xs->tx); - xs->sk.sk_write_space(&xs->sk); + if (xsk_tx_writeable(xs)) + xs->sk.sk_write_space(&xs->sk); } rcu_read_unlock(); } @@ -436,7 +445,8 @@ static int xsk_generic_xmit(struct sock *sk) out: if (sent_frame) - sk->sk_write_space(sk); + if (xsk_tx_writeable(xs)) + sk->sk_write_space(sk); mutex_unlock(&xs->mutex); return err; @@ -493,7 +503,7 @@ static __poll_t xsk_poll(struct file *file, struct socket *sock, if (xs->rx && !xskq_prod_is_empty(xs->rx)) mask |= EPOLLIN | EPOLLRDNORM; - if (xs->tx && !xskq_cons_is_full(xs->tx)) + if (xs->tx && xsk_tx_writeable(xs)) mask |= EPOLLOUT | EPOLLWRNORM; return mask; diff --git a/net/xdp/xsk_queue.h b/net/xdp/xsk_queue.h index cdb9cf3cd136..9e71b9f27679 100644 --- a/net/xdp/xsk_queue.h +++ b/net/xdp/xsk_queue.h @@ -264,6 +264,12 @@ static inline bool xskq_cons_is_full(struct xsk_queue *q)
q->nentries; } +static inline u32 xskq_cons_present_entries(struct xsk_queue *q) +{ + /* No barriers needed since data is not accessed */ + return READ_ONCE(q->ring->producer) - READ_ONCE(q->ring->consumer); +} + /* Functions for producers */ static inline bool xskq_prod_is_full(struct xsk_queue *q) From 9261a1db80bc81dd445cd6dcfb466b632ad9faa8 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 26 Nov 2020 14:04:05 +0000 Subject: [PATCH 108/296] drm/i915/gt: Protect context lifetime with RCU Allow a brief period for continued access to a dead intel_context by deferring the release of the struct until after an RCU grace period. As we are using a dedicated slab cache for the contexts, we can defer the release of the slab pages via RCU, with the caveat that individual structs may be reused from the freelist within an RCU grace period. To handle that, we have to avoid clearing members of the zombie struct. This is required for a later patch to handle locking around virtual requests in the signaler, as those requests may want to move between engines and be destroyed while we are holding b->irq_lock on a physical engine. v2: Drop mutex_reinit(), if we never mark the mutex as destroyed we don't need to reset the debug code, at the loss of having the mutex debug code spot us attempting to destroy a locked mutex. v3: As the intended use will remain strongly reference counted, with very little inflight access across reuse, drop the ctor. v4: Drop the unrequired change to remove the temporary reference around dropping the active context, and add back some more missing ctor operations. v5: The ctor is back. Tvrtko spotted that ce->signal_lock [introduced later] may be accessed under RCU and so needs special care not to be reinitialised. v6: Don't mix SLAB_TYPESAFE_BY_RCU and RCU list iteration.
Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20201126140407.31952-3-chris@chris-wilson.co.uk (cherry picked from commit 14d1eaf08845c534963c83f754afe0cb14cb2512) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/gt/intel_context.c | 12 +++++++++--- drivers/gpu/drm/i915/gt/intel_context_types.h | 11 ++++++++++- 2 files changed, 19 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c index 92a3f25c4006..d3a835212167 100644 --- a/drivers/gpu/drm/i915/gt/intel_context.c +++ b/drivers/gpu/drm/i915/gt/intel_context.c @@ -25,9 +25,16 @@ static struct intel_context *intel_context_alloc(void) return kmem_cache_zalloc(global.slab_ce, GFP_KERNEL); } +static void rcu_context_free(struct rcu_head *rcu) +{ + struct intel_context *ce = container_of(rcu, typeof(*ce), rcu); + + kmem_cache_free(global.slab_ce, ce); +} + void intel_context_free(struct intel_context *ce) { - kmem_cache_free(global.slab_ce, ce); + call_rcu(&ce->rcu, rcu_context_free); } struct intel_context * @@ -356,8 +363,7 @@ static int __intel_context_active(struct i915_active *active) } void -intel_context_init(struct intel_context *ce, - struct intel_engine_cs *engine) +intel_context_init(struct intel_context *ce, struct intel_engine_cs *engine) { GEM_BUG_ON(!engine->cops); GEM_BUG_ON(!engine->gt->vm); diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h index 552cb57a2e8c..20cb5835d1c3 100644 --- a/drivers/gpu/drm/i915/gt/intel_context_types.h +++ b/drivers/gpu/drm/i915/gt/intel_context_types.h @@ -44,7 +44,16 @@ struct intel_context_ops { }; struct intel_context { - struct kref ref; + /* + * Note: Some fields may be accessed under RCU. + * + * Unless otherwise noted a field can safely be assumed to be protected + * by strong reference counting. + */ + union { + struct kref ref; /* no kref_get_unless_zero()! 
*/ + struct rcu_head rcu; + }; struct intel_engine_cs *engine; struct intel_engine_cs *inflight; From 2bfdf302465a5eab941e551e2869a96bb473f66f Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 26 Nov 2020 14:04:06 +0000 Subject: [PATCH 109/296] drm/i915/gt: Split the breadcrumb spinlock between global and contexts As we funnel more and more contexts into the breadcrumbs on an engine, the hold time of b->irq_lock grows. As we may then contend with the b->irq_lock during request submission, this increases the burden upon the engine->active.lock and so directly impacts both our execution latency and client latency. If we split the b->irq_lock by introducing a per-context spinlock to manage the signalers within a context, we then only need the b->irq_lock for enabling/disabling the interrupt and can avoid taking the lock for walking the list of contexts within the signal worker. Even with the current setup, this greatly reduces the number of times we have to take and fight for b->irq_lock. Furthermore, this closes the race between enabling the signaling context while it is in the process of being signaled and removed: <4>[ 416.208555] list_add corruption. prev->next should be next (ffff8881951d5910), but was dead000000000100. (prev=ffff8882781bb870). 
<4>[ 416.208573] WARNING: CPU: 7 PID: 0 at lib/list_debug.c:28 __list_add_valid+0x4d/0x70 <4>[ 416.208575] Modules linked in: i915(+) vgem snd_hda_codec_hdmi snd_hda_codec_realtek snd_hda_codec_generic ledtrig_audio mei_hdcp x86_pkg_temp_thermal coretemp ax88179_178a usbnet mii crct10dif_pclmul snd_intel_dspcfg crc32_pclmul snd_hda_codec snd_hwdep ghash_clmulni_intel snd_hda_core e1000e snd_pcm ptp pps_core mei_me mei prime_numbers intel_lpss_pci [last unloaded: i915] <4>[ 416.208611] CPU: 7 PID: 0 Comm: swapper/7 Tainted: G U 5.8.0-CI-CI_DRM_8852+ #1 <4>[ 416.208614] Hardware name: Intel Corporation Ice Lake Client Platform/IceLake Y LPDDR4x T4 RVP TLC, BIOS ICLSFWR1.R00.3212.A00.1905212112 05/21/2019 <4>[ 416.208627] RIP: 0010:__list_add_valid+0x4d/0x70 <4>[ 416.208631] Code: c3 48 89 d1 48 c7 c7 60 18 33 82 48 89 c2 e8 ea e0 b6 ff 0f 0b 31 c0 c3 48 89 c1 4c 89 c6 48 c7 c7 b0 18 33 82 e8 d3 e0 b6 ff <0f> 0b 31 c0 c3 48 89 f2 4c 89 c1 48 89 fe 48 c7 c7 00 19 33 82 e8 <4>[ 416.208633] RSP: 0018:ffffc90000280e18 EFLAGS: 00010086 <4>[ 416.208636] RAX: 0000000000000000 RBX: ffff888250a44880 RCX: 0000000000000105 <4>[ 416.208639] RDX: 0000000000000105 RSI: ffffffff82320c5b RDI: 00000000ffffffff <4>[ 416.208641] RBP: ffff8882781bb870 R08: 0000000000000000 R09: 0000000000000001 <4>[ 416.208643] R10: 00000000054d2957 R11: 000000006abbd991 R12: ffff8881951d58c8 <4>[ 416.208646] R13: ffff888286073880 R14: ffff888286073848 R15: ffff8881951d5910 <4>[ 416.208669] FS: 0000000000000000(0000) GS:ffff88829c180000(0000) knlGS:0000000000000000 <4>[ 416.208671] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 <4>[ 416.208673] CR2: 0000556231326c48 CR3: 0000000005610001 CR4: 0000000000760ee0 <4>[ 416.208675] PKRU: 55555554 <4>[ 416.208677] Call Trace: <4>[ 416.208679] <4>[ 416.208751] i915_request_enable_breadcrumb+0x278/0x400 [i915] <4>[ 416.208839] __i915_request_submit+0xca/0x2a0 [i915] <4>[ 416.208892] __execlists_submission_tasklet+0x480/0x1830 [i915] <4>[ 416.208942] 
execlists_submission_tasklet+0xc4/0x130 [i915] <4>[ 416.208947] tasklet_action_common.isra.17+0x6c/0x1c0 <4>[ 416.208954] __do_softirq+0xdf/0x498 <4>[ 416.208960] ? handle_fasteoi_irq+0x150/0x150 <4>[ 416.208964] asm_call_on_stack+0xf/0x20 <4>[ 416.208966] <4>[ 416.208969] do_softirq_own_stack+0xa1/0xc0 <4>[ 416.208972] irq_exit_rcu+0xb5/0xc0 <4>[ 416.208976] common_interrupt+0xf7/0x260 <4>[ 416.208980] asm_common_interrupt+0x1e/0x40 <4>[ 416.208985] RIP: 0010:cpuidle_enter_state+0xb6/0x410 <4>[ 416.208987] Code: 00 31 ff e8 9c 3e 89 ff 80 7c 24 0b 00 74 12 9c 58 f6 c4 02 0f 85 31 03 00 00 31 ff e8 e3 6c 90 ff e8 fe a4 94 ff fb 45 85 ed <0f> 88 c7 02 00 00 49 63 c5 4c 2b 24 24 48 8d 14 40 48 8d 14 90 48 <4>[ 416.208989] RSP: 0018:ffffc90000143e70 EFLAGS: 00000206 <4>[ 416.208991] RAX: 0000000000000007 RBX: ffffe8ffffda8070 RCX: 0000000000000000 <4>[ 416.208993] RDX: 0000000000000000 RSI: ffffffff8238b4ee RDI: ffffffff8233184f <4>[ 416.208995] RBP: ffffffff826b4e00 R08: 0000000000000000 R09: 0000000000000000 <4>[ 416.208997] R10: 0000000000000001 R11: 0000000000000000 R12: 00000060e7f24a8f <4>[ 416.208998] R13: 0000000000000003 R14: 0000000000000003 R15: 0000000000000003 <4>[ 416.209012] cpuidle_enter+0x24/0x40 <4>[ 416.209016] do_idle+0x22f/0x2d0 <4>[ 416.209022] cpu_startup_entry+0x14/0x20 <4>[ 416.209025] start_secondary+0x158/0x1a0 <4>[ 416.209030] secondary_startup_64+0xa4/0xb0 <4>[ 416.209039] irq event stamp: 10186977 <4>[ 416.209042] hardirqs last enabled at (10186976): [] tasklet_action_common.isra.17+0xe3/0x1c0 <4>[ 416.209044] hardirqs last disabled at (10186977): [] _raw_spin_lock_irqsave+0xd/0x50 <4>[ 416.209047] softirqs last enabled at (10186968): [] irq_enter_rcu+0x6a/0x70 <4>[ 416.209049] softirqs last disabled at (10186969): [] asm_call_on_stack+0xf/0x20 <4>[ 416.209317] list_del corruption, ffff8882781bb870->next is LIST_POISON1 (dead000000000100) <4>[ 416.209317] WARNING: CPU: 7 PID: 46 at lib/list_debug.c:47 __list_del_entry_valid+0x4e/0x90 <4>[ 
416.209317] Modules linked in: i915(+) vgem snd_hda_codec_hdmi snd_hda_codec_realtek snd_hda_codec_generic ledtrig_audio mei_hdcp x86_pkg_temp_thermal coretemp ax88179_178a usbnet mii crct10dif_pclmul snd_intel_dspcfg crc32_pclmul snd_hda_codec snd_hwdep ghash_clmulni_intel snd_hda_core e1000e snd_pcm ptp pps_core mei_me mei prime_numbers intel_lpss_pci [last unloaded: i915] <4>[ 416.209317] CPU: 7 PID: 46 Comm: ksoftirqd/7 Tainted: G U W 5.8.0-CI-CI_DRM_8852+ #1 <4>[ 416.209317] Hardware name: Intel Corporation Ice Lake Client Platform/IceLake Y LPDDR4x T4 RVP TLC, BIOS ICLSFWR1.R00.3212.A00.1905212112 05/21/2019 <4>[ 416.209317] RIP: 0010:__list_del_entry_valid+0x4e/0x90 <4>[ 416.209317] Code: 2e 48 8b 32 48 39 fe 75 3a 48 8b 50 08 48 39 f2 75 48 b8 01 00 00 00 c3 48 89 fe 48 89 c2 48 c7 c7 38 19 33 82 e8 62 e0 b6 ff <0f> 0b 31 c0 c3 48 89 fe 48 c7 c7 70 19 33 82 e8 4e e0 b6 ff 0f 0b <4>[ 416.209317] RSP: 0018:ffffc90000280de8 EFLAGS: 00010086 <4>[ 416.209317] RAX: 0000000000000000 RBX: ffff8882781bb848 RCX: 0000000000010104 <4>[ 416.209317] RDX: 0000000000010104 RSI: ffffffff8238b4ee RDI: 00000000ffffffff <4>[ 416.209317] RBP: ffff8882781bb880 R08: 0000000000000000 R09: 0000000000000001 <4>[ 416.209317] R10: 000000009fb6666e R11: 00000000feca9427 R12: ffffc90000280e18 <4>[ 416.209317] R13: ffff8881951d5930 R14: dead0000000000d8 R15: ffff8882781bb880 <4>[ 416.209317] FS: 0000000000000000(0000) GS:ffff88829c180000(0000) knlGS:0000000000000000 <4>[ 416.209317] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 <4>[ 416.209317] CR2: 0000556231326c48 CR3: 0000000005610001 CR4: 0000000000760ee0 <4>[ 416.209317] PKRU: 55555554 <4>[ 416.209317] Call Trace: <4>[ 416.209317] <4>[ 416.209317] remove_signaling_context.isra.13+0xd/0x70 [i915] <4>[ 416.209513] signal_irq_work+0x1f7/0x4b0 [i915] This is caused by virtual engines where although we take the breadcrumb lock on each of the active engines, they may be different engines on different requests, It turns out that the 
b->irq_lock was not a sufficient proxy for the engine->active.lock in the case of more than one request, so introduce an explicit lock around ce->signals. v2: ce->signal_lock is acquired with only RCU protection and so must be treated carefully and not cleared during reallocation. We also then need to confirm that the ce we lock is the same as we found in the breadcrumb list. Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/2276 Fixes: c18636f76344 ("drm/i915: Remove requirement for holding i915_request.lock for breadcrumbs") Fixes: 2854d866327a ("drm/i915/gt: Replace intel_engine_transfer_stale_breadcrumbs") Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20201126140407.31952-4-chris@chris-wilson.co.uk (cherry picked from commit c744d50363b714783bbc88d986cc16def13710f7) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/gt/intel_breadcrumbs.c | 168 ++++++++---------- .../gpu/drm/i915/gt/intel_breadcrumbs_types.h | 6 +- drivers/gpu/drm/i915/gt/intel_context.c | 3 +- drivers/gpu/drm/i915/gt/intel_context_types.h | 12 +- drivers/gpu/drm/i915/i915_request.h | 6 +- 5 files changed, 90 insertions(+), 105 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c index cf6e05ea4d8f..a24cc1ff08a0 100644 --- a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c +++ b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c @@ -101,18 +101,37 @@ static void __intel_breadcrumbs_disarm_irq(struct intel_breadcrumbs *b) intel_gt_pm_put_async(b->irq_engine->gt); } +static void intel_breadcrumbs_disarm_irq(struct intel_breadcrumbs *b) +{ + spin_lock(&b->irq_lock); + if (b->irq_armed) + __intel_breadcrumbs_disarm_irq(b); + spin_unlock(&b->irq_lock); +} + static void add_signaling_context(struct intel_breadcrumbs *b, struct intel_context *ce) { - intel_context_get(ce); - list_add_tail(&ce->signal_link, &b->signalers); + lockdep_assert_held(&ce->signal_lock); 
+ + spin_lock(&b->signalers_lock); + list_add_rcu(&ce->signal_link, &b->signalers); + spin_unlock(&b->signalers_lock); } -static void remove_signaling_context(struct intel_breadcrumbs *b, +static bool remove_signaling_context(struct intel_breadcrumbs *b, struct intel_context *ce) { - list_del(&ce->signal_link); - intel_context_put(ce); + lockdep_assert_held(&ce->signal_lock); + + if (!list_empty(&ce->signals)) + return false; + + spin_lock(&b->signalers_lock); + list_del_rcu(&ce->signal_link); + spin_unlock(&b->signalers_lock); + + return true; } static inline bool __request_completed(const struct i915_request *rq) @@ -175,6 +194,8 @@ static void add_retire(struct intel_breadcrumbs *b, struct intel_timeline *tl) static bool __signal_request(struct i915_request *rq) { + GEM_BUG_ON(test_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags)); + if (!__dma_fence_signal(&rq->fence)) { i915_request_put(rq); return false; @@ -195,15 +216,12 @@ static void signal_irq_work(struct irq_work *work) struct intel_breadcrumbs *b = container_of(work, typeof(*b), irq_work); const ktime_t timestamp = ktime_get(); struct llist_node *signal, *sn; - struct intel_context *ce, *cn; - struct list_head *pos, *next; + struct intel_context *ce; signal = NULL; if (unlikely(!llist_empty(&b->signaled_requests))) signal = llist_del_all(&b->signaled_requests); - spin_lock(&b->irq_lock); - /* * Keep the irq armed until the interrupt after all listeners are gone. * @@ -229,47 +247,44 @@ static void signal_irq_work(struct irq_work *work) * interrupt draw less ire from other users of the system and tools * like powertop. 
*/ - if (!signal && b->irq_armed && list_empty(&b->signalers)) - __intel_breadcrumbs_disarm_irq(b); + if (!signal && READ_ONCE(b->irq_armed) && list_empty(&b->signalers)) + intel_breadcrumbs_disarm_irq(b); - list_for_each_entry_safe(ce, cn, &b->signalers, signal_link) { - GEM_BUG_ON(list_empty(&ce->signals)); + rcu_read_lock(); + list_for_each_entry_rcu(ce, &b->signalers, signal_link) { + struct i915_request *rq; - list_for_each_safe(pos, next, &ce->signals) { - struct i915_request *rq = - list_entry(pos, typeof(*rq), signal_link); + list_for_each_entry_rcu(rq, &ce->signals, signal_link) { + bool release; - GEM_BUG_ON(!check_signal_order(ce, rq)); if (!__request_completed(rq)) break; + if (!test_and_clear_bit(I915_FENCE_FLAG_SIGNAL, + &rq->fence.flags)) + break; + /* * Queue for execution after dropping the signaling * spinlock as the callback chain may end up adding * more signalers to the same context or engine. */ - clear_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags); + spin_lock(&ce->signal_lock); + list_del_rcu(&rq->signal_link); + release = remove_signaling_context(b, ce); + spin_unlock(&ce->signal_lock); + if (__signal_request(rq)) /* We own signal_node now, xfer to local list */ signal = slist_add(&rq->signal_node, signal); - } - /* - * We process the list deletion in bulk, only using a list_add - * (not list_move) above but keeping the status of - * rq->signal_link known with the I915_FENCE_FLAG_SIGNAL bit. 
- */ - if (!list_is_first(pos, &ce->signals)) { - /* Advance the list to the first incomplete request */ - __list_del_many(&ce->signals, pos); - if (&ce->signals == pos) { /* now empty */ + if (release) { add_retire(b, ce->timeline); - remove_signaling_context(b, ce); + intel_context_put(ce); } } } - - spin_unlock(&b->irq_lock); + rcu_read_unlock(); llist_for_each_safe(signal, sn, signal) { struct i915_request *rq = @@ -298,14 +313,15 @@ intel_breadcrumbs_create(struct intel_engine_cs *irq_engine) if (!b) return NULL; - spin_lock_init(&b->irq_lock); + b->irq_engine = irq_engine; + + spin_lock_init(&b->signalers_lock); INIT_LIST_HEAD(&b->signalers); init_llist_head(&b->signaled_requests); + spin_lock_init(&b->irq_lock); init_irq_work(&b->irq_work, signal_irq_work); - b->irq_engine = irq_engine; - return b; } @@ -347,9 +363,9 @@ void intel_breadcrumbs_free(struct intel_breadcrumbs *b) kfree(b); } -static void insert_breadcrumb(struct i915_request *rq, - struct intel_breadcrumbs *b) +static void insert_breadcrumb(struct i915_request *rq) { + struct intel_breadcrumbs *b = READ_ONCE(rq->engine)->breadcrumbs; struct intel_context *ce = rq->context; struct list_head *pos; @@ -371,6 +387,7 @@ static void insert_breadcrumb(struct i915_request *rq, } if (list_empty(&ce->signals)) { + intel_context_get(ce); add_signaling_context(b, ce); pos = &ce->signals; } else { @@ -396,8 +413,9 @@ static void insert_breadcrumb(struct i915_request *rq, break; } } - list_add(&rq->signal_link, pos); + list_add_rcu(&rq->signal_link, pos); GEM_BUG_ON(!check_signal_order(ce, rq)); + GEM_BUG_ON(test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &rq->fence.flags)); set_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags); /* @@ -410,7 +428,7 @@ static void insert_breadcrumb(struct i915_request *rq, bool i915_request_enable_breadcrumb(struct i915_request *rq) { - struct intel_breadcrumbs *b; + struct intel_context *ce = rq->context; /* Serialises with i915_request_retire() using rq->lock */ if 
(test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &rq->fence.flags)) @@ -425,67 +443,30 @@ bool i915_request_enable_breadcrumb(struct i915_request *rq) if (!test_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags)) return true; - /* - * rq->engine is locked by rq->engine->active.lock. That however - * is not known until after rq->engine has been dereferenced and - * the lock acquired. Hence we acquire the lock and then validate - * that rq->engine still matches the lock we hold for it. - * - * Here, we are using the breadcrumb lock as a proxy for the - * rq->engine->active.lock, and we know that since the breadcrumb - * will be serialised within i915_request_submit/i915_request_unsubmit, - * the engine cannot change while active as long as we hold the - * breadcrumb lock on that engine. - * - * From the dma_fence_enable_signaling() path, we are outside of the - * request submit/unsubmit path, and so we must be more careful to - * acquire the right lock. - */ - b = READ_ONCE(rq->engine)->breadcrumbs; - spin_lock(&b->irq_lock); - while (unlikely(b != READ_ONCE(rq->engine)->breadcrumbs)) { - spin_unlock(&b->irq_lock); - b = READ_ONCE(rq->engine)->breadcrumbs; - spin_lock(&b->irq_lock); - } - - /* - * Now that we are finally serialised with request submit/unsubmit, - * [with b->irq_lock] and with i915_request_retire() [via checking - * SIGNALED with rq->lock] confirm the request is indeed active. If - * it is no longer active, the breadcrumb will be attached upon - * i915_request_submit(). 
- */ + spin_lock(&ce->signal_lock); if (test_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags)) - insert_breadcrumb(rq, b); - - spin_unlock(&b->irq_lock); + insert_breadcrumb(rq); + spin_unlock(&ce->signal_lock); return true; } void i915_request_cancel_breadcrumb(struct i915_request *rq) { - struct intel_breadcrumbs *b = rq->engine->breadcrumbs; + struct intel_context *ce = rq->context; + bool release; - /* - * We must wait for b->irq_lock so that we know the interrupt handler - * has released its reference to the intel_context and has completed - * the DMA_FENCE_FLAG_SIGNALED_BIT/I915_FENCE_FLAG_SIGNAL dance (if - * required). - */ - spin_lock(&b->irq_lock); - if (test_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags)) { - struct intel_context *ce = rq->context; + if (!test_and_clear_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags)) + return; - list_del(&rq->signal_link); - if (list_empty(&ce->signals)) - remove_signaling_context(b, ce); + spin_lock(&ce->signal_lock); + list_del_rcu(&rq->signal_link); + release = remove_signaling_context(rq->engine->breadcrumbs, ce); + spin_unlock(&ce->signal_lock); + if (release) + intel_context_put(ce); - clear_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags); - i915_request_put(rq); - } - spin_unlock(&b->irq_lock); + i915_request_put(rq); } static void print_signals(struct intel_breadcrumbs *b, struct drm_printer *p) @@ -495,18 +476,17 @@ static void print_signals(struct intel_breadcrumbs *b, struct drm_printer *p) drm_printf(p, "Signals:\n"); - spin_lock_irq(&b->irq_lock); - list_for_each_entry(ce, &b->signalers, signal_link) { - list_for_each_entry(rq, &ce->signals, signal_link) { + rcu_read_lock(); + list_for_each_entry_rcu(ce, &b->signalers, signal_link) { + list_for_each_entry_rcu(rq, &ce->signals, signal_link) drm_printf(p, "\t[%llx:%llx%s] @ %dms\n", rq->fence.context, rq->fence.seqno, i915_request_completed(rq) ? "!" : i915_request_started(rq) ? 
"*" : "", jiffies_to_msecs(jiffies - rq->emitted_jiffies)); - } } - spin_unlock_irq(&b->irq_lock); + rcu_read_unlock(); } void intel_engine_print_breadcrumbs(struct intel_engine_cs *engine, diff --git a/drivers/gpu/drm/i915/gt/intel_breadcrumbs_types.h b/drivers/gpu/drm/i915/gt/intel_breadcrumbs_types.h index 3fa19820b37a..a74bb3062bd8 100644 --- a/drivers/gpu/drm/i915/gt/intel_breadcrumbs_types.h +++ b/drivers/gpu/drm/i915/gt/intel_breadcrumbs_types.h @@ -29,18 +29,16 @@ * the overhead of waking that client is much preferred. */ struct intel_breadcrumbs { - spinlock_t irq_lock; /* protects the lists used in hardirq context */ - /* Not all breadcrumbs are attached to physical HW */ struct intel_engine_cs *irq_engine; + spinlock_t signalers_lock; /* protects the list of signalers */ struct list_head signalers; struct llist_head signaled_requests; + spinlock_t irq_lock; /* protects the interrupt from hardirq context */ struct irq_work irq_work; /* for use from inside irq_lock */ - unsigned int irq_enabled; - bool irq_armed; }; diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c index d3a835212167..349e7fa1488d 100644 --- a/drivers/gpu/drm/i915/gt/intel_context.c +++ b/drivers/gpu/drm/i915/gt/intel_context.c @@ -379,7 +379,8 @@ intel_context_init(struct intel_context *ce, struct intel_engine_cs *engine) ce->vm = i915_vm_get(engine->gt->vm); - INIT_LIST_HEAD(&ce->signal_link); + /* NB ce->signal_link/lock is used under RCU */ + spin_lock_init(&ce->signal_lock); INIT_LIST_HEAD(&ce->signals); mutex_init(&ce->pin_mutex); diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h index 20cb5835d1c3..52fa9c132746 100644 --- a/drivers/gpu/drm/i915/gt/intel_context_types.h +++ b/drivers/gpu/drm/i915/gt/intel_context_types.h @@ -25,6 +25,7 @@ DECLARE_EWMA(runtime, 3, 8); struct i915_gem_context; struct i915_gem_ww_ctx; struct i915_vma; +struct intel_breadcrumbs; struct intel_context; 
struct intel_ring; @@ -63,8 +64,15 @@ struct intel_context { struct i915_address_space *vm; struct i915_gem_context __rcu *gem_context; - struct list_head signal_link; - struct list_head signals; + /* + * @signal_lock protects the list of requests that need signaling, + * @signals. While there are any requests that need signaling, + * we add the context to the breadcrumbs worker, and remove it + * upon completion/cancellation of the last request. + */ + struct list_head signal_link; /* Accessed under RCU */ + struct list_head signals; /* Guarded by signal_lock */ + spinlock_t signal_lock; /* protects signals, the list of requests */ struct i915_vma *state; struct intel_ring *ring; diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h index 874af6db6103..620b6fab2c5c 100644 --- a/drivers/gpu/drm/i915/i915_request.h +++ b/drivers/gpu/drm/i915/i915_request.h @@ -177,10 +177,8 @@ struct i915_request { struct intel_ring *ring; struct intel_timeline __rcu *timeline; - union { - struct list_head signal_link; - struct llist_node signal_node; - }; + struct list_head signal_link; + struct llist_node signal_node; /* * The rcu epoch of when this request was allocated. Used to judiciously From 78b2eb8a1f10f366681acad8d21c974c1f66791a Mon Sep 17 00:00:00 2001 From: Venkata Ramana Nayana Date: Fri, 27 Nov 2020 12:07:16 +0000 Subject: [PATCH 110/296] drm/i915/gt: Retain default context state across shrinking As we use a shmemfs file to hold the context state, when not in use it may be swapped out, such as across suspend. Since we wrote into the shmemfs without marking the pages as dirty, the contents may be dropped instead of being written back to swap. On re-using the shmemfs file, such as creating a new context after resume, the contents of that file were likely garbage and so the new context could then hang the GPU. Simply mark the page as being written when copying into the shmemfs file, and the new contents will be retained across swapout.
Fixes: be1cb55a07bf ("drm/i915/gt: Keep a no-frills swappable copy of the default context state") Cc: Sudeep Dutt Cc: Matthew Auld Cc: Tvrtko Ursulin Cc: Ramalingam C Signed-off-by: CQ Tang Signed-off-by: Venkata Ramana Nayana Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson Cc: # v5.8+ Link: https://patchwork.freedesktop.org/patch/msgid/20201127120718.454037-161-matthew.auld@intel.com (cherry picked from commit a9d71f76ccfd309f3bd5f7c9b60e91a4decae792) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/gt/shmem_utils.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/shmem_utils.c b/drivers/gpu/drm/i915/gt/shmem_utils.c index f011ea42487e..463af675fadd 100644 --- a/drivers/gpu/drm/i915/gt/shmem_utils.c +++ b/drivers/gpu/drm/i915/gt/shmem_utils.c @@ -103,10 +103,13 @@ static int __shmem_rw(struct file *file, loff_t off, return PTR_ERR(page); vaddr = kmap(page); - if (write) + if (write) { memcpy(vaddr + offset_in_page(off), ptr, this); - else + set_page_dirty(page); + } else { memcpy(ptr, vaddr + offset_in_page(off), this); + } + mark_page_accessed(page); kunmap(page); put_page(page); From aff76ab795364569b1cac58c1d0bc7df956e3899 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 24 Nov 2020 18:35:21 +0000 Subject: [PATCH 111/296] drm/i915/gt: Limit frequency drop to RPe on parking We treat idling the GT (intel_rps_park) as a downclock event, and reduce the frequency we intend to restart the GT with. Since the two workloads are likely related (e.g. a compositor rendering every 16ms), we want to carry the frequency and load information from across the idling. However, we do also need to update the frequencies so that workloads that run for less than 1ms are autotuned by RPS (otherwise we leave compositors running at max clocks, draining excess power). Conversely, if we try to run too slowly, the next workload has to run longer. 
Since there is a hysteresis in the power graph, below a certain frequency running a short workload for longer consumes more energy than running it slightly higher for less time. The exact balance point is unknown beforehand, but measurements with 30fps media playback indicate that RPe is a better choice. Reported-by: Edward Baker Tested-by: Edward Baker Fixes: 043cd2d14ede ("drm/i915/gt: Leave rps->cur_freq on unpark") Signed-off-by: Chris Wilson Cc: Edward Baker Cc: Andi Shyti Cc: Lyude Paul Cc: # v5.8+ Reviewed-by: Rodrigo Vivi Reviewed-by: Andi Shyti Link: https://patchwork.freedesktop.org/patch/msgid/20201124183521.28623-1-chris@chris-wilson.co.uk (cherry picked from commit f7ed83cc1925f0b8ce2515044d674354035c3af9) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/gt/intel_rps.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c b/drivers/gpu/drm/i915/gt/intel_rps.c index e6a00eea0631..c1c9cc0ad3b9 100644 --- a/drivers/gpu/drm/i915/gt/intel_rps.c +++ b/drivers/gpu/drm/i915/gt/intel_rps.c @@ -883,6 +883,10 @@ void intel_rps_park(struct intel_rps *rps) adj = -2; rps->last_adj = adj; rps->cur_freq = max_t(int, rps->cur_freq + adj, rps->min_freq); + if (rps->cur_freq < rps->efficient_freq) { + rps->cur_freq = rps->efficient_freq; + rps->last_adj = 0; + } GT_TRACE(rps_to_gt(rps), "park:%x\n", rps->cur_freq); } From 37eade64eb11c6d548c9a7030ccc655decfb8fa0 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Fri, 6 Nov 2020 14:55:27 -0800 Subject: [PATCH 112/296] drm/i915/display: return earlier from intel_modeset_init() without display MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit !HAS_DISPLAY() implies !HAS_OVERLAY(), skipping overlay setup anyway, so return earlier from intel_modeset_init() for clarity. 
Cc: Ville Syrjälä Signed-off-by: Jani Nikula Signed-off-by: Lucas De Marchi Reviewed-by: José Roberto de Souza Link: https://patchwork.freedesktop.org/patch/msgid/20201106225531.920641-4-lucas.demarchi@intel.com (cherry picked from commit 71c8415d0daa78ef1295743d0e11ba0214d0a9b9) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/display/intel_display.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index 99e682563d47..c0d920f596f4 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -18039,11 +18039,11 @@ int intel_modeset_init(struct drm_i915_private *i915) { int ret; - intel_overlay_setup(i915); - if (!HAS_DISPLAY(i915)) return 0; + intel_overlay_setup(i915); + ret = intel_fbdev_init(&i915->drm); if (ret) return ret; From ccc9e67ab26feda7e62749bb54c05d7abe07dca9 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 25 Nov 2020 19:30:32 +0000 Subject: [PATCH 113/296] drm/i915/display: Defer initial modeset until after GGTT is initialised MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Prior to sanitizing the GGTT, the only operations allowed in intel_display_init_nogem() are those to reserve the preallocated (and active) regions in the GGTT leftover from the BIOS. Trying to allocate a GGTT vma (such as intel_pin_and_fence_fb_obj during the initial modeset) may then conflict with other preallocated regions that have not yet been protected. Move the initial modesetting from the end of init_nogem to the beginning of init so that any vma pinning (either framebuffers or DSB, for example), is after the GGTT is ready to handle it. 
This will prevent the DSB object from being destroyed too early: [ 53.449241] BUG: KASAN: use-after-free in i915_init_ggtt+0x324/0x9e0 [i915] [ 53.449309] Read of size 8 at addr ffff88811b1e8070 by task systemd-udevd/345 [ 53.449399] CPU: 1 PID: 345 Comm: systemd-udevd Tainted: G W 5.10.0-rc5+ #12 [ 53.449409] Call Trace: [ 53.449418] dump_stack+0x9a/0xcc [ 53.449558] ? i915_init_ggtt+0x324/0x9e0 [i915] [ 53.449565] print_address_description.constprop.0+0x3e/0x60 [ 53.449577] ? _raw_spin_lock_irqsave+0x4e/0x50 [ 53.449718] ? i915_init_ggtt+0x324/0x9e0 [i915] [ 53.449849] ? i915_init_ggtt+0x324/0x9e0 [i915] [ 53.449857] kasan_report.cold+0x1f/0x37 [ 53.449993] ? i915_init_ggtt+0x324/0x9e0 [i915] [ 53.450130] i915_init_ggtt+0x324/0x9e0 [i915] [ 53.450273] ? i915_ggtt_suspend+0x1f0/0x1f0 [i915] [ 53.450281] ? static_obj+0x69/0x80 [ 53.450289] ? lockdep_init_map_waits+0xa9/0x310 [ 53.450431] ? intel_wopcm_init+0x96/0x3d0 [i915] [ 53.450581] ? i915_gem_init+0x75/0x2d0 [i915] [ 53.450720] i915_gem_init+0x75/0x2d0 [i915] [ 53.450852] i915_driver_probe+0x8c2/0x1210 [i915] [ 53.450993] ? i915_pm_prepare+0x630/0x630 [i915] [ 53.451006] ? check_chain_key+0x1e7/0x2e0 [ 53.451025] ? __pm_runtime_resume+0x58/0xb0 [ 53.451157] i915_pci_probe+0xa6/0x2b0 [i915] [ 53.451285] ? i915_pci_remove+0x40/0x40 [i915] [ 53.451295] ? lockdep_hardirqs_on_prepare+0x124/0x230 [ 53.451302] ? _raw_spin_unlock_irqrestore+0x42/0x50 [ 53.451309] ? lockdep_hardirqs_on+0xbf/0x130 [ 53.451315] ? preempt_count_sub+0xf/0xb0 [ 53.451321] ? _raw_spin_unlock_irqrestore+0x2f/0x50 [ 53.451335] pci_device_probe+0xf9/0x190 [ 53.451350] really_probe+0x17f/0x5b0 [ 53.451365] driver_probe_device+0x13a/0x1c0 [ 53.451376] device_driver_attach+0x82/0x90 [ 53.451386] ? device_driver_attach+0x90/0x90 [ 53.451391] __driver_attach+0xab/0x190 [ 53.451401] ? device_driver_attach+0x90/0x90 [ 53.451407] bus_for_each_dev+0xe4/0x140 [ 53.451414] ? subsys_dev_iter_exit+0x10/0x10 [ 53.451423] ? 
__list_add_valid+0x2b/0xa0 [ 53.451440] bus_add_driver+0x227/0x2e0 [ 53.451454] driver_register+0xd3/0x150 [ 53.451585] i915_init+0x92/0xac [i915] [ 53.451592] ? 0xffffffffa0a20000 [ 53.451598] do_one_initcall+0xb6/0x3b0 [ 53.451606] ? trace_event_raw_event_initcall_finish+0x150/0x150 [ 53.451614] ? __kasan_kmalloc.constprop.0+0xc2/0xd0 [ 53.451627] ? kmem_cache_alloc_trace+0x4a4/0x8e0 [ 53.451634] ? kasan_unpoison_shadow+0x33/0x40 [ 53.451649] do_init_module+0xf8/0x350 [ 53.451662] load_module+0x43de/0x47f0 [ 53.451716] ? module_frob_arch_sections+0x20/0x20 [ 53.451731] ? rw_verify_area+0x5f/0x130 [ 53.451780] ? __do_sys_finit_module+0x10d/0x1a0 [ 53.451785] __do_sys_finit_module+0x10d/0x1a0 [ 53.451792] ? __ia32_sys_init_module+0x40/0x40 [ 53.451800] ? seccomp_do_user_notification.isra.0+0x5c0/0x5c0 [ 53.451829] ? rcu_read_lock_bh_held+0xb0/0xb0 [ 53.451835] ? mark_held_locks+0x24/0x90 [ 53.451856] do_syscall_64+0x33/0x80 [ 53.451863] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [ 53.451868] RIP: 0033:0x7fde09b4470d [ 53.451875] Code: 00 c3 66 2e 0f 1f 84 00 00 00 00 00 90 f3 0f 1e fa 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 53 f7 0c 00 f7 d8 64 89 01 48 [ 53.451880] RSP: 002b:00007ffd6abc1718 EFLAGS: 00000246 ORIG_RAX: 0000000000000139 [ 53.451890] RAX: ffffffffffffffda RBX: 000056444e528150 RCX: 00007fde09b4470d [ 53.451895] RDX: 0000000000000000 RSI: 00007fde09a21ded RDI: 000000000000000f [ 53.451899] RBP: 0000000000020000 R08: 0000000000000000 R09: 0000000000000000 [ 53.451904] R10: 000000000000000f R11: 0000000000000246 R12: 00007fde09a21ded [ 53.451909] R13: 0000000000000000 R14: 000056444e329200 R15: 000056444e528150 [ 53.451957] Allocated by task 345: [ 53.451995] kasan_save_stack+0x1b/0x40 [ 53.452001] __kasan_kmalloc.constprop.0+0xc2/0xd0 [ 53.452006] kmem_cache_alloc+0x1cd/0x8d0 [ 53.452146] i915_vma_instance+0x126/0xb70 [i915] [ 53.452304] i915_gem_object_ggtt_pin_ww+0x222/0x3f0 
[i915] [ 53.452446] intel_dsb_prepare+0x14f/0x230 [i915] [ 53.452588] intel_atomic_commit+0x183/0x690 [i915] [ 53.452730] intel_initial_commit+0x2bc/0x2f0 [i915] [ 53.452871] intel_modeset_init_nogem+0xa02/0x2af0 [i915] [ 53.452995] i915_driver_probe+0x8af/0x1210 [i915] [ 53.453120] i915_pci_probe+0xa6/0x2b0 [i915] [ 53.453125] pci_device_probe+0xf9/0x190 [ 53.453131] really_probe+0x17f/0x5b0 [ 53.453136] driver_probe_device+0x13a/0x1c0 [ 53.453142] device_driver_attach+0x82/0x90 [ 53.453148] __driver_attach+0xab/0x190 [ 53.453153] bus_for_each_dev+0xe4/0x140 [ 53.453158] bus_add_driver+0x227/0x2e0 [ 53.453164] driver_register+0xd3/0x150 [ 53.453286] i915_init+0x92/0xac [i915] [ 53.453292] do_one_initcall+0xb6/0x3b0 [ 53.453297] do_init_module+0xf8/0x350 [ 53.453302] load_module+0x43de/0x47f0 [ 53.453307] __do_sys_finit_module+0x10d/0x1a0 [ 53.453312] do_syscall_64+0x33/0x80 [ 53.453318] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [ 53.453345] Freed by task 82: [ 53.453379] kasan_save_stack+0x1b/0x40 [ 53.453384] kasan_set_track+0x1c/0x30 [ 53.453389] kasan_set_free_info+0x1b/0x30 [ 53.453394] __kasan_slab_free+0x112/0x160 [ 53.453399] kmem_cache_free+0xb2/0x3f0 [ 53.453536] i915_gem_flush_free_objects+0x31a/0x3b0 [i915] [ 53.453542] process_one_work+0x519/0x9f0 [ 53.453547] worker_thread+0x75/0x5c0 [ 53.453552] kthread+0x1da/0x230 [ 53.453557] ret_from_fork+0x22/0x30 [ 53.453584] The buggy address belongs to the object at ffff88811b1e8040 which belongs to the cache i915_vma of size 968 [ 53.453692] The buggy address is located 48 bytes inside of 968-byte region [ffff88811b1e8040, ffff88811b1e8408) [ 53.453792] The buggy address belongs to the page: [ 53.453842] page:00000000b35f7048 refcount:1 mapcount:0 mapping:0000000000000000 index:0xffff88811b1ef940 pfn:0x11b1e8 [ 53.453847] head:00000000b35f7048 order:3 compound_mapcount:0 compound_pincount:0 [ 53.453853] flags: 0x8000000000010200(slab|head) [ 53.453860] raw: 8000000000010200 ffff888115596248 ffff888115596248 
ffff8881155b6340 [ 53.453866] raw: ffff88811b1ef940 0000000000170001 00000001ffffffff 0000000000000000 [ 53.453870] page dumped because: kasan: bad access detected [ 53.453895] Memory state around the buggy address: [ 53.453944] ffff88811b1e7f00: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc [ 53.454011] ffff88811b1e7f80: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc [ 53.454079] >ffff88811b1e8000: fc fc fc fc fc fc fc fc fa fb fb fb fb fb fb fb [ 53.454146] ^ [ 53.454211] ffff88811b1e8080: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 53.454279] ffff88811b1e8100: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 53.454347] ================================================================== [ 53.454414] Disabling lock debugging due to kernel taint [ 53.454434] general protection fault, probably for non-canonical address 0xdead0000000000d0: 0000 [#1] PREEMPT SMP KASAN PTI [ 53.454446] CPU: 1 PID: 345 Comm: systemd-udevd Tainted: G B W 5.10.0-rc5+ #12 [ 53.454592] RIP: 0010:i915_init_ggtt+0x26f/0x9e0 [i915] [ 53.454602] Code: 89 8d 48 ff ff ff 4c 8d 60 d0 49 39 c7 0f 84 37 02 00 00 4c 89 b5 40 ff ff ff 4d 8d bc 24 90 00 00 00 4c 89 ff e8 c1 97 f8 e0 <49> 83 bc 24 90 00 00 00 00 0f 84 0f 02 00 00 49 8d 7c 24 08 e8 a8 [ 53.454618] RSP: 0018:ffff88812247f430 EFLAGS: 00010286 [ 53.454625] RAX: 0000000000000000 RBX: ffff888136440000 RCX: ffffffffa03fb78f [ 53.454633] RDX: 0000000000000000 RSI: 0000000000000008 RDI: dead000000000160 [ 53.454641] RBP: ffff88812247f500 R08: ffffffff8113589f R09: 0000000000000000 [ 53.454648] R10: ffffffff83063843 R11: fffffbfff060c708 R12: dead0000000000d0 [ 53.454656] R13: ffff888136449ba0 R14: 0000000000002000 R15: dead000000000160 [ 53.454664] FS: 00007fde095c4880(0000) GS:ffff88840c880000(0000) knlGS:0000000000000000 [ 53.454672] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 53.454679] CR2: 00007fef132b4f28 CR3: 000000012245c002 CR4: 00000000003706e0 [ 53.454686] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 
0000000000000000 [ 53.454693] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 53.454700] Call Trace: [ 53.454833] ? i915_ggtt_suspend+0x1f0/0x1f0 [i915] Reported-by: Matthew Auld Fixes: afeda4f3b1c8 ("drm/i915/dsb: Pre allocate and late cleanup of cmd buffer") Signed-off-by: Chris Wilson Cc: Ville Syrjälä Cc: Matthew Auld Cc: Lucas De Marchi Tested-by: Matthew Auld Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20201125193032.29282-1-chris@chris-wilson.co.uk (cherry picked from commit b3bf99daaee96a141536ce5c60a0d6dba6ec1d23) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/display/intel_display.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index c0d920f596f4..3bfe6ed67da1 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -18021,16 +18021,6 @@ int intel_modeset_init_nogem(struct drm_i915_private *i915) if (!HAS_GMCH(i915)) sanitize_watermarks(i915); - /* - * Force all active planes to recompute their states. So that on - * mode_setcrtc after probe, all the intel_plane_state variables - * are already calculated and there is no assert_plane warnings - * during bootup. - */ - ret = intel_initial_commit(dev); - if (ret) - drm_dbg_kms(&i915->drm, "Initial commit in probe failed.\n"); - return 0; } @@ -18042,6 +18032,16 @@ int intel_modeset_init(struct drm_i915_private *i915) if (!HAS_DISPLAY(i915)) return 0; + /* + * Force all active planes to recompute their states. So that on + * mode_setcrtc after probe, all the intel_plane_state variables + * are already calculated and there is no assert_plane warnings + * during bootup. 
+ */ + ret = intel_initial_commit(&i915->drm); + if (ret) + return ret; + intel_overlay_setup(i915); ret = intel_fbdev_init(&i915->drm); From acab02c1af43d3a9051524579b1c3dcfbfa5479d Mon Sep 17 00:00:00 2001 From: Arunpravin Date: Fri, 27 Nov 2020 21:40:24 +0530 Subject: [PATCH 114/296] drm/amdgpu/pm/smu11: Fix fan set speed bug Fix fan set speed calculation. Suggested-by: Kenneth Feng Signed-off-by: Arunpravin Acked-by: Alex Deucher Reviewed-by: Kenneth Feng Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c index 2380759ddf48..6db96fa1df09 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c @@ -1164,7 +1164,12 @@ int smu_v11_0_set_fan_speed_rpm(struct smu_context *smu, if (ret) return ret; - crystal_clock_freq = amdgpu_asic_get_xclk(adev); + /* + * crystal_clock_freq div by 4 is required since the fan control + * module refers to 25MHz + */ + + crystal_clock_freq = amdgpu_asic_get_xclk(adev) / 4; tach_period = 60 * crystal_clock_freq * 10000 / (8 * speed); WREG32_SOC15(THM, 0, mmCG_TACH_CTRL, REG_SET_FIELD(RREG32_SOC15(THM, 0, mmCG_TACH_CTRL), From 7e0b367db85ef7b91399006253759a024eab7653 Mon Sep 17 00:00:00 2001 From: Brandon Syu Date: Thu, 12 Nov 2020 15:35:52 +0800 Subject: [PATCH 115/296] drm/amd/display: Init clock value by current vbios CLKs [Why] While booting into OS, driver updates DPP/DISP CLKs. But init clock value is zero which is invalid. [How] Get current clocks value to update init clocks. To avoid underflow. 
Signed-off-by: Brandon Syu Reviewed-by: Tony Cheng Acked-by: Aurabindo Pillai Signed-off-by: Alex Deucher --- .../drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c index 2f8fee05547a..c001307b0a59 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c @@ -163,8 +163,17 @@ void rn_update_clocks(struct clk_mgr *clk_mgr_base, new_clocks->dppclk_khz = 100000; } - if (should_set_clock(safe_to_lower, new_clocks->dppclk_khz, clk_mgr->base.clks.dppclk_khz)) { - if (clk_mgr->base.clks.dppclk_khz > new_clocks->dppclk_khz) + /* + * Temporally ignore thew 0 cases for disp and dpp clks. + * We may have a new feature that requires 0 clks in the future. + */ + if (new_clocks->dppclk_khz == 0 || new_clocks->dispclk_khz == 0) { + new_clocks->dppclk_khz = clk_mgr_base->clks.dppclk_khz; + new_clocks->dispclk_khz = clk_mgr_base->clks.dispclk_khz; + } + + if (should_set_clock(safe_to_lower, new_clocks->dppclk_khz, clk_mgr_base->clks.dppclk_khz)) { + if (clk_mgr_base->clks.dppclk_khz > new_clocks->dppclk_khz) dpp_clock_lowered = true; clk_mgr_base->clks.dppclk_khz = new_clocks->dppclk_khz; update_dppclk = true; From ac2db9488cf21de0be7899c1e5963e5ac0ff351f Mon Sep 17 00:00:00 2001 From: Boyuan Zhang Date: Sun, 10 May 2020 15:47:03 -0400 Subject: [PATCH 116/296] drm/amdgpu/vcn3.0: stall DPG when WPTR/RPTR reset Port from VCN2.5 Add vcn dpg hardware synchronization to fix race condition issue between vcn driver and hardware.
Signed-off-by: Boyuan Zhang Reviewed-by: James Zhu Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org # 5.9.x --- drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c index e074f7ed388c..2aa84993f42d 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c @@ -1011,6 +1011,11 @@ static int vcn_v3_0_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, boo tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_RPTR_WR_EN, 1); WREG32_SOC15(VCN, inst_idx, mmUVD_RBC_RB_CNTL, tmp); + /* Stall DPG before WPTR/RPTR reset */ + WREG32_P(SOC15_REG_OFFSET(VCN, inst_idx, mmUVD_POWER_STATUS), + UVD_POWER_STATUS__STALL_DPG_POWER_UP_MASK, + ~UVD_POWER_STATUS__STALL_DPG_POWER_UP_MASK); + /* set the write pointer delay */ WREG32_SOC15(VCN, inst_idx, mmUVD_RBC_RB_WPTR_CNTL, 0); @@ -1033,6 +1038,10 @@ static int vcn_v3_0_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, boo WREG32_SOC15(VCN, inst_idx, mmUVD_RBC_RB_WPTR, lower_32_bits(ring->wptr)); + /* Unstall DPG */ + WREG32_P(SOC15_REG_OFFSET(VCN, inst_idx, mmUVD_POWER_STATUS), + 0, ~UVD_POWER_STATUS__STALL_DPG_POWER_UP_MASK); + return 0; } @@ -1556,8 +1565,14 @@ static int vcn_v3_0_pause_dpg_mode(struct amdgpu_device *adev, UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK, UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK); + /* Stall DPG before WPTR/RPTR reset */ + WREG32_P(SOC15_REG_OFFSET(VCN, inst_idx, mmUVD_POWER_STATUS), + UVD_POWER_STATUS__STALL_DPG_POWER_UP_MASK, + ~UVD_POWER_STATUS__STALL_DPG_POWER_UP_MASK); + /* Restore */ ring = &adev->vcn.inst[inst_idx].ring_enc[0]; + ring->wptr = 0; WREG32_SOC15(VCN, inst_idx, mmUVD_RB_BASE_LO, ring->gpu_addr); WREG32_SOC15(VCN, inst_idx, mmUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr)); WREG32_SOC15(VCN, inst_idx, mmUVD_RB_SIZE, ring->ring_size / 4); @@ -1565,6 +1580,7 @@ static int vcn_v3_0_pause_dpg_mode(struct amdgpu_device *adev, 
WREG32_SOC15(VCN, inst_idx, mmUVD_RB_WPTR, lower_32_bits(ring->wptr)); ring = &adev->vcn.inst[inst_idx].ring_enc[1]; + ring->wptr = 0; WREG32_SOC15(VCN, inst_idx, mmUVD_RB_BASE_LO2, ring->gpu_addr); WREG32_SOC15(VCN, inst_idx, mmUVD_RB_BASE_HI2, upper_32_bits(ring->gpu_addr)); WREG32_SOC15(VCN, inst_idx, mmUVD_RB_SIZE2, ring->ring_size / 4); @@ -1574,6 +1590,10 @@ static int vcn_v3_0_pause_dpg_mode(struct amdgpu_device *adev, WREG32_SOC15(VCN, inst_idx, mmUVD_RBC_RB_WPTR, RREG32_SOC15(VCN, inst_idx, mmUVD_SCRATCH2) & 0x7FFFFFFF); + /* Unstall DPG */ + WREG32_P(SOC15_REG_OFFSET(VCN, inst_idx, mmUVD_POWER_STATUS), + 0, ~UVD_POWER_STATUS__STALL_DPG_POWER_UP_MASK); + SOC15_WAIT_ON_RREG(VCN, inst_idx, mmUVD_POWER_STATUS, UVD_PGFSM_CONFIG__UVDM_UVDU_PWR_ON, UVD_POWER_STATUS__UVD_POWER_STATUS_MASK); } From efd6d85a18102241538dd1cc257948a0dbe6fae6 Mon Sep 17 00:00:00 2001 From: Boyuan Zhang Date: Tue, 19 May 2020 11:38:44 -0400 Subject: [PATCH 117/296] drm/amdgpu/vcn3.0: remove old DPG workaround Port from VCN2.5 SCRATCH2 is used to keep decode wptr as a workaround which fix a hardware DPG decode wptr update bug for vcn2.5 beforehand. 
Signed-off-by: Boyuan Zhang Reviewed-by: James Zhu Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org # 5.9.x --- drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c index 2aa84993f42d..b5f8f3d731cb 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c @@ -1587,9 +1587,6 @@ static int vcn_v3_0_pause_dpg_mode(struct amdgpu_device *adev, WREG32_SOC15(VCN, inst_idx, mmUVD_RB_RPTR2, lower_32_bits(ring->wptr)); WREG32_SOC15(VCN, inst_idx, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr)); - WREG32_SOC15(VCN, inst_idx, mmUVD_RBC_RB_WPTR, - RREG32_SOC15(VCN, inst_idx, mmUVD_SCRATCH2) & 0x7FFFFFFF); - /* Unstall DPG */ WREG32_P(SOC15_REG_OFFSET(VCN, inst_idx, mmUVD_POWER_STATUS), 0, ~UVD_POWER_STATUS__STALL_DPG_POWER_UP_MASK); @@ -1650,10 +1647,6 @@ static void vcn_v3_0_dec_ring_set_wptr(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; - if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) - WREG32_SOC15(VCN, ring->me, mmUVD_SCRATCH2, - lower_32_bits(ring->wptr) | 0x80000000); - if (ring->use_doorbell) { adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr); WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr)); From 4d916140bf28ff027997144ea1bb4299e1536f87 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 30 Nov 2020 12:54:34 +0100 Subject: [PATCH 118/296] intel_idle: Build fix Because CONFIG_ soup. 
Fixes: 6e1d2bc675bd ("intel_idle: Fix intel_idle() vs tracing") Reported-by: Randy Dunlap Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20201130115402.GO3040@hirez.programming.kicks-ass.net --- drivers/idle/intel_idle.c | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c index 7ee7ffe22ae3..d79335506ecd 100644 --- a/drivers/idle/intel_idle.c +++ b/drivers/idle/intel_idle.c @@ -1140,6 +1140,20 @@ static bool __init intel_idle_max_cstate_reached(int cstate) return false; } +static bool __init intel_idle_state_needs_timer_stop(struct cpuidle_state *state) +{ + unsigned long eax = flg2MWAIT(state->flags); + + if (boot_cpu_has(X86_FEATURE_ARAT)) + return false; + + /* + * Switch over to one-shot tick broadcast if the target C-state + * is deeper than C1. + */ + return !!((eax >> MWAIT_SUBSTATE_SIZE) & MWAIT_CSTATE_MASK); +} + #ifdef CONFIG_ACPI_PROCESSOR_CSTATE #include @@ -1210,20 +1224,6 @@ static bool __init intel_idle_acpi_cst_extract(void) return false; } -static bool __init intel_idle_state_needs_timer_stop(struct cpuidle_state *state) -{ - unsigned long eax = flg2MWAIT(state->flags); - - if (boot_cpu_has(X86_FEATURE_ARAT)) - return false; - - /* - * Switch over to one-shot tick broadcast if the target C-state - * is deeper than C1. - */ - return !!((eax >> MWAIT_SUBSTATE_SIZE) & MWAIT_CSTATE_MASK); -} - static void __init intel_idle_init_cstates_acpi(struct cpuidle_driver *drv) { int cstate, limit = min_t(int, CPUIDLE_STATE_MAX, acpi_state_table.count); From 5debf02131227d39988e44adf5090fb796fa8466 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Thu, 26 Nov 2020 20:09:21 +0900 Subject: [PATCH 119/296] perf/x86/intel: Fix a warning on x86_pmu_stop() with large PEBS The commit 3966c3feca3f ("x86/perf/amd: Remove need to check "running" bit in NMI handler") introduced this. 
It seems x86_pmu_stop can be called recursively (like when it loses some samples) like below: x86_pmu_stop intel_pmu_disable_event (x86_pmu_disable) intel_pmu_pebs_disable intel_pmu_drain_pebs_nhm (x86_pmu_drain_pebs_buffer) x86_pmu_stop While commit 35d1ce6bec13 ("perf/x86/intel/ds: Fix x86_pmu_stop warning for large PEBS") fixed it for the normal cases, there's another path to call x86_pmu_stop() recursively when a PEBS error was detected (like two or more counters overflowed at the same time). Like in Kan's previous fix, we can skip the interrupt accounting for large PEBS, so check the iregs which is set for PMI only. Fixes: 3966c3feca3f ("x86/perf/amd: Remove need to check "running" bit in NMI handler") Reported-by: John Sperbeck Suggested-by: Peter Zijlstra Signed-off-by: Namhyung Kim Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20201126110922.317681-1-namhyung@kernel.org --- arch/x86/events/intel/ds.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c index b47cc4226934..89dba588636e 100644 --- a/arch/x86/events/intel/ds.c +++ b/arch/x86/events/intel/ds.c @@ -1940,7 +1940,7 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs, struct perf_sample_d if (error[bit]) { perf_log_lost_samples(event, error[bit]); - if (perf_event_account_interrupt(event)) + if (iregs && perf_event_account_interrupt(event)) x86_pmu_stop(event, 0); } From fc17db8aa4c53cbd2d5469bb0521ea0f0a6dbb27 Mon Sep 17 00:00:00 2001 From: Stephane Eranian Date: Thu, 26 Nov 2020 20:09:22 +0900 Subject: [PATCH 120/296] perf/x86/intel: Check PEBS status correctly The kernel cannot disambiguate when 2+ PEBS counters overflow at the same time. This is what the comment for this code suggests. However, I see the comparison is done with the unfiltered p->status which is a copy of IA32_PERF_GLOBAL_STATUS at the time of the sample. This register contains more than the PEBS counter overflow bits.
It also includes many other bits which could also be set. Signed-off-by: Namhyung Kim Signed-off-by: Stephane Eranian Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20201126110922.317681-2-namhyung@kernel.org --- arch/x86/events/intel/ds.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c index 89dba588636e..485c5066f8b8 100644 --- a/arch/x86/events/intel/ds.c +++ b/arch/x86/events/intel/ds.c @@ -1916,7 +1916,7 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs, struct perf_sample_d * that caused the PEBS record. It's called collision. * If collision happened, the record will be dropped. */ - if (p->status != (1ULL << bit)) { + if (pebs_status != (1ULL << bit)) { for_each_set_bit(i, (unsigned long *)&pebs_status, size) error[i]++; continue; From 46ee4abb10a07bd8f8ce910ee6b4ae6a947d7f63 Mon Sep 17 00:00:00 2001 From: Jan-Niklas Burfeind Date: Thu, 3 Dec 2020 04:03:59 +0100 Subject: [PATCH 121/296] USB: serial: ch341: add new Product ID for CH341A Add PID for CH340 that's found on a ch341 based Programmer made by keeyees. The specific device that contains the serial converter is described here: http://www.keeyees.com/a/Products/ej/36.html The driver works flawlessly as soon as the new PID (0x5512) is added to it. 
Signed-off-by: Jan-Niklas Burfeind Cc: stable@vger.kernel.org Signed-off-by: Johan Hovold --- drivers/usb/serial/ch341.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/serial/ch341.c b/drivers/usb/serial/ch341.c index a2e2f56c88cd..b157a230178d 100644 --- a/drivers/usb/serial/ch341.c +++ b/drivers/usb/serial/ch341.c @@ -84,6 +84,7 @@ static const struct usb_device_id id_table[] = { { USB_DEVICE(0x4348, 0x5523) }, { USB_DEVICE(0x1a86, 0x7522) }, { USB_DEVICE(0x1a86, 0x7523) }, + { USB_DEVICE(0x1a86, 0x5512) }, { USB_DEVICE(0x1a86, 0x5523) }, { }, }; From bf193bfc12dbc3754fc8a6e0e1e3702f1af2f772 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Thu, 3 Dec 2020 10:11:59 +0100 Subject: [PATCH 122/296] USB: serial: ch341: sort device-id entries Keep the device-id entries sorted to make it easier to add new ones in the right spot. Reviewed-by: Greg Kroah-Hartman Cc: stable@vger.kernel.org Signed-off-by: Johan Hovold --- drivers/usb/serial/ch341.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/usb/serial/ch341.c b/drivers/usb/serial/ch341.c index b157a230178d..28deaaec581f 100644 --- a/drivers/usb/serial/ch341.c +++ b/drivers/usb/serial/ch341.c @@ -81,11 +81,11 @@ #define CH341_QUIRK_SIMULATE_BREAK BIT(1) static const struct usb_device_id id_table[] = { - { USB_DEVICE(0x4348, 0x5523) }, - { USB_DEVICE(0x1a86, 0x7522) }, - { USB_DEVICE(0x1a86, 0x7523) }, { USB_DEVICE(0x1a86, 0x5512) }, { USB_DEVICE(0x1a86, 0x5523) }, + { USB_DEVICE(0x1a86, 0x7522) }, + { USB_DEVICE(0x1a86, 0x7523) }, + { USB_DEVICE(0x4348, 0x5523) }, { }, }; MODULE_DEVICE_TABLE(usb, id_table); From 8dcc0e19dfbd73ad6b3172924d6da8f7f3f8b3b0 Mon Sep 17 00:00:00 2001 From: Mike Travis Date: Thu, 3 Dec 2020 09:22:52 -0600 Subject: [PATCH 123/296] x86/platform/uv: Fix UV4 hub revision adjustment Currently, UV4 is incorrectly identified as UV4A and UV4A as UV5. Hub chip starts with revision 1, fix it. [ bp: Massage commit message. 
] Fixes: 647128f1536e ("x86/platform/uv: Update UV MMRs for UV5") Signed-off-by: Mike Travis Signed-off-by: Borislav Petkov Reviewed-by: Steve Wahl Acked-by: Dimitri Sivanich Link: https://lkml.kernel.org/r/20201203152252.371199-1-mike.travis@hpe.com --- arch/x86/kernel/apic/x2apic_uv_x.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index 1b98f8c12b96..235f5cde06fc 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c @@ -161,7 +161,7 @@ static int __init early_set_hub_type(void) /* UV4/4A only have a revision difference */ case UV4_HUB_PART_NUMBER: uv_min_hub_revision_id = node_id.s.revision - + UV4_HUB_REVISION_BASE; + + UV4_HUB_REVISION_BASE - 1; uv_hub_type_set(UV4); if (uv_min_hub_revision_id == UV4A_HUB_REVISION_BASE) uv_hub_type_set(UV4|UV4A); From 796317848517292eb951d8876773b98867cf3c28 Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Thu, 3 Dec 2020 12:31:36 +0900 Subject: [PATCH 124/296] smb3: set COMPOUND_FID to FileID field of subsequent compound request For an operation compounded with an SMB2 CREATE request, client must set COMPOUND_FID(0xFFFFFFFFFFFFFFFF) to FileID field of smb2 ioctl. 
Signed-off-by: Namjae Jeon Fixes: 2e4564b31b645 ("smb3: add support stat of WSL reparse points for special file types") Reviewed-by: Aurelien Aptel Reviewed-by: Ronnie Sahlberg Signed-off-by: Steve French --- fs/cifs/smb2ops.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index dab94f67c988..3d914d7d0d11 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -3114,8 +3114,8 @@ smb2_query_reparse_tag(const unsigned int xid, struct cifs_tcon *tcon, rqst[1].rq_nvec = SMB2_IOCTL_IOV_SIZE; rc = SMB2_ioctl_init(tcon, server, - &rqst[1], fid.persistent_fid, - fid.volatile_fid, FSCTL_GET_REPARSE_POINT, + &rqst[1], COMPOUND_FID, + COMPOUND_FID, FSCTL_GET_REPARSE_POINT, true /* is_fctl */, NULL, 0, CIFSMaxBufSize - MAX_SMB2_CREATE_RESPONSE_SIZE - From 59463eb88829f646aed13283fd84d02a475334fe Mon Sep 17 00:00:00 2001 From: Aurelien Aptel Date: Thu, 3 Dec 2020 19:46:08 +0100 Subject: [PATCH 125/296] cifs: add NULL check for ses->tcon_ipc In some scenarios (DFS and BAD_NETWORK_NAME) set_root_ses() can be called with a NULL ses->tcon_ipc.
Signed-off-by: Aurelien Aptel Reviewed-by: Paulo Alcantara (SUSE) CC: Stable Signed-off-by: Steve French --- fs/cifs/connect.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 28c1459fb0fc..44f9cce57099 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -4546,7 +4546,8 @@ static void set_root_ses(struct cifs_sb_info *cifs_sb, struct cifs_ses *ses, if (ses) { spin_lock(&cifs_tcp_ses_lock); ses->ses_count++; - ses->tcon_ipc->remap = cifs_remap(cifs_sb); + if (ses->tcon_ipc) + ses->tcon_ipc->remap = cifs_remap(cifs_sb); spin_unlock(&cifs_tcp_ses_lock); } *root_ses = ses; From ea64370bcae126a88cd26a16f1abcc23ab2b9a55 Mon Sep 17 00:00:00 2001 From: Ronnie Sahlberg Date: Mon, 30 Nov 2020 11:29:20 +1000 Subject: [PATCH 126/296] cifs: refactor create_sd_buf() and avoid corrupting the buffer When mounting with "idsfromsid" mount option, Azure corrupted the owner SIDs due to excessive padding caused by placing the owner fields at the end of the security descriptor on create. Placing owners at the front of the security descriptor (rather than the end) is also safer, as the number of ACEs (that follow it) is variable.
Signed-off-by: Ronnie Sahlberg Suggested-by: Rohith Surabattula CC: Stable # v5.8 Signed-off-by: Steve French --- fs/cifs/smb2pdu.c | 69 +++++++++++++++++++++++++---------------------- fs/cifs/smb2pdu.h | 2 -- 2 files changed, 37 insertions(+), 34 deletions(-) diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index 445e80862865..acb72705062d 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -2272,17 +2272,15 @@ static struct crt_sd_ctxt * create_sd_buf(umode_t mode, bool set_owner, unsigned int *len) { struct crt_sd_ctxt *buf; - struct cifs_ace *pace; - unsigned int sdlen, acelen; + __u8 *ptr, *aclptr; + unsigned int acelen, acl_size, ace_count; unsigned int owner_offset = 0; unsigned int group_offset = 0; + struct smb3_acl acl; - *len = roundup(sizeof(struct crt_sd_ctxt) + (sizeof(struct cifs_ace) * 2), 8); + *len = roundup(sizeof(struct crt_sd_ctxt) + (sizeof(struct cifs_ace) * 4), 8); if (set_owner) { - /* offset fields are from beginning of security descriptor not of create context */ - owner_offset = sizeof(struct smb3_acl) + (sizeof(struct cifs_ace) * 2); - /* sizeof(struct owner_group_sids) is already multiple of 8 so no need to round */ *len += sizeof(struct owner_group_sids); } @@ -2291,26 +2289,22 @@ create_sd_buf(umode_t mode, bool set_owner, unsigned int *len) if (buf == NULL) return buf; + ptr = (__u8 *)&buf[1]; if (set_owner) { + /* offset fields are from beginning of security descriptor not of create context */ + owner_offset = ptr - (__u8 *)&buf->sd; buf->sd.OffsetOwner = cpu_to_le32(owner_offset); - group_offset = owner_offset + sizeof(struct owner_sid); + group_offset = owner_offset + offsetof(struct owner_group_sids, group); buf->sd.OffsetGroup = cpu_to_le32(group_offset); + + setup_owner_group_sids(ptr); + ptr += sizeof(struct owner_group_sids); } else { buf->sd.OffsetOwner = 0; buf->sd.OffsetGroup = 0; } - sdlen = sizeof(struct smb3_sd) + sizeof(struct smb3_acl) + - 2 * sizeof(struct cifs_ace); - if (set_owner) { - sdlen += 
sizeof(struct owner_group_sids); - setup_owner_group_sids(owner_offset + sizeof(struct create_context) + 8 /* name */ - + (char *)buf); - } - - buf->ccontext.DataOffset = cpu_to_le16(offsetof - (struct crt_sd_ctxt, sd)); - buf->ccontext.DataLength = cpu_to_le32(sdlen); + buf->ccontext.DataOffset = cpu_to_le16(offsetof(struct crt_sd_ctxt, sd)); buf->ccontext.NameOffset = cpu_to_le16(offsetof(struct crt_sd_ctxt, Name)); buf->ccontext.NameLength = cpu_to_le16(4); /* SMB2_CREATE_SD_BUFFER_TOKEN is "SecD" */ @@ -2319,6 +2313,7 @@ create_sd_buf(umode_t mode, bool set_owner, unsigned int *len) buf->Name[2] = 'c'; buf->Name[3] = 'D'; buf->sd.Revision = 1; /* Must be one see MS-DTYP 2.4.6 */ + /* * ACL is "self relative" ie ACL is stored in contiguous block of memory * and "DP" ie the DACL is present @@ -2326,28 +2321,38 @@ create_sd_buf(umode_t mode, bool set_owner, unsigned int *len) buf->sd.Control = cpu_to_le16(ACL_CONTROL_SR | ACL_CONTROL_DP); /* offset owner, group and Sbz1 and SACL are all zero */ - buf->sd.OffsetDacl = cpu_to_le32(sizeof(struct smb3_sd)); - buf->acl.AclRevision = ACL_REVISION; /* See 2.4.4.1 of MS-DTYP */ + buf->sd.OffsetDacl = cpu_to_le32(ptr - (__u8 *)&buf->sd); + /* Ship the ACL for now. we will copy it into buf later. */ + aclptr = ptr; + ptr += sizeof(struct cifs_acl); /* create one ACE to hold the mode embedded in reserved special SID */ - pace = (struct cifs_ace *)(sizeof(struct crt_sd_ctxt) + (char *)buf); - acelen = setup_special_mode_ACE(pace, (__u64)mode); + acelen = setup_special_mode_ACE((struct cifs_ace *)ptr, (__u64)mode); + ptr += acelen; + acl_size = acelen + sizeof(struct smb3_acl); + ace_count = 1; if (set_owner) { /* we do not need to reallocate buffer to add the two more ACEs. 
plenty of space */ - pace = (struct cifs_ace *)(acelen + (sizeof(struct crt_sd_ctxt) + (char *)buf)); - acelen += setup_special_user_owner_ACE(pace); - /* it does not appear necessary to add an ACE for the NFS group SID */ - buf->acl.AceCount = cpu_to_le16(3); - } else - buf->acl.AceCount = cpu_to_le16(2); + acelen = setup_special_user_owner_ACE((struct cifs_ace *)ptr); + ptr += acelen; + acl_size += acelen; + ace_count += 1; + } /* and one more ACE to allow access for authenticated users */ - pace = (struct cifs_ace *)(acelen + (sizeof(struct crt_sd_ctxt) + - (char *)buf)); - acelen += setup_authusers_ACE(pace); + acelen = setup_authusers_ACE((struct cifs_ace *)ptr); + ptr += acelen; + acl_size += acelen; + ace_count += 1; - buf->acl.AclSize = cpu_to_le16(sizeof(struct cifs_acl) + acelen); + acl.AclRevision = ACL_REVISION; /* See 2.4.4.1 of MS-DTYP */ + acl.AclSize = cpu_to_le16(acl_size); + acl.AceCount = cpu_to_le16(ace_count); + memcpy(aclptr, &acl, sizeof(struct cifs_acl)); + + buf->ccontext.DataLength = cpu_to_le32(ptr - (__u8 *)&buf->sd); + *len = ptr - (__u8 *)buf; return buf; } diff --git a/fs/cifs/smb2pdu.h b/fs/cifs/smb2pdu.h index f05f9b12f689..fa57b03ca98c 100644 --- a/fs/cifs/smb2pdu.h +++ b/fs/cifs/smb2pdu.h @@ -963,8 +963,6 @@ struct crt_sd_ctxt { struct create_context ccontext; __u8 Name[8]; struct smb3_sd sd; - struct smb3_acl acl; - /* Followed by at least 4 ACEs */ } __packed; From 47a0001436352c9853d72bf2071e85b316d688a2 Mon Sep 17 00:00:00 2001 From: Coiby Xu Date: Wed, 25 Nov 2020 21:03:19 +0800 Subject: [PATCH 127/296] pinctrl: amd: remove debounce filter setting in IRQ type setting Debounce filter setting should be independent from IRQ type setting because according to the ACPI specs, there are separate arguments for specifying debounce timeout and IRQ type in GpioIo() and GpioInt(). 
Together with commit 06abe8291bc31839950f7d0362d9979edc88a666 ("pinctrl: amd: fix incorrect way to disable debounce filter") and Andy's patch "gpiolib: acpi: Take into account debounce settings" [1], this will fix broken touchpads for laptops whose BIOS set the debounce timeout to a relatively large value. For example, the BIOS of Lenovo AMD gaming laptops including Legion-5 15ARH05 (R7000), Legion-5P (R7000P) and IdeaPad Gaming 3 15ARH05, set the debounce timeout to 124.8ms. This led to the kernel receiving only ~7 HID reports per second from the Synaptics touchpad (MSFT0001:00 06CB:7F28). Existing touchpads like [2][3] are not troubled by this bug because the debounce timeout has been set to 0 by the BIOS before enabling the debounce filter in setting IRQ type. [1] https://lore.kernel.org/linux-gpio/20201111222008.39993-11-andriy.shevchenko@linux.intel.com/ 8dcb7a15a585 ("gpiolib: acpi: Take into account debounce settings") [2] https://github.com/Syniurge/i2c-amd-mp2/issues/11#issuecomment-721331582 [3] https://forum.manjaro.org/t/random-short-touchpad-freezes/30832/28 Signed-off-by: Coiby Xu Reviewed-by: Andy Shevchenko Cc: Hans de Goede Cc: Andy Shevchenko Cc: Benjamin Tissoires Cc: stable@vger.kernel.org Link: https://lore.kernel.org/linux-gpio/CAHp75VcwiGREBUJ0A06EEw-SyabqYsp%2Bdqs2DpSrhaY-2GVdAA%40mail.gmail.com/ BugLink: https://bugs.launchpad.net/ubuntu/+source/linux/+bug/1887190 Link: https://lore.kernel.org/r/20201125130320.311059-1-coiby.xu@gmail.com Signed-off-by: Linus Walleij --- drivers/pinctrl/pinctrl-amd.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/drivers/pinctrl/pinctrl-amd.c b/drivers/pinctrl/pinctrl-amd.c index 4aea3e05e8c6..899c16c17b6d 100644 --- a/drivers/pinctrl/pinctrl-amd.c +++ b/drivers/pinctrl/pinctrl-amd.c @@ -429,7 +429,6 @@ static int amd_gpio_irq_set_type(struct irq_data *d, unsigned int type) pin_reg &= ~BIT(LEVEL_TRIG_OFF); pin_reg &= ~(ACTIVE_LEVEL_MASK << ACTIVE_LEVEL_OFF); pin_reg |= ACTIVE_HIGH << 
ACTIVE_LEVEL_OFF; - pin_reg |= DB_TYPE_REMOVE_GLITCH << DB_CNTRL_OFF; irq_set_handler_locked(d, handle_edge_irq); break; @@ -437,7 +436,6 @@ static int amd_gpio_irq_set_type(struct irq_data *d, unsigned int type) pin_reg &= ~BIT(LEVEL_TRIG_OFF); pin_reg &= ~(ACTIVE_LEVEL_MASK << ACTIVE_LEVEL_OFF); pin_reg |= ACTIVE_LOW << ACTIVE_LEVEL_OFF; - pin_reg |= DB_TYPE_REMOVE_GLITCH << DB_CNTRL_OFF; irq_set_handler_locked(d, handle_edge_irq); break; @@ -445,7 +443,6 @@ static int amd_gpio_irq_set_type(struct irq_data *d, unsigned int type) pin_reg &= ~BIT(LEVEL_TRIG_OFF); pin_reg &= ~(ACTIVE_LEVEL_MASK << ACTIVE_LEVEL_OFF); pin_reg |= BOTH_EADGE << ACTIVE_LEVEL_OFF; - pin_reg |= DB_TYPE_REMOVE_GLITCH << DB_CNTRL_OFF; irq_set_handler_locked(d, handle_edge_irq); break; @@ -453,8 +450,6 @@ static int amd_gpio_irq_set_type(struct irq_data *d, unsigned int type) pin_reg |= LEVEL_TRIGGER << LEVEL_TRIG_OFF; pin_reg &= ~(ACTIVE_LEVEL_MASK << ACTIVE_LEVEL_OFF); pin_reg |= ACTIVE_HIGH << ACTIVE_LEVEL_OFF; - pin_reg &= ~(DB_CNTRl_MASK << DB_CNTRL_OFF); - pin_reg |= DB_TYPE_PRESERVE_LOW_GLITCH << DB_CNTRL_OFF; irq_set_handler_locked(d, handle_level_irq); break; @@ -462,8 +457,6 @@ static int amd_gpio_irq_set_type(struct irq_data *d, unsigned int type) pin_reg |= LEVEL_TRIGGER << LEVEL_TRIG_OFF; pin_reg &= ~(ACTIVE_LEVEL_MASK << ACTIVE_LEVEL_OFF); pin_reg |= ACTIVE_LOW << ACTIVE_LEVEL_OFF; - pin_reg &= ~(DB_CNTRl_MASK << DB_CNTRL_OFF); - pin_reg |= DB_TYPE_PRESERVE_HIGH_GLITCH << DB_CNTRL_OFF; irq_set_handler_locked(d, handle_level_irq); break; From 3f203f057edfcf6bd02c6b942799262bfcf31f73 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 4 Dec 2020 09:55:19 +0100 Subject: [PATCH 128/296] USB: serial: kl5kusb105: fix memleak on open Fix memory leak of control-message transfer buffer on successful open(). 
Fixes: 6774d5f53271 ("USB: serial: kl5kusb105: fix open error path") Cc: stable@vger.kernel.org Signed-off-by: Johan Hovold --- drivers/usb/serial/kl5kusb105.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/drivers/usb/serial/kl5kusb105.c b/drivers/usb/serial/kl5kusb105.c index 5ee48b0650c4..5f6b82ebccc5 100644 --- a/drivers/usb/serial/kl5kusb105.c +++ b/drivers/usb/serial/kl5kusb105.c @@ -276,12 +276,12 @@ static int klsi_105_open(struct tty_struct *tty, struct usb_serial_port *port) priv->cfg.unknown2 = cfg->unknown2; spin_unlock_irqrestore(&priv->lock, flags); + kfree(cfg); + /* READ_ON and urb submission */ rc = usb_serial_generic_open(tty, port); - if (rc) { - retval = rc; - goto err_free_cfg; - } + if (rc) + return rc; rc = usb_control_msg(port->serial->dev, usb_sndctrlpipe(port->serial->dev, 0), @@ -324,8 +324,6 @@ err_disable_read: KLSI_TIMEOUT); err_generic_close: usb_serial_generic_close(port); -err_free_cfg: - kfree(cfg); return retval; } From 32a9e0c445fa5abfd8730461c3ae0be1860bc6b2 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Wed, 25 Nov 2020 21:49:53 +0100 Subject: [PATCH 129/296] mmc: tmio: improve bringing HW to a sane state with MMC_POWER_OFF Further testing of error cases revealed that downgrade is not enough, so we need to reset the SCC which is done by calling the custom reset function. This reset function can distinguish between the various SDHI variants, so protecting the call with MIN_RCAR2 is enough here. 
Fixes: 24ce2d7b8bea ("mmc: tmio: bring tuning HW to a sane state with MMC_POWER_OFF") Reported-by: Yoshihiro Shimoda Signed-off-by: Wolfram Sang Reviewed-by: Yoshihiro Shimoda Tested-by: Yoshihiro Shimoda Link: https://lore.kernel.org/r/20201125204953.3344-1-wsa+renesas@sang-engineering.com Signed-off-by: Ulf Hansson --- drivers/mmc/host/tmio_mmc_core.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/mmc/host/tmio_mmc_core.c b/drivers/mmc/host/tmio_mmc_core.c index cb4149fd12e0..ac4e7874a3f1 100644 --- a/drivers/mmc/host/tmio_mmc_core.c +++ b/drivers/mmc/host/tmio_mmc_core.c @@ -927,9 +927,9 @@ static void tmio_mmc_set_ios(struct mmc_host *mmc, struct mmc_ios *ios) switch (ios->power_mode) { case MMC_POWER_OFF: tmio_mmc_power_off(host); - /* Downgrade ensures a sane state for tuning HW (e.g. SCC) */ - if (host->mmc->ops->hs400_downgrade) - host->mmc->ops->hs400_downgrade(host->mmc); + /* For R-Car Gen2+, we need to reset SDHI specific SCC */ + if (host->pdata->flags & TMIO_MMC_MIN_RCAR2) + host->reset(host); host->set_clock(host, 0); break; case MMC_POWER_UP: From 8fca2b8706f39f86312c086229e0cb364f8b4f97 Mon Sep 17 00:00:00 2001 From: Wen Gong Date: Mon, 23 Nov 2020 16:45:52 +0800 Subject: [PATCH 130/296] mac80211: fix return value of ieee80211_chandef_he_6ghz_oper ieee80211_chandef_he_6ghz_oper() needs to return true if it determined a valid 6 GHz chandef, fix that.
Fixes: 1d00ce807efa ("mac80211: support S1G association") Signed-off-by: Wen Gong Link: https://lore.kernel.org/r/1606121152-3452-1-git-send-email-wgong@codeaurora.org [rewrite commit message] Signed-off-by: Johannes Berg --- net/mac80211/util.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 49342060490f..94e624e9439b 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -3455,7 +3455,7 @@ bool ieee80211_chandef_he_6ghz_oper(struct ieee80211_sub_if_data *sdata, *chandef = he_chandef; - return false; + return true; } bool ieee80211_chandef_s1g_oper(const struct ieee80211_s1g_oper_ie *oper, From f495acd8851d7b345e5f0e521b2645b1e1f928a0 Mon Sep 17 00:00:00 2001 From: Sara Sharon Date: Sun, 29 Nov 2020 17:30:44 +0200 Subject: [PATCH 131/296] cfg80211: initialize rekey_data In case we have old supplicant, the akm field is uninitialized. Signed-off-by: Sara Sharon Signed-off-by: Luca Coelho Link: https://lore.kernel.org/r/iwlwifi.20201129172929.930f0ab7ebee.Ic546e384efab3f4a89f318eafddc3eb7d556aecb@changeid Signed-off-by: Johannes Berg --- net/wireless/nl80211.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index a77174b99b07..f67ddf2cebcb 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -12634,7 +12634,7 @@ static int nl80211_set_rekey_data(struct sk_buff *skb, struct genl_info *info) struct net_device *dev = info->user_ptr[1]; struct wireless_dev *wdev = dev->ieee80211_ptr; struct nlattr *tb[NUM_NL80211_REKEY_DATA]; - struct cfg80211_gtk_rekey_data rekey_data; + struct cfg80211_gtk_rekey_data rekey_data = {}; int err; if (!info->attrs[NL80211_ATTR_REKEY_DATA]) From bdeca45a0cc58f864f1eb2e919304203ff5c5f39 Mon Sep 17 00:00:00 2001 From: "Borwankar, Antara" Date: Sun, 29 Nov 2020 17:30:53 +0200 Subject: [PATCH 132/296] mac80211: set SDATA_STATE_RUNNING for monitor interfaces During restart, mac80211 is supposed
to reconfigure the driver. When there's a monitor interface, the interface is added and the channel context for it was created, but not assigned to it as it was not considered running during the restart. Fix this by setting SDATA_STATE_RUNNING while adding monitor interfaces. Signed-off-by: Borwankar, Antara Signed-off-by: Luca Coelho Link: https://lore.kernel.org/r/iwlwifi.20201129172929.e1df99693a4c.I494579f28018c2d0b9d4083a664cf872c28405ae@changeid [reword commit log] Signed-off-by: Johannes Berg --- net/mac80211/iface.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 1be775979132..44154cc596cd 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -948,6 +948,8 @@ int ieee80211_add_virtual_monitor(struct ieee80211_local *local) return ret; } + set_bit(SDATA_STATE_RUNNING, &sdata->state); + ret = ieee80211_check_queues(sdata, NL80211_IFTYPE_MONITOR); if (ret) { kfree(sdata); From 6246d7c9d15aaff0bc3863f67900c6a6e6be921b Mon Sep 17 00:00:00 2001 From: Bean Huo Date: Wed, 2 Dec 2020 21:23:20 +0100 Subject: [PATCH 133/296] mmc: block: Fixup condition for CMD13 polling for RPMB requests The CMD13 polling is needed for commands with R1B responses. In commit a0d4c7eb71dd ("mmc: block: Add CMD13 polling for MMC IOCTLS with R1B response"), the intent was to introduce this for requests targeted to the RPMB partition. However, the condition to trigger the polling loop became wrong, leading to unnecessary polling. Let's fix the condition to avoid this. 
Fixes: a0d4c7eb71dd ("mmc: block: Add CMD13 polling for MMC IOCTLS with R1B response") Cc: stable@vger.kernel.org Reported-by: Zhan Liu Signed-off-by: Zhan Liu Signed-off-by: Bean Huo Link: https://lore.kernel.org/r/20201202202320.22165-1-huobean@gmail.com Signed-off-by: Ulf Hansson --- drivers/mmc/core/block.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mmc/core/block.c b/drivers/mmc/core/block.c index 8d3df0be0355..42e27a298218 100644 --- a/drivers/mmc/core/block.c +++ b/drivers/mmc/core/block.c @@ -580,7 +580,7 @@ static int __mmc_blk_ioctl_cmd(struct mmc_card *card, struct mmc_blk_data *md, memcpy(&(idata->ic.response), cmd.resp, sizeof(cmd.resp)); - if (idata->rpmb || (cmd.flags & MMC_RSP_R1B)) { + if (idata->rpmb || (cmd.flags & MMC_RSP_R1B) == MMC_RSP_R1B) { /* * Ensure RPMB/R1B command has completed by polling CMD13 * "Send Status". From c0d638a03bc5dfdb08fb95d0a79ecada25f40da8 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 3 Dec 2020 23:29:16 +0100 Subject: [PATCH 134/296] mmc: mediatek: mark PM functions as __maybe_unused The #ifdef check for the suspend/resume functions is wrong: drivers/mmc/host/mtk-sd.c:2765:12: error: unused function 'msdc_suspend' [-Werror,-Wunused-function] static int msdc_suspend(struct device *dev) drivers/mmc/host/mtk-sd.c:2779:12: error: unused function 'msdc_resume' [-Werror,-Wunused-function] static int msdc_resume(struct device *dev) Remove the #ifdef and mark all four as __maybe_unused to avoid the problem.
Fixes: c0a2074ac575 ("mmc: mediatek: Fix system suspend/resume support for CQHCI") Cc: stable@vger.kernel.org Signed-off-by: Arnd Bergmann Link: https://lore.kernel.org/r/20201203222922.1067522-1-arnd@kernel.org Signed-off-by: Ulf Hansson --- drivers/mmc/host/mtk-sd.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/drivers/mmc/host/mtk-sd.c b/drivers/mmc/host/mtk-sd.c index 7eb99255ae3d..004fbfc23672 100644 --- a/drivers/mmc/host/mtk-sd.c +++ b/drivers/mmc/host/mtk-sd.c @@ -2604,7 +2604,6 @@ static int msdc_drv_remove(struct platform_device *pdev) return 0; } -#ifdef CONFIG_PM static void msdc_save_reg(struct msdc_host *host) { u32 tune_reg = host->dev_comp->pad_tune_reg; @@ -2663,7 +2662,7 @@ static void msdc_restore_reg(struct msdc_host *host) __msdc_enable_sdio_irq(host, 1); } -static int msdc_runtime_suspend(struct device *dev) +static int __maybe_unused msdc_runtime_suspend(struct device *dev) { struct mmc_host *mmc = dev_get_drvdata(dev); struct msdc_host *host = mmc_priv(mmc); @@ -2673,7 +2672,7 @@ static int msdc_runtime_suspend(struct device *dev) return 0; } -static int msdc_runtime_resume(struct device *dev) +static int __maybe_unused msdc_runtime_resume(struct device *dev) { struct mmc_host *mmc = dev_get_drvdata(dev); struct msdc_host *host = mmc_priv(mmc); @@ -2683,7 +2682,7 @@ static int msdc_runtime_resume(struct device *dev) return 0; } -static int msdc_suspend(struct device *dev) +static int __maybe_unused msdc_suspend(struct device *dev) { struct mmc_host *mmc = dev_get_drvdata(dev); int ret; @@ -2697,11 +2696,10 @@ static int msdc_suspend(struct device *dev) return pm_runtime_force_suspend(dev); } -static int msdc_resume(struct device *dev) +static int __maybe_unused msdc_resume(struct device *dev) { return pm_runtime_force_resume(dev); } -#endif static const struct dev_pm_ops msdc_dev_pm_ops = { SET_SYSTEM_SLEEP_PM_OPS(msdc_suspend, msdc_resume) From da881ded10a65885cdcb87ab817eea3acf23dcf9 Mon Sep 17 00:00:00 2001 
From: Greg Kroah-Hartman Date: Fri, 4 Dec 2020 15:59:27 +0100 Subject: [PATCH 135/296] Revert "uas: bump hw_max_sectors to 2048 blocks for SS or faster drives" This reverts commit 5df7ef7d32fec1d6d1c34dbec019b461a12ce870 as Hans reports it causes problems on some systems. Until a "real" fix for this can be found, revert this change to get normal functionality back. Link: https://lore.kernel.org/r/70ca74c2-4a80-e25b-eca9-a63a75516673@redhat.com Cc: Tom Yan Cc: Alan Stern Tested-by: Hans de Goede Signed-off-by: Greg Kroah-Hartman --- drivers/usb/storage/uas.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/usb/storage/uas.c b/drivers/usb/storage/uas.c index c8a577309e8f..9369c6df80c3 100644 --- a/drivers/usb/storage/uas.c +++ b/drivers/usb/storage/uas.c @@ -849,8 +849,6 @@ static int uas_slave_configure(struct scsi_device *sdev) blk_queue_max_hw_sectors(sdev->request_queue, 64); else if (devinfo->flags & US_FL_MAX_SECTORS_240) blk_queue_max_hw_sectors(sdev->request_queue, 240); - else if (devinfo->udev->speed >= USB_SPEED_SUPER) - blk_queue_max_hw_sectors(sdev->request_queue, 2048); blk_queue_max_hw_sectors(sdev->request_queue, min_t(size_t, queue_max_hw_sectors(sdev->request_queue), From d5c65d32dc240bf600d9e54250a8133e93ece60a Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 4 Dec 2020 16:00:34 +0100 Subject: [PATCH 136/296] Revert "uas: fix sdev->host->dma_dev" This reverts commit 558033c2828f832ab3b68c6f8b8710e0de6faef0 as Hans reports it causes problems on some systems. Until a "real" fix for this can be found, revert this change to get normal functionality back. 
Link: https://lore.kernel.org/r/70ca74c2-4a80-e25b-eca9-a63a75516673@redhat.com Cc: Tom Yan Cc: Alan Stern Tested-by: Hans de Goede Signed-off-by: Greg Kroah-Hartman --- drivers/usb/storage/uas.c | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/drivers/usb/storage/uas.c b/drivers/usb/storage/uas.c index 9369c6df80c3..652d6d6f1f36 100644 --- a/drivers/usb/storage/uas.c +++ b/drivers/usb/storage/uas.c @@ -837,22 +837,17 @@ static int uas_slave_alloc(struct scsi_device *sdev) */ blk_queue_update_dma_alignment(sdev->request_queue, (512 - 1)); + if (devinfo->flags & US_FL_MAX_SECTORS_64) + blk_queue_max_hw_sectors(sdev->request_queue, 64); + else if (devinfo->flags & US_FL_MAX_SECTORS_240) + blk_queue_max_hw_sectors(sdev->request_queue, 240); + return 0; } static int uas_slave_configure(struct scsi_device *sdev) { struct uas_dev_info *devinfo = sdev->hostdata; - struct device *dev = sdev->host->dma_dev; - - if (devinfo->flags & US_FL_MAX_SECTORS_64) - blk_queue_max_hw_sectors(sdev->request_queue, 64); - else if (devinfo->flags & US_FL_MAX_SECTORS_240) - blk_queue_max_hw_sectors(sdev->request_queue, 240); - - blk_queue_max_hw_sectors(sdev->request_queue, - min_t(size_t, queue_max_hw_sectors(sdev->request_queue), - dma_max_mapping_size(dev) >> SECTOR_SHIFT)); if (devinfo->flags & US_FL_NO_REPORT_OPCODES) sdev->no_report_opcodes = 1; @@ -1038,7 +1033,7 @@ static int uas_probe(struct usb_interface *intf, const struct usb_device_id *id) shost->can_queue = devinfo->qdepth - 2; usb_set_intfdata(intf, shost); - result = scsi_add_host_with_dma(shost, &intf->dev, udev->bus->sysdev); + result = scsi_add_host(shost, &intf->dev); if (result) goto free_streams; From 97ad4a77f23e30801d2c0ef0c12b59f0e5760e6e Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 4 Dec 2020 16:01:23 +0100 Subject: [PATCH 137/296] Revert "usb-storage: fix sdev->host->dma_dev" This reverts commit 0154012f8018bba4d9971d1007c12ffd48539ddb as Hans reports it causes 
problems on some systems. Until a "real" fix for this can be found, revert this change to get normal functionality back. Link: https://lore.kernel.org/r/70ca74c2-4a80-e25b-eca9-a63a75516673@redhat.com Cc: Tom Yan Cc: Alan Stern Tested-by: Hans de Goede Signed-off-by: Greg Kroah-Hartman --- drivers/usb/storage/scsiglue.c | 2 +- drivers/usb/storage/usb.c | 5 ++--- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/usb/storage/scsiglue.c b/drivers/usb/storage/scsiglue.c index 560efd1479ba..e5a971b83e3f 100644 --- a/drivers/usb/storage/scsiglue.c +++ b/drivers/usb/storage/scsiglue.c @@ -92,7 +92,7 @@ static int slave_alloc (struct scsi_device *sdev) static int slave_configure(struct scsi_device *sdev) { struct us_data *us = host_to_us(sdev->host); - struct device *dev = sdev->host->dma_dev; + struct device *dev = us->pusb_dev->bus->sysdev; /* * Many devices have trouble transferring more than 32KB at a time, diff --git a/drivers/usb/storage/usb.c b/drivers/usb/storage/usb.c index c2ef367cf257..94a64729dc27 100644 --- a/drivers/usb/storage/usb.c +++ b/drivers/usb/storage/usb.c @@ -1049,9 +1049,8 @@ int usb_stor_probe2(struct us_data *us) goto BadDevice; usb_autopm_get_interface_no_resume(us->pusb_intf); snprintf(us->scsi_name, sizeof(us->scsi_name), "usb-storage %s", - dev_name(dev)); - result = scsi_add_host_with_dma(us_to_host(us), dev, - us->pusb_dev->bus->sysdev); + dev_name(&us->pusb_intf->dev)); + result = scsi_add_host(us_to_host(us), dev); if (result) { dev_warn(dev, "Unable to add the scsi host\n"); From 45c5775460f32ed8cdb7c16986ae1a2c254346b3 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Mon, 30 Nov 2020 09:30:33 +0100 Subject: [PATCH 138/296] usb: ohci-omap: Fix descriptor conversion There were a bunch of issues with the patch converting the OMAP1 OSK board to use descriptors for controlling the USB host: - The chip label was incorrect - The GPIO offset was off-by-one - The code should use sleeping accessors This patch tries to fix 
all issues at the same time. Cc: Aaro Koskinen Reported-by: Aaro Koskinen Fixes: 15d157e87443 ("usb: ohci-omap: Convert to use GPIO descriptors") Signed-off-by: Linus Walleij Link: https://lore.kernel.org/r/20201130083033.29435-1-linus.walleij@linaro.org Signed-off-by: Greg Kroah-Hartman --- arch/arm/mach-omap1/board-osk.c | 2 +- drivers/usb/host/ohci-omap.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/arm/mach-omap1/board-osk.c b/arch/arm/mach-omap1/board-osk.c index 144b9caa935c..a720259099ed 100644 --- a/arch/arm/mach-omap1/board-osk.c +++ b/arch/arm/mach-omap1/board-osk.c @@ -288,7 +288,7 @@ static struct gpiod_lookup_table osk_usb_gpio_table = { .dev_id = "ohci", .table = { /* Power GPIO on the I2C-attached TPS65010 */ - GPIO_LOOKUP("i2c-tps65010", 1, "power", GPIO_ACTIVE_HIGH), + GPIO_LOOKUP("tps65010", 0, "power", GPIO_ACTIVE_HIGH), GPIO_LOOKUP(OMAP_GPIO_LABEL, 9, "overcurrent", GPIO_ACTIVE_HIGH), }, diff --git a/drivers/usb/host/ohci-omap.c b/drivers/usb/host/ohci-omap.c index 9ccdf2c216b5..6374501ba139 100644 --- a/drivers/usb/host/ohci-omap.c +++ b/drivers/usb/host/ohci-omap.c @@ -91,14 +91,14 @@ static int omap_ohci_transceiver_power(struct ohci_omap_priv *priv, int on) | ((1 << 5/*usb1*/) | (1 << 3/*usb2*/)), INNOVATOR_FPGA_CAM_USB_CONTROL); else if (priv->power) - gpiod_set_value(priv->power, 0); + gpiod_set_value_cansleep(priv->power, 0); } else { if (machine_is_omap_innovator() && cpu_is_omap1510()) __raw_writeb(__raw_readb(INNOVATOR_FPGA_CAM_USB_CONTROL) & ~((1 << 5/*usb1*/) | (1 << 3/*usb2*/)), INNOVATOR_FPGA_CAM_USB_CONTROL); else if (priv->power) - gpiod_set_value(priv->power, 1); + gpiod_set_value_cansleep(priv->power, 1); } return 0; From a4b98a7512f18534ce33a7e98e49115af59ffa00 Mon Sep 17 00:00:00 2001 From: Vamsi Krishna Samavedam Date: Mon, 30 Nov 2020 12:34:53 -0800 Subject: [PATCH 139/296] usb: gadget: f_fs: Use local copy of descriptors for userspace copy The function may be unbound causing the ffs_ep and 
its descriptors to be freed while userspace is in the middle of an ioctl requesting the same descriptors. Avoid dangling pointer reference by first making a local copy of descriptors before releasing the spinlock. Fixes: c559a3534109 ("usb: gadget: f_fs: add ioctl returning ep descriptor") Reviewed-by: Peter Chen Signed-off-by: Vamsi Krishna Samavedam Signed-off-by: Jack Pham Cc: stable Link: https://lore.kernel.org/r/20201130203453.28154-1-jackp@codeaurora.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/function/f_fs.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/usb/gadget/function/f_fs.c b/drivers/usb/gadget/function/f_fs.c index 046f770a76da..c727cb5de871 100644 --- a/drivers/usb/gadget/function/f_fs.c +++ b/drivers/usb/gadget/function/f_fs.c @@ -1324,7 +1324,7 @@ static long ffs_epfile_ioctl(struct file *file, unsigned code, case FUNCTIONFS_ENDPOINT_DESC: { int desc_idx; - struct usb_endpoint_descriptor *desc; + struct usb_endpoint_descriptor desc1, *desc; switch (epfile->ffs->gadget->speed) { case USB_SPEED_SUPER: @@ -1336,10 +1336,12 @@ static long ffs_epfile_ioctl(struct file *file, unsigned code, default: desc_idx = 0; } + desc = epfile->ep->descs[desc_idx]; + memcpy(&desc1, desc, desc->bLength); spin_unlock_irq(&epfile->ffs->eps_lock); - ret = copy_to_user((void __user *)value, desc, desc->bLength); + ret = copy_to_user((void __user *)value, &desc1, desc1.bLength); if (ret) ret = -EFAULT; return ret; From 12c8a8ca117f3d734babc3fba131fdaa329d2163 Mon Sep 17 00:00:00 2001 From: Zhang Changzhong Date: Fri, 4 Dec 2020 18:21:16 +0800 Subject: [PATCH 140/296] xsk: Return error code if force_zc is set If force_zc is set, we should exit out with an error, not fall back to copy mode.
Fixes: 921b68692abb ("xsk: Enable sharing of dma mappings") Reported-by: Hulk Robot Signed-off-by: Zhang Changzhong Signed-off-by: Daniel Borkmann Acked-by: Magnus Karlsson Link: https://lore.kernel.org/bpf/1607077277-41995-1-git-send-email-zhangchangzhong@huawei.com --- net/xdp/xsk_buff_pool.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/xdp/xsk_buff_pool.c b/net/xdp/xsk_buff_pool.c index 9287eddec52c..d5adeee9d5d9 100644 --- a/net/xdp/xsk_buff_pool.c +++ b/net/xdp/xsk_buff_pool.c @@ -175,6 +175,7 @@ static int __xp_assign_dev(struct xsk_buff_pool *pool, if (!pool->dma_pages) { WARN(1, "Driver did not DMA map zero-copy buffers"); + err = -EINVAL; goto err_unreg_xsk; } pool->umem->zc = true; From 54ffccbf053b5b6ca4f6e45094b942fab92a25fc Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Thu, 3 Dec 2020 02:25:04 +0100 Subject: [PATCH 141/296] tty: Fix ->pgrp locking in tiocspgrp() tiocspgrp() takes two tty_struct pointers: One to the tty that userspace passed to ioctl() (`tty`) and one to the TTY being changed (`real_tty`). These pointers are different when ioctl() is called with a master fd. To properly lock real_tty->pgrp, we must take real_tty->ctrl_lock. This bug makes it possible for racing ioctl(TIOCSPGRP, ...) calls on both sides of a PTY pair to corrupt the refcount of `struct pid`, leading to use-after-free errors. 
Fixes: 47f86834bbd4 ("redo locking of tty->pgrp") CC: stable@kernel.org Signed-off-by: Jann Horn Reviewed-by: Jiri Slaby Signed-off-by: Greg Kroah-Hartman --- drivers/tty/tty_jobctrl.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/tty/tty_jobctrl.c b/drivers/tty/tty_jobctrl.c index 28a23a0fef21..baadeea4a289 100644 --- a/drivers/tty/tty_jobctrl.c +++ b/drivers/tty/tty_jobctrl.c @@ -494,10 +494,10 @@ static int tiocspgrp(struct tty_struct *tty, struct tty_struct *real_tty, pid_t if (session_of_pgrp(pgrp) != task_session(current)) goto out_unlock; retval = 0; - spin_lock_irq(&tty->ctrl_lock); + spin_lock_irq(&real_tty->ctrl_lock); put_pid(real_tty->pgrp); real_tty->pgrp = get_pid(pgrp); - spin_unlock_irq(&tty->ctrl_lock); + spin_unlock_irq(&real_tty->ctrl_lock); out_unlock: rcu_read_unlock(); return retval; From c8bcd9c5be24fb9e6132e97da5a35e55a83e36b9 Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Thu, 3 Dec 2020 02:25:05 +0100 Subject: [PATCH 142/296] tty: Fix ->session locking Currently, locking of ->session is very inconsistent; most places protect it using the legacy tty mutex, but disassociate_ctty(), __do_SAK(), tiocspgrp() and tiocgsid() don't. Two of the writers hold the ctrl_lock (because they already need it for ->pgrp), but __proc_set_tty() doesn't do that yet. On a PREEMPT=y system, an unprivileged user can theoretically abuse this broken locking to read 4 bytes of freed memory via TIOCGSID if tiocgsid() is preempted long enough at the right point. (Other things might also go wrong, especially if root-only ioctls are involved; I'm not sure about that.) Change the locking on ->session such that: - tty_lock() is held by all writers: By making disassociate_ctty() hold it. This should be fine because the same lock can already be taken through the call to tty_vhangup_session(). 
The tricky part is that we need to shorten the area covered by siglock to be able to take tty_lock() without ugly retry logic; as far as I can tell, this should be fine, since nothing in the signal_struct is touched in the `if (tty)` branch. - ctrl_lock is held by all writers: By changing __proc_set_tty() to hold the lock a little longer. - All readers that aren't holding tty_lock() hold ctrl_lock: By adding locking to tiocgsid() and __do_SAK(), and expanding the area covered by ctrl_lock in tiocspgrp(). Cc: stable@kernel.org Signed-off-by: Jann Horn Reviewed-by: Jiri Slaby Signed-off-by: Greg Kroah-Hartman --- drivers/tty/tty_io.c | 7 ++++++- drivers/tty/tty_jobctrl.c | 44 +++++++++++++++++++++++++++------------ include/linux/tty.h | 4 ++++ 3 files changed, 41 insertions(+), 14 deletions(-) diff --git a/drivers/tty/tty_io.c b/drivers/tty/tty_io.c index 9f8b9a567b35..56ade99ef99f 100644 --- a/drivers/tty/tty_io.c +++ b/drivers/tty/tty_io.c @@ -2897,10 +2897,14 @@ void __do_SAK(struct tty_struct *tty) struct task_struct *g, *p; struct pid *session; int i; + unsigned long flags; if (!tty) return; - session = tty->session; + + spin_lock_irqsave(&tty->ctrl_lock, flags); + session = get_pid(tty->session); + spin_unlock_irqrestore(&tty->ctrl_lock, flags); tty_ldisc_flush(tty); @@ -2932,6 +2936,7 @@ void __do_SAK(struct tty_struct *tty) task_unlock(p); } while_each_thread(g, p); read_unlock(&tasklist_lock); + put_pid(session); #endif } diff --git a/drivers/tty/tty_jobctrl.c b/drivers/tty/tty_jobctrl.c index baadeea4a289..aa6d0537b379 100644 --- a/drivers/tty/tty_jobctrl.c +++ b/drivers/tty/tty_jobctrl.c @@ -103,8 +103,8 @@ static void __proc_set_tty(struct tty_struct *tty) put_pid(tty->session); put_pid(tty->pgrp); tty->pgrp = get_pid(task_pgrp(current)); - spin_unlock_irqrestore(&tty->ctrl_lock, flags); tty->session = get_pid(task_session(current)); + spin_unlock_irqrestore(&tty->ctrl_lock, flags); if (current->signal->tty) { tty_debug(tty, "current tty %s not NULL!!\n", 
current->signal->tty->name); @@ -293,20 +293,23 @@ void disassociate_ctty(int on_exit) spin_lock_irq(&current->sighand->siglock); put_pid(current->signal->tty_old_pgrp); current->signal->tty_old_pgrp = NULL; - tty = tty_kref_get(current->signal->tty); + spin_unlock_irq(&current->sighand->siglock); + if (tty) { unsigned long flags; + + tty_lock(tty); spin_lock_irqsave(&tty->ctrl_lock, flags); put_pid(tty->session); put_pid(tty->pgrp); tty->session = NULL; tty->pgrp = NULL; spin_unlock_irqrestore(&tty->ctrl_lock, flags); + tty_unlock(tty); tty_kref_put(tty); } - spin_unlock_irq(&current->sighand->siglock); /* Now clear signal->tty under the lock */ read_lock(&tasklist_lock); session_clear_tty(task_session(current)); @@ -477,14 +480,19 @@ static int tiocspgrp(struct tty_struct *tty, struct tty_struct *real_tty, pid_t return -ENOTTY; if (retval) return retval; - if (!current->signal->tty || - (current->signal->tty != real_tty) || - (real_tty->session != task_session(current))) - return -ENOTTY; + if (get_user(pgrp_nr, p)) return -EFAULT; if (pgrp_nr < 0) return -EINVAL; + + spin_lock_irq(&real_tty->ctrl_lock); + if (!current->signal->tty || + (current->signal->tty != real_tty) || + (real_tty->session != task_session(current))) { + retval = -ENOTTY; + goto out_unlock_ctrl; + } rcu_read_lock(); pgrp = find_vpid(pgrp_nr); retval = -ESRCH; @@ -494,12 +502,12 @@ static int tiocspgrp(struct tty_struct *tty, struct tty_struct *real_tty, pid_t if (session_of_pgrp(pgrp) != task_session(current)) goto out_unlock; retval = 0; - spin_lock_irq(&real_tty->ctrl_lock); put_pid(real_tty->pgrp); real_tty->pgrp = get_pid(pgrp); - spin_unlock_irq(&real_tty->ctrl_lock); out_unlock: rcu_read_unlock(); +out_unlock_ctrl: + spin_unlock_irq(&real_tty->ctrl_lock); return retval; } @@ -511,20 +519,30 @@ out_unlock: * * Obtain the session id of the tty. If there is no session * return an error. - * - * Locking: none. Reference to current->signal->tty is safe.
*/ static int tiocgsid(struct tty_struct *tty, struct tty_struct *real_tty, pid_t __user *p) { + unsigned long flags; + pid_t sid; + /* * (tty == real_tty) is a cheap way of * testing if the tty is NOT a master pty. */ if (tty == real_tty && current->signal->tty != real_tty) return -ENOTTY; + + spin_lock_irqsave(&real_tty->ctrl_lock, flags); if (!real_tty->session) - return -ENOTTY; - return put_user(pid_vnr(real_tty->session), p); + goto err; + sid = pid_vnr(real_tty->session); + spin_unlock_irqrestore(&real_tty->ctrl_lock, flags); + + return put_user(sid, p); + +err: + spin_unlock_irqrestore(&real_tty->ctrl_lock, flags); + return -ENOTTY; } /* diff --git a/include/linux/tty.h b/include/linux/tty.h index a99e9b8e4e31..eb33d948788c 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -306,6 +306,10 @@ struct tty_struct { struct termiox *termiox; /* May be NULL for unsupported */ char name[64]; struct pid *pgrp; /* Protected by ctrl lock */ + /* + * Writes protected by both ctrl lock and legacy mutex, readers must use + * at least one of them. + */ struct pid *session; unsigned long flags; int count; From 3ee16db390b42b8a21f2ad2ea2518f3469c6e532 Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Mon, 30 Nov 2020 10:57:43 -0500 Subject: [PATCH 143/296] dm: fix IO splitting Commit 882ec4e609c1 ("dm table: stack 'chunk_sectors' limit to account for target-specific splitting") caused a couple regressions: 1) Using lcm_not_zero() when stacking chunk_sectors was a bug because chunk_sectors must reflect the most limited of all devices in the IO stack. 2) DM targets that set max_io_len but that do _not_ provide an .iterate_devices method no longer had their IO split properly. And commit 5091cdec56fa ("dm: change max_io_len() to use blk_max_size_offset()") also caused a regression where DM no longer supported varied (per target) IO splitting. 
The implication being the potential for severely reduced performance for IO stacks that use a DM target like dm-cache to hide performance limitations of a slower device (e.g. one that requires 4K IO splitting). Coming full circle: Fix all these issues by discontinuing stacking chunk_sectors up using ti->max_io_len in dm_calculate_queue_limits(), add optional chunk_sectors override argument to blk_max_size_offset() and update DM's max_io_len() to pass ti->max_io_len to its blk_max_size_offset() call. Passing in an optional chunk_sectors override to blk_max_size_offset() allows for code reuse of block's centralized calculation for max IO size based on provided offset and split boundary. Fixes: 882ec4e609c1 ("dm table: stack 'chunk_sectors' limit to account for target-specific splitting") Fixes: 5091cdec56fa ("dm: change max_io_len() to use blk_max_size_offset()") Cc: stable@vger.kernel.org Reported-by: John Dorminy Reported-by: Bruce Johnston Reported-by: Kirill Tkhai Reviewed-by: John Dorminy Signed-off-by: Mike Snitzer Reviewed-by: Jens Axboe --- block/blk-merge.c | 2 +- drivers/md/dm-table.c | 5 ----- drivers/md/dm.c | 19 +++++++++++-------- include/linux/blkdev.h | 11 ++++++----- 4 files changed, 18 insertions(+), 19 deletions(-) diff --git a/block/blk-merge.c b/block/blk-merge.c index bcf5e4580603..97b7c2821565 100644 --- a/block/blk-merge.c +++ b/block/blk-merge.c @@ -144,7 +144,7 @@ static struct bio *blk_bio_write_same_split(struct request_queue *q, static inline unsigned get_max_io_size(struct request_queue *q, struct bio *bio) { - unsigned sectors = blk_max_size_offset(q, bio->bi_iter.bi_sector); + unsigned sectors = blk_max_size_offset(q, bio->bi_iter.bi_sector, 0); unsigned max_sectors = sectors; unsigned pbs = queue_physical_block_size(q) >> SECTOR_SHIFT; unsigned lbs = queue_logical_block_size(q) >> SECTOR_SHIFT; diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index 2073ee8d18f4..7eeb7c4169c9 100644 --- a/drivers/md/dm-table.c +++ 
b/drivers/md/dm-table.c @@ -18,7 +18,6 @@ #include #include #include -#include #include #include #include @@ -1449,10 +1448,6 @@ int dm_calculate_queue_limits(struct dm_table *table, zone_sectors = ti_limits.chunk_sectors; } - /* Stack chunk_sectors if target-specific splitting is required */ - if (ti->max_io_len) - ti_limits.chunk_sectors = lcm_not_zero(ti->max_io_len, - ti_limits.chunk_sectors); /* Set I/O hints portion of queue limits */ if (ti->type->io_hints) ti->type->io_hints(ti, &ti_limits); diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 98866e725f25..f7eb3d2964f3 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -1039,15 +1039,18 @@ static sector_t max_io_len(struct dm_target *ti, sector_t sector) sector_t max_len; /* - * Does the target need to split even further? - * - q->limits.chunk_sectors reflects ti->max_io_len so - * blk_max_size_offset() provides required splitting. - * - blk_max_size_offset() also respects q->limits.max_sectors + * Does the target need to split IO even further? + * - varied (per target) IO splitting is a tenet of DM; this + * explains why stacked chunk_sectors based splitting via + * blk_max_size_offset() isn't possible here. So pass in + * ti->max_io_len to override stacked chunk_sectors. */ - max_len = blk_max_size_offset(ti->table->md->queue, - target_offset); - if (len > max_len) - len = max_len; + if (ti->max_io_len) { + max_len = blk_max_size_offset(ti->table->md->queue, + target_offset, ti->max_io_len); + if (len > max_len) + len = max_len; + } return len; } diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 639cae2c158b..24ae504cf77d 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1073,11 +1073,12 @@ static inline unsigned int blk_queue_get_max_sectors(struct request_queue *q, * file system requests. 
*/ static inline unsigned int blk_max_size_offset(struct request_queue *q, - sector_t offset) + sector_t offset, + unsigned int chunk_sectors) { - unsigned int chunk_sectors = q->limits.chunk_sectors; - - if (!chunk_sectors) + if (!chunk_sectors && q->limits.chunk_sectors) + chunk_sectors = q->limits.chunk_sectors; + else return q->limits.max_sectors; if (likely(is_power_of_2(chunk_sectors))) @@ -1101,7 +1102,7 @@ static inline unsigned int blk_rq_get_max_sectors(struct request *rq, req_op(rq) == REQ_OP_SECURE_ERASE) return blk_queue_get_max_sectors(q, req_op(rq)); - return min(blk_max_size_offset(q, offset), + return min(blk_max_size_offset(q, offset, 0), blk_queue_get_max_sectors(q, req_op(rq))); } From f05c4403db5bba881d4964e731f6da35be46aabd Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Fri, 4 Dec 2020 15:19:27 -0500 Subject: [PATCH 144/296] dm: fix double RCU unlock in dm_dax_zero_page_range() error path Remove redundant dm_put_live_table() in dm_dax_zero_page_range() error path to fix sparse warning: drivers/md/dm.c:1208:9: warning: context imbalance in 'dm_dax_zero_page_range' - unexpected unlock Fixes: cdf6cdcd3b99a ("dm,dax: Add dax zero_page_range operation") Cc: stable@vger.kernel.org Signed-off-by: Mike Snitzer --- drivers/md/dm.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/md/dm.c b/drivers/md/dm.c index f7eb3d2964f3..45c623ff931a 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -1201,11 +1201,9 @@ static int dm_dax_zero_page_range(struct dax_device *dax_dev, pgoff_t pgoff, * ->zero_page_range() is mandatory dax operation. If we are * here, something is wrong. 
*/ - dm_put_live_table(md, srcu_idx); goto out; } ret = ti->type->dax_zero_page_range(ti, pgoff, nr_pages); - out: dm_put_live_table(md, srcu_idx); From bde3808bc8c2741ad3d804f84720409aee0c2972 Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Fri, 4 Dec 2020 15:25:18 -0500 Subject: [PATCH 145/296] dm: remove invalid sparse __acquires and __releases annotations Fixes sparse warnings: drivers/md/dm.c:508:12: warning: context imbalance in 'dm_prepare_ioctl' - wrong count at exit drivers/md/dm.c:543:13: warning: context imbalance in 'dm_unprepare_ioctl' - wrong count at exit Fixes: 971888c46993f ("dm: hold DM table for duration of ioctl rather than use blkdev_get") Cc: stable@vger.kernel.org Signed-off-by: Mike Snitzer --- drivers/md/dm.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 45c623ff931a..4e0cbfe3f14d 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -509,7 +509,6 @@ out: static int dm_prepare_ioctl(struct mapped_device *md, int *srcu_idx, struct block_device **bdev) - __acquires(md->io_barrier) { struct dm_target *tgt; struct dm_table *map; @@ -543,7 +542,6 @@ retry: } static void dm_unprepare_ioctl(struct mapped_device *md, int srcu_idx) - __releases(md->io_barrier) { dm_put_live_table(md, srcu_idx); } From bcee5278958802b40ee8b26679155a6d9231783e Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Fri, 4 Dec 2020 16:36:16 -0500 Subject: [PATCH 146/296] tracing: Fix userstacktrace option for instances When the instances were able to use their own options, the userstacktrace option was left hardcoded for the top level. This made the instance userstacktrace option basically into a nop, and will confuse users that set it, but nothing happens (I was confused when it happened to me!) 
Cc: stable@vger.kernel.org Fixes: 16270145ce6b ("tracing: Add trace options for core options to instances") Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 7d53c5bdea3e..06134189e9a7 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -163,7 +163,8 @@ static union trace_eval_map_item *trace_eval_maps; #endif /* CONFIG_TRACE_EVAL_MAP_FILE */ int tracing_set_tracer(struct trace_array *tr, const char *buf); -static void ftrace_trace_userstack(struct trace_buffer *buffer, +static void ftrace_trace_userstack(struct trace_array *tr, + struct trace_buffer *buffer, unsigned long flags, int pc); #define MAX_TRACER_SIZE 100 @@ -2870,7 +2871,7 @@ void trace_buffer_unlock_commit_regs(struct trace_array *tr, * two. They are not that meaningful. */ ftrace_trace_stack(tr, buffer, flags, regs ? 0 : STACK_SKIP, pc, regs); - ftrace_trace_userstack(buffer, flags, pc); + ftrace_trace_userstack(tr, buffer, flags, pc); } /* @@ -3056,13 +3057,14 @@ EXPORT_SYMBOL_GPL(trace_dump_stack); static DEFINE_PER_CPU(int, user_stack_count); static void -ftrace_trace_userstack(struct trace_buffer *buffer, unsigned long flags, int pc) +ftrace_trace_userstack(struct trace_array *tr, + struct trace_buffer *buffer, unsigned long flags, int pc) { struct trace_event_call *call = &event_user_stack; struct ring_buffer_event *event; struct userstack_entry *entry; - if (!(global_trace.trace_flags & TRACE_ITER_USERSTACKTRACE)) + if (!(tr->trace_flags & TRACE_ITER_USERSTACKTRACE)) return; /* @@ -3101,7 +3103,8 @@ ftrace_trace_userstack(struct trace_buffer *buffer, unsigned long flags, int pc) preempt_enable(); } #else /* CONFIG_USER_STACKTRACE_SUPPORT */ -static void ftrace_trace_userstack(struct trace_buffer *buffer, +static void ftrace_trace_userstack(struct trace_array *tr, + struct trace_buffer *buffer, unsigned long flags, int pc) { } From 
4eef8b1f36f2ff06966b8f7c2143ef0c447877de Mon Sep 17 00:00:00 2001 From: Davide Caratti Date: Thu, 3 Dec 2020 19:40:47 +0100 Subject: [PATCH 147/296] net/sched: fq_pie: initialize timer earlier in fq_pie_init() with the following tdc testcase: 83be: (qdisc, fq_pie) Create FQ-PIE with invalid number of flows as fq_pie_init() fails, fq_pie_destroy() is called to clean up. Since the timer is not yet initialized, it's possible to observe a splat like this: INFO: trying to register non-static key. the code is fine but needs lockdep annotation. turning off the locking correctness validator. CPU: 0 PID: 975 Comm: tc Not tainted 5.10.0-rc4+ #298 Hardware name: Red Hat KVM, BIOS 1.11.1-4.module+el8.1.0+4066+0f1aadab 04/01/2014 Call Trace: dump_stack+0x99/0xcb register_lock_class+0x12dd/0x1750 __lock_acquire+0xfe/0x3970 lock_acquire+0x1c8/0x7f0 del_timer_sync+0x49/0xd0 fq_pie_destroy+0x3f/0x80 [sch_fq_pie] qdisc_create+0x916/0x1160 tc_modify_qdisc+0x3c4/0x1630 rtnetlink_rcv_msg+0x346/0x8e0 netlink_unicast+0x439/0x630 netlink_sendmsg+0x719/0xbf0 sock_sendmsg+0xe2/0x110 ____sys_sendmsg+0x5ba/0x890 ___sys_sendmsg+0xe9/0x160 __sys_sendmsg+0xd3/0x170 do_syscall_64+0x33/0x40 entry_SYSCALL_64_after_hwframe+0x44/0xa9 [...] ODEBUG: assert_init not available (active state 0) object type: timer_list hint: 0x0 WARNING: CPU: 0 PID: 975 at lib/debugobjects.c:508 debug_print_object+0x162/0x210 [...] 
Call Trace: debug_object_assert_init+0x268/0x380 try_to_del_timer_sync+0x6a/0x100 del_timer_sync+0x9e/0xd0 fq_pie_destroy+0x3f/0x80 [sch_fq_pie] qdisc_create+0x916/0x1160 tc_modify_qdisc+0x3c4/0x1630 rtnetlink_rcv_msg+0x346/0x8e0 netlink_rcv_skb+0x120/0x380 netlink_unicast+0x439/0x630 netlink_sendmsg+0x719/0xbf0 sock_sendmsg+0xe2/0x110 ____sys_sendmsg+0x5ba/0x890 ___sys_sendmsg+0xe9/0x160 __sys_sendmsg+0xd3/0x170 do_syscall_64+0x33/0x40 entry_SYSCALL_64_after_hwframe+0x44/0xa9 fix it moving timer_setup() before any failure, like it was done on 'red' with former commit 608b4adab178 ("net_sched: initialize timer earlier in red_init()"). Fixes: ec97ecf1ebe4 ("net: sched: add Flow Queue PIE packet scheduler") Signed-off-by: Davide Caratti Reviewed-by: Cong Wang Link: https://lore.kernel.org/r/2e78e01c504c633ebdff18d041833cf2e079a3a4.1607020450.git.dcaratti@redhat.com Signed-off-by: Jakub Kicinski --- net/sched/sch_fq_pie.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sched/sch_fq_pie.c b/net/sched/sch_fq_pie.c index 4dda15588cf4..949163fe68af 100644 --- a/net/sched/sch_fq_pie.c +++ b/net/sched/sch_fq_pie.c @@ -401,6 +401,7 @@ static int fq_pie_init(struct Qdisc *sch, struct nlattr *opt, INIT_LIST_HEAD(&q->new_flows); INIT_LIST_HEAD(&q->old_flows); + timer_setup(&q->adapt_timer, fq_pie_timer, 0); if (opt) { err = fq_pie_change(sch, opt, extack); @@ -426,7 +427,6 @@ static int fq_pie_init(struct Qdisc *sch, struct nlattr *opt, pie_vars_init(&flow->vars); } - timer_setup(&q->adapt_timer, fq_pie_timer, 0); mod_timer(&q->adapt_timer, jiffies + HZ / 2); return 0; From 65f33b35722952fa076811d5686bfd8a611a80fa Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Fri, 4 Dec 2020 17:21:03 -0500 Subject: [PATCH 148/296] block: fix incorrect branching in blk_max_size_offset() If non-zero 'chunk_sectors' is passed in to blk_max_size_offset() that override will be incorrectly ignored. 
Old blk_max_size_offset() branching, prior to commit 3ee16db390b4, must be used only if passed 'chunk_sectors' override is zero. Fixes: 3ee16db390b4 ("dm: fix IO splitting") Cc: stable@vger.kernel.org # 5.9 Reported-by: John Dorminy Signed-off-by: Mike Snitzer --- include/linux/blkdev.h | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 24ae504cf77d..033eb5f73b65 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1076,10 +1076,12 @@ static inline unsigned int blk_max_size_offset(struct request_queue *q, sector_t offset, unsigned int chunk_sectors) { - if (!chunk_sectors && q->limits.chunk_sectors) - chunk_sectors = q->limits.chunk_sectors; - else - return q->limits.max_sectors; + if (!chunk_sectors) { + if (q->limits.chunk_sectors) + chunk_sectors = q->limits.chunk_sectors; + else + return q->limits.max_sectors; + } if (likely(is_power_of_2(chunk_sectors))) chunk_sectors -= offset & (chunk_sectors - 1); From 1130b252480f3c98cf468e78c1c5c516b390a29c Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Thu, 3 Dec 2020 15:51:06 -0600 Subject: [PATCH 149/296] net: ipa: pass the correct size when freeing DMA memory When the coherent memory is freed in gsi_trans_pool_exit_dma(), we are mistakenly passing the size of a single element in the pool rather than the actual allocated size. Fix this bug. 
Fixes: 9dd441e4ed575 ("soc: qcom: ipa: GSI transactions") Reported-by: Stephen Boyd Tested-by: Sujit Kautkar Signed-off-by: Alex Elder Reviewed-by: Bjorn Andersson Link: https://lore.kernel.org/r/20201203215106.17450-1-elder@linaro.org Signed-off-by: Jakub Kicinski --- drivers/net/ipa/gsi_trans.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/net/ipa/gsi_trans.c b/drivers/net/ipa/gsi_trans.c index e8599bb948c0..6c3ed5b17b80 100644 --- a/drivers/net/ipa/gsi_trans.c +++ b/drivers/net/ipa/gsi_trans.c @@ -156,6 +156,9 @@ int gsi_trans_pool_init_dma(struct device *dev, struct gsi_trans_pool *pool, /* The allocator will give us a power-of-2 number of pages. But we * can't guarantee that, so request it. That way we won't waste any * memory that would be available beyond the required space. + * + * Note that gsi_trans_pool_exit_dma() assumes the total allocated + * size is exactly (count * size). */ total_size = get_order(total_size) << PAGE_SHIFT; @@ -175,7 +178,9 @@ int gsi_trans_pool_init_dma(struct device *dev, struct gsi_trans_pool *pool, void gsi_trans_pool_exit_dma(struct device *dev, struct gsi_trans_pool *pool) { - dma_free_coherent(dev, pool->size, pool->base, pool->addr); + size_t total_size = pool->count * pool->size; + + dma_free_coherent(dev, total_size, pool->base, pool->addr); memset(pool, 0, sizeof(*pool)); } From 0b32e91fdfd87314af9943e69eb85a88adb4233c Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 4 Dec 2020 00:20:37 +0100 Subject: [PATCH 150/296] ethernet: select CONFIG_CRC32 as needed A number of ethernet drivers require crc32 functionality to be available in the kernel, causing a link error otherwise: arm-linux-gnueabi-ld: drivers/net/ethernet/agere/et131x.o: in function `et1310_setup_device_for_multicast': et131x.c:(.text+0x5918): undefined reference to `crc32_le' arm-linux-gnueabi-ld: drivers/net/ethernet/cadence/macb_main.o: in function `macb_start_xmit': macb_main.c:(.text+0x4b88): undefined reference to 
`crc32_le' arm-linux-gnueabi-ld: drivers/net/ethernet/faraday/ftgmac100.o: in function `ftgmac100_set_rx_mode': ftgmac100.c:(.text+0x2b38): undefined reference to `crc32_le' arm-linux-gnueabi-ld: drivers/net/ethernet/freescale/fec_main.o: in function `set_multicast_list': fec_main.c:(.text+0x6120): undefined reference to `crc32_le' arm-linux-gnueabi-ld: drivers/net/ethernet/freescale/fman/fman_dtsec.o: in function `dtsec_add_hash_mac_address': fman_dtsec.c:(.text+0x830): undefined reference to `crc32_le' arm-linux-gnueabi-ld: drivers/net/ethernet/freescale/fman/fman_dtsec.o:fman_dtsec.c:(.text+0xb68): more undefined references to `crc32_le' follow arm-linux-gnueabi-ld: drivers/net/ethernet/netronome/nfp/nfpcore/nfp_hwinfo.o: in function `nfp_hwinfo_read': nfp_hwinfo.c:(.text+0x250): undefined reference to `crc32_be' arm-linux-gnueabi-ld: nfp_hwinfo.c:(.text+0x288): undefined reference to `crc32_be' arm-linux-gnueabi-ld: drivers/net/ethernet/netronome/nfp/nfpcore/nfp_resource.o: in function `nfp_resource_acquire': nfp_resource.c:(.text+0x144): undefined reference to `crc32_be' arm-linux-gnueabi-ld: nfp_resource.c:(.text+0x158): undefined reference to `crc32_be' arm-linux-gnueabi-ld: drivers/net/ethernet/nxp/lpc_eth.o: in function `lpc_eth_set_multicast_list': lpc_eth.c:(.text+0x1934): undefined reference to `crc32_le' arm-linux-gnueabi-ld: drivers/net/ethernet/rocker/rocker_ofdpa.o: in function `ofdpa_flow_tbl_do': rocker_ofdpa.c:(.text+0x2e08): undefined reference to `crc32_le' arm-linux-gnueabi-ld: drivers/net/ethernet/rocker/rocker_ofdpa.o: in function `ofdpa_flow_tbl_del': rocker_ofdpa.c:(.text+0x3074): undefined reference to `crc32_le' arm-linux-gnueabi-ld: drivers/net/ethernet/rocker/rocker_ofdpa.o: in function `ofdpa_port_fdb': arm-linux-gnueabi-ld: drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.o: in function `mlx5dr_ste_calc_hash_index': dr_ste.c:(.text+0x354): undefined reference to `crc32_le' arm-linux-gnueabi-ld: 
drivers/net/ethernet/microchip/lan743x_main.o: in function `lan743x_netdev_set_multicast': lan743x_main.c:(.text+0x5dc4): undefined reference to `crc32_le' Add the missing 'select CRC32' entries in Kconfig for each of them. Signed-off-by: Arnd Bergmann Acked-by: Nicolas Ferre Acked-by: Madalin Bucur Acked-by: Mark Einon Acked-by: Simon Horman Link: https://lore.kernel.org/r/20201203232114.1485603-1-arnd@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/agere/Kconfig | 1 + drivers/net/ethernet/cadence/Kconfig | 1 + drivers/net/ethernet/faraday/Kconfig | 1 + drivers/net/ethernet/freescale/Kconfig | 1 + drivers/net/ethernet/freescale/fman/Kconfig | 1 + drivers/net/ethernet/mellanox/mlx5/core/Kconfig | 1 + drivers/net/ethernet/microchip/Kconfig | 1 + drivers/net/ethernet/netronome/Kconfig | 1 + drivers/net/ethernet/nxp/Kconfig | 1 + drivers/net/ethernet/rocker/Kconfig | 1 + 10 files changed, 10 insertions(+) diff --git a/drivers/net/ethernet/agere/Kconfig b/drivers/net/ethernet/agere/Kconfig index d92516ae59cc..9cd750184947 100644 --- a/drivers/net/ethernet/agere/Kconfig +++ b/drivers/net/ethernet/agere/Kconfig @@ -21,6 +21,7 @@ config ET131X tristate "Agere ET-1310 Gigabit Ethernet support" depends on PCI select PHYLIB + select CRC32 help This driver supports Agere ET-1310 ethernet adapters. diff --git a/drivers/net/ethernet/cadence/Kconfig b/drivers/net/ethernet/cadence/Kconfig index 85858163bac5..e432a68ac520 100644 --- a/drivers/net/ethernet/cadence/Kconfig +++ b/drivers/net/ethernet/cadence/Kconfig @@ -23,6 +23,7 @@ config MACB tristate "Cadence MACB/GEM support" depends on HAS_DMA && COMMON_CLK select PHYLINK + select CRC32 help The Cadence MACB ethernet interface is found on many Atmel AT32 and AT91 parts. 
This driver also supports the Cadence GEM (Gigabit diff --git a/drivers/net/ethernet/faraday/Kconfig b/drivers/net/ethernet/faraday/Kconfig index c2677ec0564d..3d1e9a302148 100644 --- a/drivers/net/ethernet/faraday/Kconfig +++ b/drivers/net/ethernet/faraday/Kconfig @@ -33,6 +33,7 @@ config FTGMAC100 depends on !64BIT || BROKEN select PHYLIB select MDIO_ASPEED if MACH_ASPEED_G6 + select CRC32 help This driver supports the FTGMAC100 Gigabit Ethernet controller from Faraday. It is used on Faraday A369, Andes AG102 and some diff --git a/drivers/net/ethernet/freescale/Kconfig b/drivers/net/ethernet/freescale/Kconfig index a1d53ddf1593..3f9175bdce77 100644 --- a/drivers/net/ethernet/freescale/Kconfig +++ b/drivers/net/ethernet/freescale/Kconfig @@ -25,6 +25,7 @@ config FEC depends on (M523x || M527x || M5272 || M528x || M520x || M532x || \ ARCH_MXC || SOC_IMX28 || COMPILE_TEST) default ARCH_MXC || SOC_IMX28 if ARM + select CRC32 select PHYLIB imply PTP_1588_CLOCK help diff --git a/drivers/net/ethernet/freescale/fman/Kconfig b/drivers/net/ethernet/freescale/fman/Kconfig index 34150182cc35..48bf8088795d 100644 --- a/drivers/net/ethernet/freescale/fman/Kconfig +++ b/drivers/net/ethernet/freescale/fman/Kconfig @@ -4,6 +4,7 @@ config FSL_FMAN depends on FSL_SOC || ARCH_LAYERSCAPE || COMPILE_TEST select GENERIC_ALLOCATOR select PHYLIB + select CRC32 default n help Freescale Data-Path Acceleration Architecture Frame Manager diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig index 99f1ec3b2575..3e371d24c462 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig +++ b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig @@ -198,6 +198,7 @@ config MLX5_EN_TLS config MLX5_SW_STEERING bool "Mellanox Technologies software-managed steering" depends on MLX5_CORE_EN && MLX5_ESWITCH + select CRC32 default y help Build support for software-managed steering in the NIC. 
diff --git a/drivers/net/ethernet/microchip/Kconfig b/drivers/net/ethernet/microchip/Kconfig index 31f9a82dc113..d0f6dfe0dcf3 100644 --- a/drivers/net/ethernet/microchip/Kconfig +++ b/drivers/net/ethernet/microchip/Kconfig @@ -47,6 +47,7 @@ config LAN743X depends on PCI select PHYLIB select CRC16 + select CRC32 help Support for the Microchip LAN743x PCI Express Gigabit Ethernet chip diff --git a/drivers/net/ethernet/netronome/Kconfig b/drivers/net/ethernet/netronome/Kconfig index d8b99d6a0356..b82758d5beed 100644 --- a/drivers/net/ethernet/netronome/Kconfig +++ b/drivers/net/ethernet/netronome/Kconfig @@ -22,6 +22,7 @@ config NFP depends on VXLAN || VXLAN=n depends on TLS && TLS_DEVICE || TLS_DEVICE=n select NET_DEVLINK + select CRC32 help This driver supports the Netronome(R) NFP4000/NFP6000 based cards working as a advanced Ethernet NIC. It works with both diff --git a/drivers/net/ethernet/nxp/Kconfig b/drivers/net/ethernet/nxp/Kconfig index ee83a71c2509..c84997db828c 100644 --- a/drivers/net/ethernet/nxp/Kconfig +++ b/drivers/net/ethernet/nxp/Kconfig @@ -3,6 +3,7 @@ config LPC_ENET tristate "NXP ethernet MAC on LPC devices" depends on ARCH_LPC32XX || COMPILE_TEST select PHYLIB + select CRC32 help Say Y or M here if you want to use the NXP ethernet MAC included on some NXP LPC devices. You can safely enable this option for LPC32xx diff --git a/drivers/net/ethernet/rocker/Kconfig b/drivers/net/ethernet/rocker/Kconfig index 99e1290e0307..2318811ff75a 100644 --- a/drivers/net/ethernet/rocker/Kconfig +++ b/drivers/net/ethernet/rocker/Kconfig @@ -19,6 +19,7 @@ if NET_VENDOR_ROCKER config ROCKER tristate "Rocker switch driver (EXPERIMENTAL)" depends on PCI && NET_SWITCHDEV && BRIDGE + select CRC32 help This driver supports Rocker switch device. 
From b410f04eb5b482b5efc4eee90de81ad35d3d923b Mon Sep 17 00:00:00 2001 From: Zhang Changzhong Date: Fri, 4 Dec 2020 16:48:14 +0800 Subject: [PATCH 151/296] ipv4: fix error return code in rtm_to_fib_config() Fix to return a negative error code from the error handling case instead of 0, as done elsewhere in this function. Fixes: d15662682db2 ("ipv4: Allow ipv6 gateway with ipv4 routes") Reported-by: Hulk Robot Signed-off-by: Zhang Changzhong Reviewed-by: David Ahern Link: https://lore.kernel.org/r/1607071695-33740-1-git-send-email-zhangchangzhong@huawei.com Signed-off-by: Jakub Kicinski --- net/ipv4/fib_frontend.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index b87140a1fa28..cdf6ec5aa45d 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -825,7 +825,7 @@ static int rtm_to_fib_config(struct net *net, struct sk_buff *skb, if (has_gw && has_via) { NL_SET_ERR_MSG(extack, "Nexthop configuration can not contain both GATEWAY and VIA"); - goto errout; + return -EINVAL; } return 0; From ee4f52a8de2c6f78b01f10b4c330867d88c1653a Mon Sep 17 00:00:00 2001 From: Zhang Changzhong Date: Fri, 4 Dec 2020 16:48:56 +0800 Subject: [PATCH 152/296] net: bridge: vlan: fix error return code in __vlan_add() Fix to return a negative error code from the error handling case instead of 0, as done elsewhere in this function. 
Fixes: f8ed289fab84 ("bridge: vlan: use br_vlan_(get|put)_master to deal with refcounts") Reported-by: Hulk Robot Signed-off-by: Zhang Changzhong Acked-by: Nikolay Aleksandrov Link: https://lore.kernel.org/r/1607071737-33875-1-git-send-email-zhangchangzhong@huawei.com Signed-off-by: Jakub Kicinski --- net/bridge/br_vlan.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c index 3e493eb85bb2..08c77418c687 100644 --- a/net/bridge/br_vlan.c +++ b/net/bridge/br_vlan.c @@ -266,8 +266,10 @@ static int __vlan_add(struct net_bridge_vlan *v, u16 flags, } masterv = br_vlan_get_master(br, v->vid, extack); - if (!masterv) + if (!masterv) { + err = -ENOMEM; goto out_filt; + } v->brvlan = masterv; if (br_opt_get(br, BROPT_VLAN_STATS_PER_PORT)) { v->stats = netdev_alloc_pcpu_stats(struct br_vlan_stats); From bb2da7651a47dc042cb7fc9c40cd77092b6b4445 Mon Sep 17 00:00:00 2001 From: Wang Hai Date: Fri, 4 Dec 2020 19:43:14 +0800 Subject: [PATCH 153/296] openvswitch: fix error return code in validate_and_copy_dec_ttl() Fix to return a negative error code from the error handling case instead of 0, as done elsewhere in this function. Changing 'return start' to 'return action_start' can fix this bug. 
Fixes: 69929d4c49e1 ("net: openvswitch: fix TTL decrement action netlink message format") Reported-by: Hulk Robot Signed-off-by: Wang Hai Reviewed-by: Eelco Chaudron Link: https://lore.kernel.org/r/20201204114314.1596-1-wanghai38@huawei.com Signed-off-by: Jakub Kicinski --- net/openvswitch/flow_netlink.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c index ec0689ddc635..4c5c2331e764 100644 --- a/net/openvswitch/flow_netlink.c +++ b/net/openvswitch/flow_netlink.c @@ -2531,7 +2531,7 @@ static int validate_and_copy_dec_ttl(struct net *net, action_start = add_nested_action_start(sfa, OVS_DEC_TTL_ATTR_ACTION, log); if (action_start < 0) - return start; + return action_start; err = __ovs_nla_copy_actions(net, actions, key, sfa, eth_type, vlan_tci, mpls_label_count, log); From ed9b25d1970a4787ac6a39c2091e63b127ecbfc1 Mon Sep 17 00:00:00 2001 From: Serge Hallyn Date: Sun, 15 Nov 2020 21:55:31 -0600 Subject: [PATCH 154/296] [SECURITY] fix namespaced fscaps when !CONFIG_SECURITY MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Namespaced file capabilities were introduced in 8db6c34f1dbc . When userspace reads an xattr for a namespaced capability, a virtualized representation of it is returned if the caller is in a user namespace owned by the capability's owning rootid. The function which performs this virtualization was not hooked up if CONFIG_SECURITY=n. Therefore in that case the original xattr was shown instead of the virtualized one. To test this using libcap-bin (*1), $ v=$(mktemp) $ unshare -Ur setcap cap_sys_admin-eip $v $ unshare -Ur setcap -v cap_sys_admin-eip $v /tmp/tmp.lSiIFRvt8Y: OK "setcap -v" verifies the values instead of setting them, and will check whether the rootid value is set. 
Therefore, with this bug un-fixed, and with CONFIG_SECURITY=n, setcap -v will fail: $ v=$(mktemp) $ unshare -Ur setcap cap_sys_admin=eip $v $ unshare -Ur setcap -v cap_sys_admin=eip $v nsowner[got=1000, want=0],/tmp/tmp.HHDiOOl9fY differs in [] Fix this bug by calling cap_inode_getsecurity() in security_inode_getsecurity() instead of returning -EOPNOTSUPP, when CONFIG_SECURITY=n. *1 - note, if libcap is too old for getcap to have the '-n' option, then use verify-caps instead. Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=209689 Cc: Hervé Guillemet Acked-by: Casey Schaufler Signed-off-by: Serge Hallyn Signed-off-by: Andrew G. Morgan Signed-off-by: James Morris --- include/linux/security.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/security.h b/include/linux/security.h index 0a0a03b36a3b..2befc0a25eb3 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -864,7 +864,7 @@ static inline int security_inode_killpriv(struct dentry *dentry) static inline int security_inode_getsecurity(struct inode *inode, const char *name, void **buffer, bool alloc) { - return -EOPNOTSUPP; + return cap_inode_getsecurity(inode, name, buffer, alloc); } static inline int security_inode_setsecurity(struct inode *inode, const char *name, const void *value, size_t size, int flags) From 905b2032fa424f253d9126271439cc1db2b01130 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 4 Dec 2020 08:24:28 -0800 Subject: [PATCH 155/296] mac80211: mesh: fix mesh_pathtbl_init() error path If tbl_mpp can not be allocated, we call mesh_table_free(tbl_path) while tbl_path rhashtable has not yet been initialized, which causes panics. 
Simply factorize the rhashtable_init() call into mesh_table_alloc() WARNING: CPU: 1 PID: 8474 at kernel/workqueue.c:3040 __flush_work kernel/workqueue.c:3040 [inline] WARNING: CPU: 1 PID: 8474 at kernel/workqueue.c:3040 __cancel_work_timer+0x514/0x540 kernel/workqueue.c:3136 Modules linked in: CPU: 1 PID: 8474 Comm: syz-executor663 Not tainted 5.10.0-rc6-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 RIP: 0010:__flush_work kernel/workqueue.c:3040 [inline] RIP: 0010:__cancel_work_timer+0x514/0x540 kernel/workqueue.c:3136 Code: 5d c3 e8 bf ae 29 00 0f 0b e9 f0 fd ff ff e8 b3 ae 29 00 0f 0b 43 80 3c 3e 00 0f 85 31 ff ff ff e9 34 ff ff ff e8 9c ae 29 00 <0f> 0b e9 dc fe ff ff 89 e9 80 e1 07 80 c1 03 38 c1 0f 8c 7d fd ff RSP: 0018:ffffc9000165f5a0 EFLAGS: 00010293 RAX: ffffffff814b7064 RBX: 0000000000000001 RCX: ffff888021c80000 RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000000 RBP: ffff888024039ca0 R08: dffffc0000000000 R09: fffffbfff1dd3e64 R10: fffffbfff1dd3e64 R11: 0000000000000000 R12: 1ffff920002cbebd R13: ffff888024039c88 R14: 1ffff11004807391 R15: dffffc0000000000 FS: 0000000001347880(0000) GS:ffff8880b9d00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000020000140 CR3: 000000002cc0a000 CR4: 00000000001506e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: rhashtable_free_and_destroy+0x25/0x9c0 lib/rhashtable.c:1137 mesh_table_free net/mac80211/mesh_pathtbl.c:69 [inline] mesh_pathtbl_init+0x287/0x2e0 net/mac80211/mesh_pathtbl.c:785 ieee80211_mesh_init_sdata+0x2ee/0x530 net/mac80211/mesh.c:1591 ieee80211_setup_sdata+0x733/0xc40 net/mac80211/iface.c:1569 ieee80211_if_add+0xd5c/0x1cd0 net/mac80211/iface.c:1987 ieee80211_add_iface+0x59/0x130 net/mac80211/cfg.c:125 rdev_add_virtual_intf net/wireless/rdev-ops.h:45 [inline] nl80211_new_interface+0x563/0xb40 
net/wireless/nl80211.c:3855 genl_family_rcv_msg_doit net/netlink/genetlink.c:739 [inline] genl_family_rcv_msg net/netlink/genetlink.c:783 [inline] genl_rcv_msg+0xe4e/0x1280 net/netlink/genetlink.c:800 netlink_rcv_skb+0x190/0x3a0 net/netlink/af_netlink.c:2494 genl_rcv+0x24/0x40 net/netlink/genetlink.c:811 netlink_unicast_kernel net/netlink/af_netlink.c:1304 [inline] netlink_unicast+0x780/0x930 net/netlink/af_netlink.c:1330 netlink_sendmsg+0x9a8/0xd40 net/netlink/af_netlink.c:1919 sock_sendmsg_nosec net/socket.c:651 [inline] sock_sendmsg net/socket.c:671 [inline] ____sys_sendmsg+0x519/0x800 net/socket.c:2353 ___sys_sendmsg net/socket.c:2407 [inline] __sys_sendmsg+0x2b1/0x360 net/socket.c:2440 do_syscall_64+0x2d/0x70 arch/x86/entry/common.c:46 entry_SYSCALL_64_after_hwframe+0x44/0xa9 Fixes: 60854fd94573 ("mac80211: mesh: convert path table to rhashtable") Signed-off-by: Eric Dumazet Reported-by: syzbot Reviewed-by: Johannes Berg Link: https://lore.kernel.org/r/20201204162428.2583119-1-eric.dumazet@gmail.com Signed-off-by: Jakub Kicinski --- net/mac80211/mesh_pathtbl.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/net/mac80211/mesh_pathtbl.c b/net/mac80211/mesh_pathtbl.c index 48f31ac9233c..620ecf922408 100644 --- a/net/mac80211/mesh_pathtbl.c +++ b/net/mac80211/mesh_pathtbl.c @@ -60,6 +60,7 @@ static struct mesh_table *mesh_table_alloc(void) atomic_set(&newtbl->entries, 0); spin_lock_init(&newtbl->gates_lock); spin_lock_init(&newtbl->walk_lock); + rhashtable_init(&newtbl->rhead, &mesh_rht_params); return newtbl; } @@ -773,9 +774,6 @@ int mesh_pathtbl_init(struct ieee80211_sub_if_data *sdata) goto free_path; } - rhashtable_init(&tbl_path->rhead, &mesh_rht_params); - rhashtable_init(&tbl_mpp->rhead, &mesh_rht_params); - sdata->u.mesh.mesh_paths = tbl_path; sdata->u.mesh.mpp_paths = tbl_mpp; From c62dac0a35bb6d351cd568e97090c5fd3e7aa532 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 3 Dec 2020 23:32:50 +0100 Subject: [PATCH 156/296] i2c: 
mlxbf: select CONFIG_I2C_SLAVE If this is not enabled, the interfaces used in this driver do not work: drivers/i2c/busses/i2c-mlxbf.c:1888:3: error: implicit declaration of function 'i2c_slave_event' [-Werror,-Wimplicit-function-declaration] i2c_slave_event(slave, I2C_SLAVE_WRITE_REQUESTED, &value); ^ drivers/i2c/busses/i2c-mlxbf.c:1888:26: error: use of undeclared identifier 'I2C_SLAVE_WRITE_REQUESTED' i2c_slave_event(slave, I2C_SLAVE_WRITE_REQUESTED, &value); ^ drivers/i2c/busses/i2c-mlxbf.c:1890:32: error: use of undeclared identifier 'I2C_SLAVE_WRITE_RECEIVED' ret = i2c_slave_event(slave, I2C_SLAVE_WRITE_RECEIVED, ^ drivers/i2c/busses/i2c-mlxbf.c:1892:26: error: use of undeclared identifier 'I2C_SLAVE_STOP' i2c_slave_event(slave, I2C_SLAVE_STOP, &value); ^ Fixes: b5b5b32081cd ("i2c: mlxbf: I2C SMBus driver for Mellanox BlueField SoC") Signed-off-by: Arnd Bergmann Acked-by: Khalil Blaiech Signed-off-by: Wolfram Sang --- drivers/i2c/busses/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/i2c/busses/Kconfig b/drivers/i2c/busses/Kconfig index a97a9d058198..a49e0ed4a599 100644 --- a/drivers/i2c/busses/Kconfig +++ b/drivers/i2c/busses/Kconfig @@ -734,6 +734,7 @@ config I2C_LPC2K config I2C_MLXBF tristate "Mellanox BlueField I2C controller" depends on MELLANOX_PLATFORM && ARM64 + select I2C_SLAVE help Enabling this option will add I2C SMBus support for Mellanox BlueField system. From 2bf9545626f8d09f552ab86d0047a415fe9a07a0 Mon Sep 17 00:00:00 2001 From: Wang Xiaojun Date: Wed, 2 Dec 2020 20:46:47 -0500 Subject: [PATCH 157/296] i2c: mlxbf: Fix the return check of devm_ioremap and ioremap devm_ioremap and ioremap may return NULL which cannot be checked by IS_ERR. 
Signed-off-by: Wang Xiaojun Reported-by: Hulk Robot Acked-by: Khalil Blaiech Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-mlxbf.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/i2c/busses/i2c-mlxbf.c b/drivers/i2c/busses/i2c-mlxbf.c index 33574d40ea9c..2fb0532d8a16 100644 --- a/drivers/i2c/busses/i2c-mlxbf.c +++ b/drivers/i2c/busses/i2c-mlxbf.c @@ -1258,9 +1258,9 @@ static int mlxbf_i2c_get_gpio(struct platform_device *pdev, return -EFAULT; gpio_res->io = devm_ioremap(dev, params->start, size); - if (IS_ERR(gpio_res->io)) { + if (!gpio_res->io) { devm_release_mem_region(dev, params->start, size); - return PTR_ERR(gpio_res->io); + return -ENOMEM; } return 0; @@ -1323,9 +1323,9 @@ static int mlxbf_i2c_get_corepll(struct platform_device *pdev, return -EFAULT; corepll_res->io = devm_ioremap(dev, params->start, size); - if (IS_ERR(corepll_res->io)) { + if (!corepll_res->io) { devm_release_mem_region(dev, params->start, size); - return PTR_ERR(corepll_res->io); + return -ENOMEM; } return 0; @@ -1717,9 +1717,9 @@ static int mlxbf_i2c_init_coalesce(struct platform_device *pdev, return -EFAULT; coalesce_res->io = ioremap(params->start, size); - if (IS_ERR(coalesce_res->io)) { + if (!coalesce_res->io) { release_mem_region(params->start, size); - return PTR_ERR(coalesce_res->io); + return -ENOMEM; } priv->coalesce = coalesce_res; From a54ba3465d86fa5dd7d41bb88c0b5e71fb3b627e Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 3 Dec 2020 23:26:16 +0100 Subject: [PATCH 158/296] ch_ktls: fix build warning for ipv4-only config When CONFIG_IPV6 is disabled, clang complains that a variable is uninitialized for non-IPv4 data: drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.c:1046:6: error: variable 'cntrl1' is used uninitialized whenever 'if' condition is false [-Werror,-Wsometimes-uninitialized] if (tx_info->ip_family == AF_INET) { ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 
drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.c:1059:2: note: uninitialized use occurs here cntrl1 |= T6_TXPKT_ETHHDR_LEN_V(maclen - ETH_HLEN) | ^~~~~~ Replace the preprocessor conditional with the corresponding C version, and make the ipv4 case unconditional in this configuration to improve readability and avoid the warning. Fixes: 86716b51d14f ("ch_ktls: Update cheksum information") Signed-off-by: Arnd Bergmann Link: https://lore.kernel.org/r/20201203222641.964234-1-arnd@kernel.org Signed-off-by: Jakub Kicinski --- .../net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.c b/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.c index 7f90b828d159..1b7e8c91b541 100644 --- a/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.c +++ b/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.c @@ -987,9 +987,7 @@ chcr_ktls_write_tcp_options(struct chcr_ktls_info *tx_info, struct sk_buff *skb, struct fw_eth_tx_pkt_wr *wr; struct cpl_tx_pkt_core *cpl; u32 ctrl, iplen, maclen; -#if IS_ENABLED(CONFIG_IPV6) struct ipv6hdr *ip6; -#endif unsigned int ndesc; struct tcphdr *tcp; int len16, pktlen; @@ -1043,17 +1041,15 @@ chcr_ktls_write_tcp_options(struct chcr_ktls_info *tx_info, struct sk_buff *skb, cpl->len = htons(pktlen); memcpy(buf, skb->data, pktlen); - if (tx_info->ip_family == AF_INET) { + if (!IS_ENABLED(CONFIG_IPV6) || tx_info->ip_family == AF_INET) { /* we need to correct ip header len */ ip = (struct iphdr *)(buf + maclen); ip->tot_len = htons(pktlen - maclen); cntrl1 = TXPKT_CSUM_TYPE_V(TX_CSUM_TCPIP); -#if IS_ENABLED(CONFIG_IPV6) } else { ip6 = (struct ipv6hdr *)(buf + maclen); ip6->payload_len = htons(pktlen - maclen - iplen); cntrl1 = TXPKT_CSUM_TYPE_V(TX_CSUM_TCPIP6); -#endif } cntrl1 |= T6_TXPKT_ETHHDR_LEN_V(maclen - ETH_HLEN) | From 4d1be581ec6b92a338bb7ed23e1381f45ddf336f Mon Sep 17 
00:00:00 2001 From: Zhang Qilong Date: Fri, 4 Dec 2020 14:35:06 +0100 Subject: [PATCH 159/296] can: softing: softing_netdev_open(): fix error handling If softing_netdev_open() fails, we should call close_candev() to avoid reference leak. Fixes: 03fd3cf5a179d ("can: add driver for Softing card") Signed-off-by: Zhang Qilong Acked-by: Kurt Van Dijck Link: https://lore.kernel.org/r/20201202151632.1343786-1-zhangqilong3@huawei.com Signed-off-by: Marc Kleine-Budde Link: https://lore.kernel.org/r/20201204133508.742120-2-mkl@pengutronix.de Signed-off-by: Jakub Kicinski --- drivers/net/can/softing/softing_main.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/net/can/softing/softing_main.c b/drivers/net/can/softing/softing_main.c index 9d2faaa39ce4..c9ca8b9fceb9 100644 --- a/drivers/net/can/softing/softing_main.c +++ b/drivers/net/can/softing/softing_main.c @@ -382,8 +382,13 @@ static int softing_netdev_open(struct net_device *ndev) /* check or determine and set bittime */ ret = open_candev(ndev); - if (!ret) - ret = softing_startstop(ndev, 1); + if (ret) + return ret; + + ret = softing_startstop(ndev, 1); + if (ret < 0) + close_candev(ndev); + return ret; } From 205704c618af0ab2366015d2281a3b0814d918a0 Mon Sep 17 00:00:00 2001 From: Stephen Suryaputra Date: Thu, 3 Dec 2020 22:06:04 -0500 Subject: [PATCH 160/296] vrf: packets with lladdr src needs dst at input with orig_iif when needs strict Depending on the order in which the routes to fe80::/64 are installed on the VRF table, the NS for the source link-local address of the originator might be sent to the wrong interface. This patch ensures that packets with a link-local source address are looked up with the orig_iif when the destination addr indicates that a strict lookup is needed. Add the reproducer as a use case in self test script fcnal-test.sh. 
Fixes: b4869aa2f881 ("net: vrf: ipv6 support for local traffic to local addresses") Signed-off-by: Stephen Suryaputra Reviewed-by: David Ahern Link: https://lore.kernel.org/r/20201204030604.18828-1-ssuryaextr@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/vrf.c | 10 ++- tools/testing/selftests/net/fcnal-test.sh | 95 +++++++++++++++++++++++ 2 files changed, 103 insertions(+), 2 deletions(-) diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c index f2793ffde191..b9b7e00b72a8 100644 --- a/drivers/net/vrf.c +++ b/drivers/net/vrf.c @@ -1315,11 +1315,17 @@ static struct sk_buff *vrf_ip6_rcv(struct net_device *vrf_dev, int orig_iif = skb->skb_iif; bool need_strict = rt6_need_strict(&ipv6_hdr(skb)->daddr); bool is_ndisc = ipv6_ndisc_frame(skb); + bool is_ll_src; /* loopback, multicast & non-ND link-local traffic; do not push through - * packet taps again. Reset pkt_type for upper layers to process skb + * packet taps again. Reset pkt_type for upper layers to process skb. + * for packets with lladdr src, however, skip so that the dst can be + * determine at input using original ifindex in the case that daddr + * needs strict */ - if (skb->pkt_type == PACKET_LOOPBACK || (need_strict && !is_ndisc)) { + is_ll_src = ipv6_addr_type(&ipv6_hdr(skb)->saddr) & IPV6_ADDR_LINKLOCAL; + if (skb->pkt_type == PACKET_LOOPBACK || + (need_strict && !is_ndisc && !is_ll_src)) { skb->dev = vrf_dev; skb->skb_iif = vrf_dev->ifindex; IP6CB(skb)->flags |= IP6SKB_L3SLAVE; diff --git a/tools/testing/selftests/net/fcnal-test.sh b/tools/testing/selftests/net/fcnal-test.sh index fb5c55dd6df8..02b0b9ead40b 100755 --- a/tools/testing/selftests/net/fcnal-test.sh +++ b/tools/testing/selftests/net/fcnal-test.sh @@ -256,6 +256,28 @@ setup_cmd_nsb() fi } +setup_cmd_nsc() +{ + local cmd="$*" + local rc + + run_cmd_nsc ${cmd} + rc=$? + if [ $rc -ne 0 ]; then + # show user the command if not done so already + if [ "$VERBOSE" = "0" ]; then + echo "setup command: $cmd" + fi + echo "failed. 
stopping tests" + if [ "${PAUSE_ON_FAIL}" = "yes" ]; then + echo + echo "hit enter to continue" + read a + fi + exit $rc + fi +} + # set sysctl values in NS-A set_sysctl() { @@ -471,6 +493,36 @@ setup() sleep 1 } +setup_lla_only() +{ + # make sure we are starting with a clean slate + kill_procs + cleanup 2>/dev/null + + log_debug "Configuring network namespaces" + set -e + + create_ns ${NSA} "-" "-" + create_ns ${NSB} "-" "-" + create_ns ${NSC} "-" "-" + connect_ns ${NSA} ${NSA_DEV} "-" "-" \ + ${NSB} ${NSB_DEV} "-" "-" + connect_ns ${NSA} ${NSA_DEV2} "-" "-" \ + ${NSC} ${NSC_DEV} "-" "-" + + NSA_LINKIP6=$(get_linklocal ${NSA} ${NSA_DEV}) + NSB_LINKIP6=$(get_linklocal ${NSB} ${NSB_DEV}) + NSC_LINKIP6=$(get_linklocal ${NSC} ${NSC_DEV}) + + create_vrf ${NSA} ${VRF} ${VRF_TABLE} "-" "-" + ip -netns ${NSA} link set dev ${NSA_DEV} vrf ${VRF} + ip -netns ${NSA} link set dev ${NSA_DEV2} vrf ${VRF} + + set +e + + sleep 1 +} + ################################################################################ # IPv4 @@ -3787,10 +3839,53 @@ use_case_br() setup_cmd_nsb ip li del vlan100 2>/dev/null } +# VRF only. +# ns-A device is connected to both ns-B and ns-C on a single VRF but only has +# LLA on the interfaces +use_case_ping_lla_multi() +{ + setup_lla_only + # only want reply from ns-A + setup_cmd_nsb sysctl -qw net.ipv6.icmp.echo_ignore_multicast=1 + setup_cmd_nsc sysctl -qw net.ipv6.icmp.echo_ignore_multicast=1 + + log_start + run_cmd_nsb ping -c1 -w1 ${MCAST}%${NSB_DEV} + log_test_addr ${MCAST}%${NSB_DEV} $? 0 "Pre cycle, ping out ns-B" + + run_cmd_nsc ping -c1 -w1 ${MCAST}%${NSC_DEV} + log_test_addr ${MCAST}%${NSC_DEV} $? 0 "Pre cycle, ping out ns-C" + + # cycle/flap the first ns-A interface + setup_cmd ip link set ${NSA_DEV} down + setup_cmd ip link set ${NSA_DEV} up + sleep 1 + + log_start + run_cmd_nsb ping -c1 -w1 ${MCAST}%${NSB_DEV} + log_test_addr ${MCAST}%${NSB_DEV} $? 
0 "Post cycle ${NSA} ${NSA_DEV}, ping out ns-B" + run_cmd_nsc ping -c1 -w1 ${MCAST}%${NSC_DEV} + log_test_addr ${MCAST}%${NSC_DEV} $? 0 "Post cycle ${NSA} ${NSA_DEV}, ping out ns-C" + + # cycle/flap the second ns-A interface + setup_cmd ip link set ${NSA_DEV2} down + setup_cmd ip link set ${NSA_DEV2} up + sleep 1 + + log_start + run_cmd_nsb ping -c1 -w1 ${MCAST}%${NSB_DEV} + log_test_addr ${MCAST}%${NSB_DEV} $? 0 "Post cycle ${NSA} ${NSA_DEV2}, ping out ns-B" + run_cmd_nsc ping -c1 -w1 ${MCAST}%${NSC_DEV} + log_test_addr ${MCAST}%${NSC_DEV} $? 0 "Post cycle ${NSA} ${NSA_DEV2}, ping out ns-C" +} + use_cases() { log_section "Use cases" + log_subsection "Device enslaved to bridge" use_case_br + log_subsection "Ping LLA with multiple interfaces" + use_case_ping_lla_multi } ################################################################################ From 4de377b659035309ba48638d70f3150d5c67611f Mon Sep 17 00:00:00 2001 From: Zhang Changzhong Date: Fri, 4 Dec 2020 16:49:42 +0800 Subject: [PATCH 161/296] net: marvell: prestera: Fix error return code in prestera_port_create() Fix to return a negative error code from the error handling case instead of 0, as done elsewhere in this function. 
Fixes: 501ef3066c89 ("net: marvell: prestera: Add driver for Prestera family ASIC devices") Reported-by: Hulk Robot Signed-off-by: Zhang Changzhong Link: https://lore.kernel.org/r/1607071782-34006-1-git-send-email-zhangchangzhong@huawei.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/marvell/prestera/prestera_main.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/marvell/prestera/prestera_main.c b/drivers/net/ethernet/marvell/prestera/prestera_main.c index 0f20e0788cce..da4b286d1337 100644 --- a/drivers/net/ethernet/marvell/prestera/prestera_main.c +++ b/drivers/net/ethernet/marvell/prestera/prestera_main.c @@ -318,8 +318,10 @@ static int prestera_port_create(struct prestera_switch *sw, u32 id) goto err_port_init; } - if (port->fp_id >= PRESTERA_MAC_ADDR_NUM_MAX) + if (port->fp_id >= PRESTERA_MAC_ADDR_NUM_MAX) { + err = -EINVAL; goto err_port_init; + } /* firmware requires that port's MAC address consist of the first * 5 bytes of the base MAC address From edd2410b165e2ef00b2264ae362edf7441ca929c Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Fri, 4 Dec 2020 19:54:16 +0200 Subject: [PATCH 162/296] net: mscc: ocelot: fix dropping of unknown IPv4 multicast on Seville The current assumption is that the felix DSA driver has flooding knobs per traffic class, while ocelot switchdev has a single flooding knob. This was correct for felix VSC9959 and ocelot VSC7514, but with the introduction of seville VSC9953, we see a switch driven by felix.c which has a single flooding knob. So it is clear that we must do what should have been done from the beginning, which is not to overwrite the configuration done by ocelot.c in felix, but instead to teach the common ocelot library about the differences in our switches, and set up the flooding PGIDs centrally. The effect that the bogus iteration through FELIX_NUM_TC has upon seville is quite dramatic. 
ANA_FLOODING is located at 0x00b548, and ANA_FLOODING_IPMC is located at 0x00b54c. So the bogus iteration will actually overwrite ANA_FLOODING_IPMC when attempting to write ANA_FLOODING[1]. There is no ANA_FLOODING[1] in seville, just ANA_FLOODING. And when ANA_FLOODING_IPMC is overwritten with a bogus value, the effect is that ANA_FLOODING_IPMC gets the value of 0x0003CF7D: MC6_DATA = 61, MC6_CTRL = 61, MC4_DATA = 60, MC4_CTRL = 0. Because MC4_CTRL is zero, this means that IPv4 multicast control packets are not flooded, but dropped. An invalid configuration, and this is how the issue was actually spotted. Reported-by: Eldar Gasanov Reported-by: Maxim Kochetkov Tested-by: Eldar Gasanov Fixes: 84705fc16552 ("net: dsa: felix: introduce support for Seville VSC9953 switch") Fixes: 3c7b51bd39b2 ("net: dsa: felix: allow flooding for all traffic classes") Signed-off-by: Vladimir Oltean Reviewed-by: Alexandre Belloni Link: https://lore.kernel.org/r/20201204175416.1445937-1-vladimir.oltean@nxp.com Signed-off-by: Jakub Kicinski --- drivers/net/dsa/ocelot/felix.c | 7 ------- drivers/net/dsa/ocelot/felix_vsc9959.c | 1 + drivers/net/dsa/ocelot/seville_vsc9953.c | 1 + drivers/net/ethernet/mscc/ocelot.c | 9 +++++---- drivers/net/ethernet/mscc/ocelot_vsc7514.c | 1 + include/soc/mscc/ocelot.h | 3 +++ 6 files changed, 11 insertions(+), 11 deletions(-) diff --git a/drivers/net/dsa/ocelot/felix.c b/drivers/net/dsa/ocelot/felix.c index f791860d495f..c444ef3da3e2 100644 --- a/drivers/net/dsa/ocelot/felix.c +++ b/drivers/net/dsa/ocelot/felix.c @@ -569,7 +569,6 @@ static int felix_setup(struct dsa_switch *ds) struct ocelot *ocelot = ds->priv; struct felix *felix = ocelot_to_felix(ocelot); int port, err; - int tc; err = felix_init_structs(felix, ds->num_ports); if (err) @@ -608,12 +607,6 @@ static int felix_setup(struct dsa_switch *ds) ocelot_write_rix(ocelot, ANA_PGID_PGID_PGID(GENMASK(ocelot->num_phys_ports, 0)), ANA_PGID_PGID, PGID_UC); - /* Setup the per-traffic class flooding PGIDs */ 
- for (tc = 0; tc < FELIX_NUM_TC; tc++) - ocelot_write_rix(ocelot, ANA_FLOODING_FLD_MULTICAST(PGID_MC) | - ANA_FLOODING_FLD_BROADCAST(PGID_MC) | - ANA_FLOODING_FLD_UNICAST(PGID_UC), - ANA_FLOODING, tc); ds->mtu_enforcement_ingress = true; ds->configure_vlan_while_not_filtering = true; diff --git a/drivers/net/dsa/ocelot/felix_vsc9959.c b/drivers/net/dsa/ocelot/felix_vsc9959.c index 3e925b8d5306..2e5bbdca5ea4 100644 --- a/drivers/net/dsa/ocelot/felix_vsc9959.c +++ b/drivers/net/dsa/ocelot/felix_vsc9959.c @@ -1429,6 +1429,7 @@ static int felix_pci_probe(struct pci_dev *pdev, pci_set_drvdata(pdev, felix); ocelot = &felix->ocelot; ocelot->dev = &pdev->dev; + ocelot->num_flooding_pgids = FELIX_NUM_TC; felix->info = &felix_info_vsc9959; felix->switch_base = pci_resource_start(pdev, felix->info->switch_pci_bar); diff --git a/drivers/net/dsa/ocelot/seville_vsc9953.c b/drivers/net/dsa/ocelot/seville_vsc9953.c index 1d420c4a2f0f..ebbaf6817ec8 100644 --- a/drivers/net/dsa/ocelot/seville_vsc9953.c +++ b/drivers/net/dsa/ocelot/seville_vsc9953.c @@ -1210,6 +1210,7 @@ static int seville_probe(struct platform_device *pdev) ocelot = &felix->ocelot; ocelot->dev = &pdev->dev; + ocelot->num_flooding_pgids = 1; felix->info = &seville_info_vsc9953; res = platform_get_resource(pdev, IORESOURCE_MEM, 0); diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c index 70bf8c67d7ef..a53bd36b11c6 100644 --- a/drivers/net/ethernet/mscc/ocelot.c +++ b/drivers/net/ethernet/mscc/ocelot.c @@ -1489,10 +1489,11 @@ int ocelot_init(struct ocelot *ocelot) SYS_FRM_AGING_MAX_AGE(307692), SYS_FRM_AGING); /* Setup flooding PGIDs */ - ocelot_write_rix(ocelot, ANA_FLOODING_FLD_MULTICAST(PGID_MC) | - ANA_FLOODING_FLD_BROADCAST(PGID_MC) | - ANA_FLOODING_FLD_UNICAST(PGID_UC), - ANA_FLOODING, 0); + for (i = 0; i < ocelot->num_flooding_pgids; i++) + ocelot_write_rix(ocelot, ANA_FLOODING_FLD_MULTICAST(PGID_MC) | + ANA_FLOODING_FLD_BROADCAST(PGID_MC) | + ANA_FLOODING_FLD_UNICAST(PGID_UC), 
+ ANA_FLOODING, i); ocelot_write(ocelot, ANA_FLOODING_IPMC_FLD_MC6_DATA(PGID_MCIPV6) | ANA_FLOODING_IPMC_FLD_MC6_CTRL(PGID_MC) | ANA_FLOODING_IPMC_FLD_MC4_DATA(PGID_MCIPV4) | diff --git a/drivers/net/ethernet/mscc/ocelot_vsc7514.c b/drivers/net/ethernet/mscc/ocelot_vsc7514.c index dc00772950e5..1e7729421a82 100644 --- a/drivers/net/ethernet/mscc/ocelot_vsc7514.c +++ b/drivers/net/ethernet/mscc/ocelot_vsc7514.c @@ -1254,6 +1254,7 @@ static int mscc_ocelot_probe(struct platform_device *pdev) } ocelot->num_phys_ports = of_get_child_count(ports); + ocelot->num_flooding_pgids = 1; ocelot->vcap = vsc7514_vcap_props; ocelot->inj_prefix = OCELOT_TAG_PREFIX_NONE; diff --git a/include/soc/mscc/ocelot.h b/include/soc/mscc/ocelot.h index 1e9db9577441..49b46df476f2 100644 --- a/include/soc/mscc/ocelot.h +++ b/include/soc/mscc/ocelot.h @@ -618,6 +618,9 @@ struct ocelot { /* Keep track of the vlan port masks */ u32 vlan_mask[VLAN_N_VID]; + /* Switches like VSC9959 have flooding per traffic class */ + int num_flooding_pgids; + /* In tables like ANA:PORT and the ANA:PGID:PGID mask, * the CPU is located after the physical ports (at the * num_phys_ports index). From 4e9a5ae8df5b3365183150f6df49e49dece80d8c Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 3 Dec 2020 13:50:37 +0900 Subject: [PATCH 163/296] x86/uprobes: Do not use prefixes.nbytes when looping over prefixes.bytes Since insn.prefixes.nbytes can be bigger than the size of insn.prefixes.bytes[] when a prefix is repeated, the proper check must be insn.prefixes.bytes[i] != 0 and i < 4 instead of using insn.prefixes.nbytes. Introduce a for_each_insn_prefix() macro for this purpose. Debugged by Kees Cook . [ bp: Massage commit message, sync with the respective header in tools/ and drop "we". 
] Fixes: 2b1444983508 ("uprobes, mm, x86: Add the ability to install and remove uprobes breakpoints") Reported-by: syzbot+9b64b619f10f19d19a7c@syzkaller.appspotmail.com Signed-off-by: Masami Hiramatsu Signed-off-by: Borislav Petkov Reviewed-by: Srikar Dronamraju Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/160697103739.3146288.7437620795200799020.stgit@devnote2 --- arch/x86/include/asm/insn.h | 15 +++++++++++++++ arch/x86/kernel/uprobes.c | 10 ++++++---- tools/arch/x86/include/asm/insn.h | 15 +++++++++++++++ 3 files changed, 36 insertions(+), 4 deletions(-) diff --git a/arch/x86/include/asm/insn.h b/arch/x86/include/asm/insn.h index 5c1ae3eff9d4..a8c3d284fa46 100644 --- a/arch/x86/include/asm/insn.h +++ b/arch/x86/include/asm/insn.h @@ -201,6 +201,21 @@ static inline int insn_offset_immediate(struct insn *insn) return insn_offset_displacement(insn) + insn->displacement.nbytes; } +/** + * for_each_insn_prefix() -- Iterate prefixes in the instruction + * @insn: Pointer to struct insn. + * @idx: Index storage. + * @prefix: Prefix byte. + * + * Iterate prefix bytes of given @insn. Each prefix byte is stored in @prefix + * and the index is stored in @idx (note that this @idx is just for a cursor, + * do not change it.) + * Since prefixes.nbytes can be bigger than 4 if some prefixes + * are repeated, it cannot be used for looping over the prefixes. 
+ */ +#define for_each_insn_prefix(insn, idx, prefix) \ + for (idx = 0; idx < ARRAY_SIZE(insn->prefixes.bytes) && (prefix = insn->prefixes.bytes[idx]) != 0; idx++) + #define POP_SS_OPCODE 0x1f #define MOV_SREG_OPCODE 0x8e diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c index 3fdaa042823d..138bdb1fd136 100644 --- a/arch/x86/kernel/uprobes.c +++ b/arch/x86/kernel/uprobes.c @@ -255,12 +255,13 @@ static volatile u32 good_2byte_insns[256 / 32] = { static bool is_prefix_bad(struct insn *insn) { + insn_byte_t p; int i; - for (i = 0; i < insn->prefixes.nbytes; i++) { + for_each_insn_prefix(insn, i, p) { insn_attr_t attr; - attr = inat_get_opcode_attribute(insn->prefixes.bytes[i]); + attr = inat_get_opcode_attribute(p); switch (attr) { case INAT_MAKE_PREFIX(INAT_PFX_ES): case INAT_MAKE_PREFIX(INAT_PFX_CS): @@ -715,6 +716,7 @@ static const struct uprobe_xol_ops push_xol_ops = { static int branch_setup_xol_ops(struct arch_uprobe *auprobe, struct insn *insn) { u8 opc1 = OPCODE1(insn); + insn_byte_t p; int i; switch (opc1) { @@ -746,8 +748,8 @@ static int branch_setup_xol_ops(struct arch_uprobe *auprobe, struct insn *insn) * Intel and AMD behavior differ in 64-bit mode: Intel ignores 66 prefix. * No one uses these insns, reject any branch insns with such prefix. */ - for (i = 0; i < insn->prefixes.nbytes; i++) { - if (insn->prefixes.bytes[i] == 0x66) + for_each_insn_prefix(insn, i, p) { + if (p == 0x66) return -ENOTSUPP; } diff --git a/tools/arch/x86/include/asm/insn.h b/tools/arch/x86/include/asm/insn.h index 568854b14d0a..52c6262e6bfd 100644 --- a/tools/arch/x86/include/asm/insn.h +++ b/tools/arch/x86/include/asm/insn.h @@ -201,6 +201,21 @@ static inline int insn_offset_immediate(struct insn *insn) return insn_offset_displacement(insn) + insn->displacement.nbytes; } +/** + * for_each_insn_prefix() -- Iterate prefixes in the instruction + * @insn: Pointer to struct insn. + * @idx: Index storage. + * @prefix: Prefix byte. 
+ * + * Iterate prefix bytes of given @insn. Each prefix byte is stored in @prefix + * and the index is stored in @idx (note that this @idx is just for a cursor, + * do not change it.) + * Since prefixes.nbytes can be bigger than 4 if some prefixes + * are repeated, it cannot be used for looping over the prefixes. + */ +#define for_each_insn_prefix(insn, idx, prefix) \ + for (idx = 0; idx < ARRAY_SIZE(insn->prefixes.bytes) && (prefix = insn->prefixes.bytes[idx]) != 0; idx++) + #define POP_SS_OPCODE 0x1f #define MOV_SREG_OPCODE 0x8e From 12cb908a11b2544b5f53e9af856e6b6a90ed5533 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 3 Dec 2020 13:50:50 +0900 Subject: [PATCH 164/296] x86/insn-eval: Use new for_each_insn_prefix() macro to loop over prefixes bytes Since insn.prefixes.nbytes can be bigger than the size of insn.prefixes.bytes[] when a prefix is repeated, the proper check must be insn.prefixes.bytes[i] != 0 and i < 4 instead of using insn.prefixes.nbytes. Use the new for_each_insn_prefix() macro which does it correctly. Debugged by Kees Cook . [ bp: Massage commit message. 
] Fixes: 32d0b95300db ("x86/insn-eval: Add utility functions to get segment selector") Reported-by: syzbot+9b64b619f10f19d19a7c@syzkaller.appspotmail.com Signed-off-by: Masami Hiramatsu Signed-off-by: Borislav Petkov Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/160697104969.3146288.16329307586428270032.stgit@devnote2 --- arch/x86/lib/insn-eval.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/x86/lib/insn-eval.c b/arch/x86/lib/insn-eval.c index 58f7fb95c7f4..4229950a5d78 100644 --- a/arch/x86/lib/insn-eval.c +++ b/arch/x86/lib/insn-eval.c @@ -63,13 +63,12 @@ static bool is_string_insn(struct insn *insn) */ bool insn_has_rep_prefix(struct insn *insn) { + insn_byte_t p; int i; insn_get_prefixes(insn); - for (i = 0; i < insn->prefixes.nbytes; i++) { - insn_byte_t p = insn->prefixes.bytes[i]; - + for_each_insn_prefix(insn, i, p) { if (p == 0xf2 || p == 0xf3) return true; } @@ -95,14 +94,15 @@ static int get_seg_reg_override_idx(struct insn *insn) { int idx = INAT_SEG_REG_DEFAULT; int num_overrides = 0, i; + insn_byte_t p; insn_get_prefixes(insn); /* Look for any segment override prefixes. */ - for (i = 0; i < insn->prefixes.nbytes; i++) { + for_each_insn_prefix(insn, i, p) { insn_attr_t attr; - attr = inat_get_opcode_attribute(insn->prefixes.bytes[i]); + attr = inat_get_opcode_attribute(p); switch (attr) { case INAT_MAKE_PREFIX(INAT_PFX_CS): idx = INAT_SEG_REG_CS; From 84da009f06e60cf59d5e861f8e2101d2d3885517 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 3 Dec 2020 13:51:01 +0900 Subject: [PATCH 165/296] x86/sev-es: Use new for_each_insn_prefix() macro to loop over prefixes bytes Since insn.prefixes.nbytes can be bigger than the size of insn.prefixes.bytes[] when a prefix is repeated, the proper check must be: insn.prefixes.bytes[i] != 0 and i < 4 instead of using insn.prefixes.nbytes. Use the new for_each_insn_prefix() macro which does it correctly. Debugged by Kees Cook . [ bp: Massage commit message. 
] Fixes: 25189d08e516 ("x86/sev-es: Add support for handling IOIO exceptions") Reported-by: syzbot+9b64b619f10f19d19a7c@syzkaller.appspotmail.com Signed-off-by: Masami Hiramatsu Signed-off-by: Borislav Petkov Link: https://lkml.kernel.org/r/160697106089.3146288.2052422845039649176.stgit@devnote2 --- arch/x86/boot/compressed/sev-es.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/arch/x86/boot/compressed/sev-es.c b/arch/x86/boot/compressed/sev-es.c index 954cb2702e23..27826c265aab 100644 --- a/arch/x86/boot/compressed/sev-es.c +++ b/arch/x86/boot/compressed/sev-es.c @@ -32,13 +32,12 @@ struct ghcb *boot_ghcb; */ static bool insn_has_rep_prefix(struct insn *insn) { + insn_byte_t p; int i; insn_get_prefixes(insn); - for (i = 0; i < insn->prefixes.nbytes; i++) { - insn_byte_t p = insn->prefixes.bytes[i]; - + for_each_insn_prefix(insn, i, p) { if (p == 0xf2 || p == 0xf3) return true; } From 264f53b41946dcabb2b3304190839ab5670c7825 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Sat, 5 Dec 2020 14:38:46 -0500 Subject: [PATCH 166/296] Revert "mei: virtio: virtualization frontend driver" This reverts commit d162219c655c8cf8003128a13840d6c1e183fb80. The device uses a VIRTIO device ID out of a not-for-production range. Releasing Linux using an ID out of this range will make it conflict with development setups. An official request to reserve an ID for an MEI device is yet to be submitted to the virtio TC, thus there's no chance it will be reserved and fixed in time before the next release. Once requested it usually takes 2-3 weeks to land in the spec, which means the device can be supported with the official ID in the next Linux version if contributors act quickly. Signed-off-by: Michael S. 
Tsirkin Cc: Tomas Winkler Cc: Alexander Usyskin Cc: Wang Yu Cc: Liu Shuo Link: https://lore.kernel.org/r/20201205193625.469773-1-mst@redhat.com Signed-off-by: Greg Kroah-Hartman --- drivers/misc/mei/Kconfig | 10 - drivers/misc/mei/Makefile | 3 - drivers/misc/mei/hw-virtio.c | 874 ----------------------------------- 3 files changed, 887 deletions(-) delete mode 100644 drivers/misc/mei/hw-virtio.c diff --git a/drivers/misc/mei/Kconfig b/drivers/misc/mei/Kconfig index c06581ffa7bd..f5fd5b786607 100644 --- a/drivers/misc/mei/Kconfig +++ b/drivers/misc/mei/Kconfig @@ -46,14 +46,4 @@ config INTEL_MEI_TXE Supported SoCs: Intel Bay Trail -config INTEL_MEI_VIRTIO - tristate "Intel MEI interface emulation with virtio framework" - select INTEL_MEI - depends on X86 && PCI && VIRTIO_PCI - help - This module implements mei hw emulation over virtio transport. - The module will be called mei_virtio. - Enable this if your virtual machine supports virtual mei - device over virtio. - source "drivers/misc/mei/hdcp/Kconfig" diff --git a/drivers/misc/mei/Makefile b/drivers/misc/mei/Makefile index 52aefaab5c1b..f1c76f7ee804 100644 --- a/drivers/misc/mei/Makefile +++ b/drivers/misc/mei/Makefile @@ -22,9 +22,6 @@ obj-$(CONFIG_INTEL_MEI_TXE) += mei-txe.o mei-txe-objs := pci-txe.o mei-txe-objs += hw-txe.o -obj-$(CONFIG_INTEL_MEI_VIRTIO) += mei-virtio.o -mei-virtio-objs := hw-virtio.o - mei-$(CONFIG_EVENT_TRACING) += mei-trace.o CFLAGS_mei-trace.o = -I$(src) diff --git a/drivers/misc/mei/hw-virtio.c b/drivers/misc/mei/hw-virtio.c deleted file mode 100644 index 899dc1c5e7ca..000000000000 --- a/drivers/misc/mei/hw-virtio.c +++ /dev/null @@ -1,874 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Intel Management Engine Interface (Intel MEI) Linux driver - * Copyright (c) 2018-2020, Intel Corporation. 
- */ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "mei_dev.h" -#include "hbm.h" -#include "client.h" - -#define MEI_VIRTIO_RPM_TIMEOUT 500 -/* ACRN virtio device types */ -#ifndef VIRTIO_ID_MEI -#define VIRTIO_ID_MEI 0xFFFE /* virtio mei */ -#endif - -/** - * struct mei_virtio_cfg - settings passed from the virtio backend - * @buf_depth: read buffer depth in slots (4bytes) - * @hw_ready: hw is ready for operation - * @host_reset: synchronize reset with virtio backend - * @reserved: reserved for alignment - * @fw_status: FW status - */ -struct mei_virtio_cfg { - u32 buf_depth; - u8 hw_ready; - u8 host_reset; - u8 reserved[2]; - u32 fw_status[MEI_FW_STATUS_MAX]; -} __packed; - -struct mei_virtio_hw { - struct mei_device mdev; - char name[32]; - - struct virtqueue *in; - struct virtqueue *out; - - bool host_ready; - struct work_struct intr_handler; - - u32 *recv_buf; - u8 recv_rdy; - size_t recv_sz; - u32 recv_idx; - u32 recv_len; - - /* send buffer */ - atomic_t hbuf_ready; - const void *send_hdr; - const void *send_buf; - - struct mei_virtio_cfg cfg; -}; - -#define to_virtio_hw(_dev) container_of(_dev, struct mei_virtio_hw, mdev) - -/** - * mei_virtio_fw_status() - read status register of mei - * @dev: mei device - * @fw_status: fw status register values - * - * Return: always 0 - */ -static int mei_virtio_fw_status(struct mei_device *dev, - struct mei_fw_status *fw_status) -{ - struct virtio_device *vdev = dev_to_virtio(dev->dev); - - fw_status->count = MEI_FW_STATUS_MAX; - virtio_cread_bytes(vdev, offsetof(struct mei_virtio_cfg, fw_status), - fw_status->status, sizeof(fw_status->status)); - return 0; -} - -/** - * mei_virtio_pg_state() - translate internal pg state - * to the mei power gating state - * There is no power management in ACRN mode always return OFF - * @dev: mei device - * - * Return: - * * MEI_PG_OFF - if aliveness is on (always) - * * MEI_PG_ON - (never) - */ -static inline enum 
mei_pg_state mei_virtio_pg_state(struct mei_device *dev) -{ - return MEI_PG_OFF; -} - -/** - * mei_virtio_hw_config() - configure hw dependent settings - * - * @dev: mei device - * - * Return: always 0 - */ -static int mei_virtio_hw_config(struct mei_device *dev) -{ - return 0; -} - -/** - * mei_virtio_hbuf_empty_slots() - counts write empty slots. - * @dev: the device structure - * - * Return: always return frontend buf size if buffer is ready, 0 otherwise - */ -static int mei_virtio_hbuf_empty_slots(struct mei_device *dev) -{ - struct mei_virtio_hw *hw = to_virtio_hw(dev); - - return (atomic_read(&hw->hbuf_ready) == 1) ? hw->cfg.buf_depth : 0; -} - -/** - * mei_virtio_hbuf_is_ready() - checks if write buffer is ready - * @dev: the device structure - * - * Return: true if hbuf is ready - */ -static bool mei_virtio_hbuf_is_ready(struct mei_device *dev) -{ - struct mei_virtio_hw *hw = to_virtio_hw(dev); - - return atomic_read(&hw->hbuf_ready) == 1; -} - -/** - * mei_virtio_hbuf_max_depth() - returns depth of FE write buffer. - * @dev: the device structure - * - * Return: size of frontend write buffer in bytes - */ -static u32 mei_virtio_hbuf_depth(const struct mei_device *dev) -{ - struct mei_virtio_hw *hw = to_virtio_hw(dev); - - return hw->cfg.buf_depth; -} - -/** - * mei_virtio_intr_clear() - clear and stop interrupts - * @dev: the device structure - */ -static void mei_virtio_intr_clear(struct mei_device *dev) -{ - /* - * In our virtio solution, there are two types of interrupts, - * vq interrupt and config change interrupt. - * 1) start/reset rely on virtio config changed interrupt; - * 2) send/recv rely on virtio virtqueue interrupts. - * They are all virtual interrupts. So, we don't have corresponding - * operation to do here. 
- */ -} - -/** - * mei_virtio_intr_enable() - enables mei BE virtqueues callbacks - * @dev: the device structure - */ -static void mei_virtio_intr_enable(struct mei_device *dev) -{ - struct mei_virtio_hw *hw = to_virtio_hw(dev); - struct virtio_device *vdev = dev_to_virtio(dev->dev); - - virtio_config_enable(vdev); - - virtqueue_enable_cb(hw->in); - virtqueue_enable_cb(hw->out); -} - -/** - * mei_virtio_intr_disable() - disables mei BE virtqueues callbacks - * - * @dev: the device structure - */ -static void mei_virtio_intr_disable(struct mei_device *dev) -{ - struct mei_virtio_hw *hw = to_virtio_hw(dev); - struct virtio_device *vdev = dev_to_virtio(dev->dev); - - virtio_config_disable(vdev); - - virtqueue_disable_cb(hw->in); - virtqueue_disable_cb(hw->out); -} - -/** - * mei_virtio_synchronize_irq() - wait for pending IRQ handlers for all - * virtqueue - * @dev: the device structure - */ -static void mei_virtio_synchronize_irq(struct mei_device *dev) -{ - struct mei_virtio_hw *hw = to_virtio_hw(dev); - - /* - * Now, all IRQ handlers are converted to workqueue. - * Change synchronize irq to flush this work. - */ - flush_work(&hw->intr_handler); -} - -static void mei_virtio_free_outbufs(struct mei_virtio_hw *hw) -{ - kfree(hw->send_hdr); - kfree(hw->send_buf); - hw->send_hdr = NULL; - hw->send_buf = NULL; -} - -/** - * mei_virtio_write_message() - writes a message to mei virtio back-end service. 
- * @dev: the device structure - * @hdr: mei header of message - * @hdr_len: header length - * @data: message payload will be written - * @data_len: message payload length - * - * Return: - * * 0: on success - * * -EIO: if write has failed - * * -ENOMEM: on memory allocation failure - */ -static int mei_virtio_write_message(struct mei_device *dev, - const void *hdr, size_t hdr_len, - const void *data, size_t data_len) -{ - struct mei_virtio_hw *hw = to_virtio_hw(dev); - struct scatterlist sg[2]; - const void *hbuf, *dbuf; - int ret; - - if (WARN_ON(!atomic_add_unless(&hw->hbuf_ready, -1, 0))) - return -EIO; - - hbuf = kmemdup(hdr, hdr_len, GFP_KERNEL); - hw->send_hdr = hbuf; - - dbuf = kmemdup(data, data_len, GFP_KERNEL); - hw->send_buf = dbuf; - - if (!hbuf || !dbuf) { - ret = -ENOMEM; - goto fail; - } - - sg_init_table(sg, 2); - sg_set_buf(&sg[0], hbuf, hdr_len); - sg_set_buf(&sg[1], dbuf, data_len); - - ret = virtqueue_add_outbuf(hw->out, sg, 2, hw, GFP_KERNEL); - if (ret) { - dev_err(dev->dev, "failed to add outbuf\n"); - goto fail; - } - - virtqueue_kick(hw->out); - return 0; -fail: - - mei_virtio_free_outbufs(hw); - - return ret; -} - -/** - * mei_virtio_count_full_read_slots() - counts read full slots. 
- * @dev: the device structure - * - * Return: -EOVERFLOW if overflow, otherwise filled slots count - */ -static int mei_virtio_count_full_read_slots(struct mei_device *dev) -{ - struct mei_virtio_hw *hw = to_virtio_hw(dev); - - if (hw->recv_idx > hw->recv_len) - return -EOVERFLOW; - - return hw->recv_len - hw->recv_idx; -} - -/** - * mei_virtio_read_hdr() - Reads 32bit dword from mei virtio receive buffer - * - * @dev: the device structure - * - * Return: 32bit dword of receive buffer (u32) - */ -static inline u32 mei_virtio_read_hdr(const struct mei_device *dev) -{ - struct mei_virtio_hw *hw = to_virtio_hw(dev); - - WARN_ON(hw->cfg.buf_depth < hw->recv_idx + 1); - - return hw->recv_buf[hw->recv_idx++]; -} - -static int mei_virtio_read(struct mei_device *dev, unsigned char *buffer, - unsigned long len) -{ - struct mei_virtio_hw *hw = to_virtio_hw(dev); - u32 slots = mei_data2slots(len); - - if (WARN_ON(hw->cfg.buf_depth < hw->recv_idx + slots)) - return -EOVERFLOW; - - /* - * Assumption: There is only one MEI message in recv_buf each time. - * Backend service need follow this rule too. 
- */ - memcpy(buffer, hw->recv_buf + hw->recv_idx, len); - hw->recv_idx += slots; - - return 0; -} - -static bool mei_virtio_pg_is_enabled(struct mei_device *dev) -{ - return false; -} - -static bool mei_virtio_pg_in_transition(struct mei_device *dev) -{ - return false; -} - -static void mei_virtio_add_recv_buf(struct mei_virtio_hw *hw) -{ - struct scatterlist sg; - - if (hw->recv_rdy) /* not needed */ - return; - - /* refill the recv_buf to IN virtqueue to get next message */ - sg_init_one(&sg, hw->recv_buf, mei_slots2data(hw->cfg.buf_depth)); - hw->recv_len = 0; - hw->recv_idx = 0; - hw->recv_rdy = 1; - virtqueue_add_inbuf(hw->in, &sg, 1, hw->recv_buf, GFP_KERNEL); - virtqueue_kick(hw->in); -} - -/** - * mei_virtio_hw_is_ready() - check whether the BE(hw) has turned ready - * @dev: mei device - * Return: bool - */ -static bool mei_virtio_hw_is_ready(struct mei_device *dev) -{ - struct mei_virtio_hw *hw = to_virtio_hw(dev); - struct virtio_device *vdev = dev_to_virtio(dev->dev); - - virtio_cread(vdev, struct mei_virtio_cfg, - hw_ready, &hw->cfg.hw_ready); - - dev_dbg(dev->dev, "hw ready %d\n", hw->cfg.hw_ready); - - return hw->cfg.hw_ready; -} - -/** - * mei_virtio_hw_reset - resets virtio hw. 
- * - * @dev: the device structure - * @intr_enable: virtio use data/config callbacks - * - * Return: 0 on success an error code otherwise - */ -static int mei_virtio_hw_reset(struct mei_device *dev, bool intr_enable) -{ - struct mei_virtio_hw *hw = to_virtio_hw(dev); - struct virtio_device *vdev = dev_to_virtio(dev->dev); - - dev_dbg(dev->dev, "hw reset\n"); - - dev->recvd_hw_ready = false; - hw->host_ready = false; - atomic_set(&hw->hbuf_ready, 0); - hw->recv_len = 0; - hw->recv_idx = 0; - - hw->cfg.host_reset = 1; - virtio_cwrite(vdev, struct mei_virtio_cfg, - host_reset, &hw->cfg.host_reset); - - mei_virtio_hw_is_ready(dev); - - if (intr_enable) - mei_virtio_intr_enable(dev); - - return 0; -} - -/** - * mei_virtio_hw_reset_release() - release device from the reset - * @dev: the device structure - */ -static void mei_virtio_hw_reset_release(struct mei_device *dev) -{ - struct mei_virtio_hw *hw = to_virtio_hw(dev); - struct virtio_device *vdev = dev_to_virtio(dev->dev); - - dev_dbg(dev->dev, "hw reset release\n"); - hw->cfg.host_reset = 0; - virtio_cwrite(vdev, struct mei_virtio_cfg, - host_reset, &hw->cfg.host_reset); -} - -/** - * mei_virtio_hw_ready_wait() - wait until the virtio(hw) has turned ready - * or timeout is reached - * @dev: mei device - * - * Return: 0 on success, error otherwise - */ -static int mei_virtio_hw_ready_wait(struct mei_device *dev) -{ - mutex_unlock(&dev->device_lock); - wait_event_timeout(dev->wait_hw_ready, - dev->recvd_hw_ready, - mei_secs_to_jiffies(MEI_HW_READY_TIMEOUT)); - mutex_lock(&dev->device_lock); - if (!dev->recvd_hw_ready) { - dev_err(dev->dev, "wait hw ready failed\n"); - return -ETIMEDOUT; - } - - dev->recvd_hw_ready = false; - return 0; -} - -/** - * mei_virtio_hw_start() - hw start routine - * @dev: mei device - * - * Return: 0 on success, error otherwise - */ -static int mei_virtio_hw_start(struct mei_device *dev) -{ - struct mei_virtio_hw *hw = to_virtio_hw(dev); - int ret; - - dev_dbg(dev->dev, "hw start\n"); - 
mei_virtio_hw_reset_release(dev); - - ret = mei_virtio_hw_ready_wait(dev); - if (ret) - return ret; - - mei_virtio_add_recv_buf(hw); - atomic_set(&hw->hbuf_ready, 1); - dev_dbg(dev->dev, "hw is ready\n"); - hw->host_ready = true; - - return 0; -} - -/** - * mei_virtio_host_is_ready() - check whether the FE has turned ready - * @dev: mei device - * - * Return: bool - */ -static bool mei_virtio_host_is_ready(struct mei_device *dev) -{ - struct mei_virtio_hw *hw = to_virtio_hw(dev); - - dev_dbg(dev->dev, "host ready %d\n", hw->host_ready); - - return hw->host_ready; -} - -/** - * mei_virtio_data_in() - The callback of recv virtqueue of virtio mei - * @vq: receiving virtqueue - */ -static void mei_virtio_data_in(struct virtqueue *vq) -{ - struct mei_virtio_hw *hw = vq->vdev->priv; - - /* disable interrupts (enabled again from in the interrupt worker) */ - virtqueue_disable_cb(hw->in); - - schedule_work(&hw->intr_handler); -} - -/** - * mei_virtio_data_out() - The callback of send virtqueue of virtio mei - * @vq: transmitting virtqueue - */ -static void mei_virtio_data_out(struct virtqueue *vq) -{ - struct mei_virtio_hw *hw = vq->vdev->priv; - - schedule_work(&hw->intr_handler); -} - -static void mei_virtio_intr_handler(struct work_struct *work) -{ - struct mei_virtio_hw *hw = - container_of(work, struct mei_virtio_hw, intr_handler); - struct mei_device *dev = &hw->mdev; - LIST_HEAD(complete_list); - s32 slots; - int rets = 0; - void *data; - unsigned int len; - - mutex_lock(&dev->device_lock); - - if (dev->dev_state == MEI_DEV_DISABLED) { - dev_warn(dev->dev, "Interrupt in disabled state.\n"); - mei_virtio_intr_disable(dev); - goto end; - } - - /* check if ME wants a reset */ - if (!mei_hw_is_ready(dev) && dev->dev_state != MEI_DEV_RESETTING) { - dev_warn(dev->dev, "BE service not ready: resetting.\n"); - schedule_work(&dev->reset_work); - goto end; - } - - /* check if we need to start the dev */ - if (!mei_host_is_ready(dev)) { - if (mei_hw_is_ready(dev)) { - 
dev_dbg(dev->dev, "we need to start the dev.\n"); - dev->recvd_hw_ready = true; - wake_up(&dev->wait_hw_ready); - } else { - dev_warn(dev->dev, "Spurious Interrupt\n"); - } - goto end; - } - - /* read */ - if (hw->recv_rdy) { - data = virtqueue_get_buf(hw->in, &len); - if (!data || !len) { - dev_dbg(dev->dev, "No data %d", len); - } else { - dev_dbg(dev->dev, "data_in %d\n", len); - WARN_ON(data != hw->recv_buf); - hw->recv_len = mei_data2slots(len); - hw->recv_rdy = 0; - } - } - - /* write */ - if (!atomic_read(&hw->hbuf_ready)) { - if (!virtqueue_get_buf(hw->out, &len)) { - dev_warn(dev->dev, "Failed to getbuf\n"); - } else { - mei_virtio_free_outbufs(hw); - atomic_inc(&hw->hbuf_ready); - } - } - - /* check slots available for reading */ - slots = mei_count_full_read_slots(dev); - while (slots > 0) { - dev_dbg(dev->dev, "slots to read = %08x\n", slots); - rets = mei_irq_read_handler(dev, &complete_list, &slots); - - if (rets && - (dev->dev_state != MEI_DEV_RESETTING && - dev->dev_state != MEI_DEV_POWER_DOWN)) { - dev_err(dev->dev, "mei_irq_read_handler ret = %d.\n", - rets); - schedule_work(&dev->reset_work); - goto end; - } - } - - dev->hbuf_is_ready = mei_hbuf_is_ready(dev); - - mei_irq_write_handler(dev, &complete_list); - - dev->hbuf_is_ready = mei_hbuf_is_ready(dev); - - mei_irq_compl_handler(dev, &complete_list); - - mei_virtio_add_recv_buf(hw); - -end: - if (dev->dev_state != MEI_DEV_DISABLED) { - if (!virtqueue_enable_cb(hw->in)) - schedule_work(&hw->intr_handler); - } - - mutex_unlock(&dev->device_lock); -} - -static void mei_virtio_config_changed(struct virtio_device *vdev) -{ - struct mei_virtio_hw *hw = vdev->priv; - struct mei_device *dev = &hw->mdev; - - virtio_cread(vdev, struct mei_virtio_cfg, - hw_ready, &hw->cfg.hw_ready); - - if (dev->dev_state == MEI_DEV_DISABLED) { - dev_dbg(dev->dev, "disabled state don't start\n"); - return; - } - - /* Run intr handler once to handle reset notify */ - schedule_work(&hw->intr_handler); -} - -static void 
mei_virtio_remove_vqs(struct virtio_device *vdev) -{ - struct mei_virtio_hw *hw = vdev->priv; - - virtqueue_detach_unused_buf(hw->in); - hw->recv_len = 0; - hw->recv_idx = 0; - hw->recv_rdy = 0; - - virtqueue_detach_unused_buf(hw->out); - - mei_virtio_free_outbufs(hw); - - vdev->config->del_vqs(vdev); -} - -/* - * There are two virtqueues, one is for send and another is for recv. - */ -static int mei_virtio_init_vqs(struct mei_virtio_hw *hw, - struct virtio_device *vdev) -{ - struct virtqueue *vqs[2]; - - vq_callback_t *cbs[] = { - mei_virtio_data_in, - mei_virtio_data_out, - }; - static const char * const names[] = { - "in", - "out", - }; - int ret; - - ret = virtio_find_vqs(vdev, 2, vqs, cbs, names, NULL); - if (ret) - return ret; - - hw->in = vqs[0]; - hw->out = vqs[1]; - - return 0; -} - -static const struct mei_hw_ops mei_virtio_ops = { - .fw_status = mei_virtio_fw_status, - .pg_state = mei_virtio_pg_state, - - .host_is_ready = mei_virtio_host_is_ready, - - .hw_is_ready = mei_virtio_hw_is_ready, - .hw_reset = mei_virtio_hw_reset, - .hw_config = mei_virtio_hw_config, - .hw_start = mei_virtio_hw_start, - - .pg_in_transition = mei_virtio_pg_in_transition, - .pg_is_enabled = mei_virtio_pg_is_enabled, - - .intr_clear = mei_virtio_intr_clear, - .intr_enable = mei_virtio_intr_enable, - .intr_disable = mei_virtio_intr_disable, - .synchronize_irq = mei_virtio_synchronize_irq, - - .hbuf_free_slots = mei_virtio_hbuf_empty_slots, - .hbuf_is_ready = mei_virtio_hbuf_is_ready, - .hbuf_depth = mei_virtio_hbuf_depth, - - .write = mei_virtio_write_message, - - .rdbuf_full_slots = mei_virtio_count_full_read_slots, - .read_hdr = mei_virtio_read_hdr, - .read = mei_virtio_read, -}; - -static int mei_virtio_probe(struct virtio_device *vdev) -{ - struct mei_virtio_hw *hw; - int ret; - - hw = devm_kzalloc(&vdev->dev, sizeof(*hw), GFP_KERNEL); - if (!hw) - return -ENOMEM; - - vdev->priv = hw; - - INIT_WORK(&hw->intr_handler, mei_virtio_intr_handler); - - ret = mei_virtio_init_vqs(hw, 
vdev); - if (ret) - goto vqs_failed; - - virtio_cread(vdev, struct mei_virtio_cfg, - buf_depth, &hw->cfg.buf_depth); - - hw->recv_buf = kzalloc(mei_slots2data(hw->cfg.buf_depth), GFP_KERNEL); - if (!hw->recv_buf) { - ret = -ENOMEM; - goto hbuf_failed; - } - atomic_set(&hw->hbuf_ready, 0); - - virtio_device_ready(vdev); - - mei_device_init(&hw->mdev, &vdev->dev, &mei_virtio_ops); - - pm_runtime_get_noresume(&vdev->dev); - pm_runtime_set_active(&vdev->dev); - pm_runtime_enable(&vdev->dev); - - ret = mei_start(&hw->mdev); - if (ret) - goto mei_start_failed; - - pm_runtime_set_autosuspend_delay(&vdev->dev, MEI_VIRTIO_RPM_TIMEOUT); - pm_runtime_use_autosuspend(&vdev->dev); - - ret = mei_register(&hw->mdev, &vdev->dev); - if (ret) - goto mei_failed; - - pm_runtime_put(&vdev->dev); - - return 0; - -mei_failed: - mei_stop(&hw->mdev); -mei_start_failed: - mei_cancel_work(&hw->mdev); - mei_disable_interrupts(&hw->mdev); - kfree(hw->recv_buf); -hbuf_failed: - vdev->config->del_vqs(vdev); -vqs_failed: - return ret; -} - -static int __maybe_unused mei_virtio_pm_runtime_idle(struct device *device) -{ - struct virtio_device *vdev = dev_to_virtio(device); - struct mei_virtio_hw *hw = vdev->priv; - - dev_dbg(&vdev->dev, "rpm: mei_virtio : runtime_idle\n"); - - if (!hw) - return -ENODEV; - - if (mei_write_is_idle(&hw->mdev)) - pm_runtime_autosuspend(device); - - return -EBUSY; -} - -static int __maybe_unused mei_virtio_pm_runtime_suspend(struct device *device) -{ - return 0; -} - -static int __maybe_unused mei_virtio_pm_runtime_resume(struct device *device) -{ - return 0; -} - -static int __maybe_unused mei_virtio_freeze(struct virtio_device *vdev) -{ - struct mei_virtio_hw *hw = vdev->priv; - - dev_dbg(&vdev->dev, "freeze\n"); - - if (!hw) - return -ENODEV; - - mei_stop(&hw->mdev); - mei_disable_interrupts(&hw->mdev); - cancel_work_sync(&hw->intr_handler); - vdev->config->reset(vdev); - mei_virtio_remove_vqs(vdev); - - return 0; -} - -static int __maybe_unused 
mei_virtio_restore(struct virtio_device *vdev) -{ - struct mei_virtio_hw *hw = vdev->priv; - int ret; - - dev_dbg(&vdev->dev, "restore\n"); - - if (!hw) - return -ENODEV; - - ret = mei_virtio_init_vqs(hw, vdev); - if (ret) - return ret; - - virtio_device_ready(vdev); - - ret = mei_restart(&hw->mdev); - if (ret) - return ret; - - /* Start timer if stopped in suspend */ - schedule_delayed_work(&hw->mdev.timer_work, HZ); - - return 0; -} - -static const struct dev_pm_ops mei_virtio_pm_ops = { - SET_RUNTIME_PM_OPS(mei_virtio_pm_runtime_suspend, - mei_virtio_pm_runtime_resume, - mei_virtio_pm_runtime_idle) -}; - -static void mei_virtio_remove(struct virtio_device *vdev) -{ - struct mei_virtio_hw *hw = vdev->priv; - - mei_stop(&hw->mdev); - mei_disable_interrupts(&hw->mdev); - cancel_work_sync(&hw->intr_handler); - mei_deregister(&hw->mdev); - vdev->config->reset(vdev); - mei_virtio_remove_vqs(vdev); - kfree(hw->recv_buf); - pm_runtime_disable(&vdev->dev); -} - -static struct virtio_device_id id_table[] = { - { VIRTIO_ID_MEI, VIRTIO_DEV_ANY_ID }, - { } -}; - -static struct virtio_driver mei_virtio_driver = { - .id_table = id_table, - .probe = mei_virtio_probe, - .remove = mei_virtio_remove, - .config_changed = mei_virtio_config_changed, - .driver = { - .name = KBUILD_MODNAME, - .owner = THIS_MODULE, - .pm = &mei_virtio_pm_ops, - }, -#ifdef CONFIG_PM_SLEEP - .freeze = mei_virtio_freeze, - .restore = mei_virtio_restore, -#endif -}; - -module_virtio_driver(mei_virtio_driver); -MODULE_DEVICE_TABLE(virtio, id_table); -MODULE_DESCRIPTION("Virtio MEI frontend driver"); -MODULE_LICENSE("GPL v2"); From 7d32358be8acb119dcfe39b6cf67ec6d94bf1fe7 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Fri, 4 Dec 2020 02:55:51 +0900 Subject: [PATCH 167/296] kbuild: avoid split lines in .mod files "xargs echo" is not a safe way to remove line breaks because the input may exceed the command line limit and xargs may break it up into multiple invocations of echo. 
This should never happen because scripts/gen_autoksyms.sh expects all undefined symbols to be placed in the second line of .mod files. One possible way is to replace "xargs echo" with "sed ':x;N;$!bx;s/\n/ /g'" or something, but I rewrote the code by using awk because it is more readable. This issue was reported by Sami Tolvanen; in his Clang LTO patch set, $(multi-used-m) is no longer an ELF object, but a thin archive that contains LLVM bitcode files. llvm-nm prints out symbols for each archive member separately, which results in a lot of duplications, in some places, beyond the system-defined limit. This problem must be fixed irrespective of LTO, and we must ensure zero possibility of having this issue. Link: https://lkml.org/lkml/2020/12/1/1658 Reported-by: Sami Tolvanen Signed-off-by: Masahiro Yamada Reviewed-by: Sami Tolvanen --- scripts/Makefile.build | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/scripts/Makefile.build b/scripts/Makefile.build index ae647379b579..4c058f12dd73 100644 --- a/scripts/Makefile.build +++ b/scripts/Makefile.build @@ -252,6 +252,9 @@ objtool_dep = $(objtool_obj) \ ifdef CONFIG_TRIM_UNUSED_KSYMS cmd_gen_ksymdeps = \ $(CONFIG_SHELL) $(srctree)/scripts/gen_ksymdeps.sh $@ >> $(dot-target).cmd + +# List module undefined symbols +undefined_syms = $(NM) $< | $(AWK) '$$1 == "U" { printf("%s%s", x++ ?
" " : "", $$2) }'; endif define rule_cc_o_c @@ -271,13 +274,6 @@ define rule_as_o_S $(call cmd,modversions_S) endef -# List module undefined symbols (or empty line if not enabled) -ifdef CONFIG_TRIM_UNUSED_KSYMS -cmd_undef_syms = $(NM) $< | sed -n 's/^ *U //p' | xargs echo -else -cmd_undef_syms = echo -endif - # Built-in and composite module parts $(obj)/%.o: $(src)/%.c $(recordmcount_source) $(objtool_dep) FORCE $(call if_changed_rule,cc_o_c) @@ -285,7 +281,7 @@ $(obj)/%.o: $(src)/%.c $(recordmcount_source) $(objtool_dep) FORCE cmd_mod = { \ echo $(if $($*-objs)$($*-y)$($*-m), $(addprefix $(obj)/, $($*-objs) $($*-y) $($*-m)), $(@:.mod=.o)); \ - $(cmd_undef_syms); \ + $(undefined_syms) echo; \ } > $@ $(obj)/%.mod: $(obj)/%.o FORCE From 11fb479ff5d9872ddff02dd533c16d60372c86b2 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sat, 5 Dec 2020 22:14:38 -0800 Subject: [PATCH 168/296] zlib: export S390 symbols for zlib modules Fix build errors when ZLIB_INFLATE=m and ZLIB_DEFLATE=m and ZLIB_DFLTCC=y by exporting the 2 needed symbols in dfltcc_inflate.c. Fixes these build errors: ERROR: modpost: "dfltcc_inflate" [lib/zlib_inflate/zlib_inflate.ko] undefined! ERROR: modpost: "dfltcc_can_inflate" [lib/zlib_inflate/zlib_inflate.ko] undefined! 
Fixes: 126196100063 ("lib/zlib: add s390 hardware support for kernel zlib_inflate") Reported-by: kernel test robot Signed-off-by: Randy Dunlap Signed-off-by: Andrew Morton Acked-by: Ilya Leoshkevich Cc: Mikhail Zaslonko Cc: Heiko Carstens Cc: Vasily Gorbik Cc: Christian Borntraeger Link: https://lkml.kernel.org/r/20201123191712.4882-1-rdunlap@infradead.org Signed-off-by: Linus Torvalds --- lib/zlib_dfltcc/dfltcc_inflate.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/lib/zlib_dfltcc/dfltcc_inflate.c b/lib/zlib_dfltcc/dfltcc_inflate.c index aa9ef23474df..db107016d29b 100644 --- a/lib/zlib_dfltcc/dfltcc_inflate.c +++ b/lib/zlib_dfltcc/dfltcc_inflate.c @@ -4,6 +4,7 @@ #include "dfltcc_util.h" #include "dfltcc.h" #include +#include #include /* @@ -29,6 +30,7 @@ int dfltcc_can_inflate( return is_bit_set(dfltcc_state->af.fns, DFLTCC_XPND) && is_bit_set(dfltcc_state->af.fmts, DFLTCC_FMT0); } +EXPORT_SYMBOL(dfltcc_can_inflate); static int dfltcc_was_inflate_used( z_streamp strm @@ -147,3 +149,4 @@ dfltcc_inflate_action dfltcc_inflate( return (cc == DFLTCC_CC_OP1_TOO_SHORT || cc == DFLTCC_CC_OP2_TOO_SHORT) ? DFLTCC_INFLATE_BREAK : DFLTCC_INFLATE_CONTINUE; } +EXPORT_SYMBOL(dfltcc_inflate); From 2bf509d96d84c3336d08375e8af34d1b85ee71c8 Mon Sep 17 00:00:00 2001 From: Menglong Dong Date: Sat, 5 Dec 2020 22:14:42 -0800 Subject: [PATCH 169/296] coredump: fix core_pattern parse error 'format_corename()' will split 'core_pattern' on spaces when it is in pipe mode, and takes helper_argv[0] as the path to the usermode executable. It works fine in most cases. However, if there is a space between '|' and '/file/path', such as '| /usr/lib/systemd/systemd-coredump %P %u %g', then helper_argv[0] will be parsed as '', and users will get a 'Core dump to | disabled'. It is not friendly to users, as the pattern above was valid previously. Fix this by ignoring the spaces between '|' and '/file/path'.
Fixes: 315c69261dd3 ("coredump: split pipe command whitespace before expanding template") Signed-off-by: Menglong Dong Signed-off-by: Andrew Morton Cc: Paul Wise Cc: Jakub Wilk [https://bugs.debian.org/924398] Cc: Neil Horman Cc: Link: https://lkml.kernel.org/r/5fb62870.1c69fb81.8ef5d.af76@mx.google.com Signed-off-by: Linus Torvalds --- fs/coredump.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/coredump.c b/fs/coredump.c index 0cd9056d79cc..c6acfc694f65 100644 --- a/fs/coredump.c +++ b/fs/coredump.c @@ -229,7 +229,8 @@ static int format_corename(struct core_name *cn, struct coredump_params *cprm, */ if (ispipe) { if (isspace(*pat_ptr)) { - was_space = true; + if (cn->used != 0) + was_space = true; pat_ptr++; continue; } else if (was_space) { From becaba65f62f88e553ec92ed98370e9d2b18e629 Mon Sep 17 00:00:00 2001 From: Roman Gushchin Date: Sat, 5 Dec 2020 22:14:45 -0800 Subject: [PATCH 170/296] mm: memcg/slab: fix obj_cgroup_charge() return value handling Commit 10befea91b61 ("mm: memcg/slab: use a single set of kmem_caches for all allocations") introduced a regression into the handling of the obj_cgroup_charge() return value. If a non-zero value is returned (indicating of exceeding one of memory.max limits), the allocation should fail, instead of falling back to non-accounted mode. To make the code more readable, move memcg_slab_pre_alloc_hook() and memcg_slab_post_alloc_hook() calling conditions into bodies of these hooks. 
Fixes: 10befea91b61 ("mm: memcg/slab: use a single set of kmem_caches for all allocations") Signed-off-by: Roman Gushchin Signed-off-by: Andrew Morton Reviewed-by: Shakeel Butt Cc: Johannes Weiner Cc: Michal Hocko Cc: Link: https://lkml.kernel.org/r/20201127161828.GD840171@carbon.dhcp.thefacebook.com Signed-off-by: Linus Torvalds --- mm/slab.h | 40 ++++++++++++++++++++++++---------------- 1 file changed, 24 insertions(+), 16 deletions(-) diff --git a/mm/slab.h b/mm/slab.h index 6d7c6a5056ba..f9977d6613d6 100644 --- a/mm/slab.h +++ b/mm/slab.h @@ -274,22 +274,32 @@ static inline size_t obj_full_size(struct kmem_cache *s) return s->size + sizeof(struct obj_cgroup *); } -static inline struct obj_cgroup *memcg_slab_pre_alloc_hook(struct kmem_cache *s, - size_t objects, - gfp_t flags) +/* + * Returns false if the allocation should fail. + */ +static inline bool memcg_slab_pre_alloc_hook(struct kmem_cache *s, + struct obj_cgroup **objcgp, + size_t objects, gfp_t flags) { struct obj_cgroup *objcg; + if (!memcg_kmem_enabled()) + return true; + + if (!(flags & __GFP_ACCOUNT) && !(s->flags & SLAB_ACCOUNT)) + return true; + objcg = get_obj_cgroup_from_current(); if (!objcg) - return NULL; + return true; if (obj_cgroup_charge(objcg, flags, objects * obj_full_size(s))) { obj_cgroup_put(objcg); - return NULL; + return false; } - return objcg; + *objcgp = objcg; + return true; } static inline void mod_objcg_state(struct obj_cgroup *objcg, @@ -315,7 +325,7 @@ static inline void memcg_slab_post_alloc_hook(struct kmem_cache *s, unsigned long off; size_t i; - if (!objcg) + if (!memcg_kmem_enabled() || !objcg) return; flags &= ~__GFP_ACCOUNT; @@ -400,11 +410,11 @@ static inline void memcg_free_page_obj_cgroups(struct page *page) { } -static inline struct obj_cgroup *memcg_slab_pre_alloc_hook(struct kmem_cache *s, - size_t objects, - gfp_t flags) +static inline bool memcg_slab_pre_alloc_hook(struct kmem_cache *s, + struct obj_cgroup **objcgp, + size_t objects, gfp_t flags) { - return 
NULL; + return true; } static inline void memcg_slab_post_alloc_hook(struct kmem_cache *s, @@ -508,9 +518,8 @@ static inline struct kmem_cache *slab_pre_alloc_hook(struct kmem_cache *s, if (should_failslab(s, flags)) return NULL; - if (memcg_kmem_enabled() && - ((flags & __GFP_ACCOUNT) || (s->flags & SLAB_ACCOUNT))) - *objcgp = memcg_slab_pre_alloc_hook(s, size, flags); + if (!memcg_slab_pre_alloc_hook(s, objcgp, size, flags)) + return NULL; return s; } @@ -529,8 +538,7 @@ static inline void slab_post_alloc_hook(struct kmem_cache *s, s->flags, flags); } - if (memcg_kmem_enabled()) - memcg_slab_post_alloc_hook(s, objcg, flags, size, p); + memcg_slab_post_alloc_hook(s, objcg, flags, size, p); } #ifndef CONFIG_SLOB From 8199be001a470209f5c938570cc199abb012fe53 Mon Sep 17 00:00:00 2001 From: Yang Shi Date: Sat, 5 Dec 2020 22:14:48 -0800 Subject: [PATCH 171/296] mm: list_lru: set shrinker map bit when child nr_items is not zero When investigating a slab cache bloat problem, significant amount of negative dentry cache was seen, but confusingly they neither got shrunk by reclaimer (the host has very tight memory) nor be shrunk by dropping cache. The vmcore shows there are over 14M negative dentry objects on lru, but tracing result shows they were even not scanned at all. Further investigation shows the memcg's vfs shrinker_map bit is not set. So the reclaimer or dropping cache just skip calling vfs shrinker. So we have to reboot the hosts to get the memory back. I didn't manage to come up with a reproducer in test environment, and the problem can't be reproduced after rebooting. But it seems there is race between shrinker map bit clear and reparenting by code inspection. The hypothesis is elaborated as below. The memcg hierarchy on our production environment looks like: root / \ system user The main workloads are running under user slice's children, and it creates and removes memcg frequently. 
So reparenting happens very often under user slice, but no task is under user slice directly. So with the frequent reparenting and tight memory pressure, the below hypothetical race condition may happen: CPU A CPU B reparent dst->nr_items == 0 shrinker: total_objects == 0 add src->nr_items to dst set_bit return SHRINK_EMPTY clear_bit child memcg offline replace child's kmemcg_id with parent's (in memcg_offline_kmem()) list_lru_del() between shrinker runs see parent's kmemcg_id dec dst->nr_items reparent again dst->nr_items may go negative due to concurrent list_lru_del() The second run of shrinker: read nr_items without any synchronization, so it may see intermediate negative nr_items then total_objects may return 0 coincidentally keep the bit cleared dst->nr_items != 0 skip set_bit add src->nr_items to dst After this point dst->nr_items may never go to zero, so reparenting will not set shrinker_map bit anymore. And since there is no task under user slice directly, no new object will be added to its lru to set the shrinker map bit either. That bit is kept cleared forever. How does list_lru_del() race with reparenting? It is because reparenting replaces the children's kmemcg_id with the parent's without holding nlru->lock, so list_lru_del() may see the parent's kmemcg_id while actually deleting items from the child's lru, yet decrement the parent's nr_items, so the parent's nr_items may go negative as commit 2788cf0c401c ("memcg: reparent list_lrus and free kmemcg_id on css offline") says. Since it is impossible that dst->nr_items goes negative and src->nr_items goes zero at the same time, it seems we could set the shrinker map bit iff src->nr_items != 0. We could synchronize list_lru_count_one() and reparenting with nlru->lock, but it seems checking src->nr_items in reparenting is the simplest and avoids lock contention.
Fixes: fae91d6d8be5 ("mm/list_lru.c: set bit in memcg shrinker bitmap on first list_lru item appearance") Suggested-by: Roman Gushchin Signed-off-by: Yang Shi Signed-off-by: Andrew Morton Reviewed-by: Roman Gushchin Reviewed-by: Shakeel Butt Acked-by: Kirill Tkhai Cc: Vladimir Davydov Cc: [4.19] Link: https://lkml.kernel.org/r/20201202171749.264354-1-shy828301@gmail.com Signed-off-by: Linus Torvalds --- mm/list_lru.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/mm/list_lru.c b/mm/list_lru.c index 5aa6e44bc2ae..fe230081690b 100644 --- a/mm/list_lru.c +++ b/mm/list_lru.c @@ -534,7 +534,6 @@ static void memcg_drain_list_lru_node(struct list_lru *lru, int nid, struct list_lru_node *nlru = &lru->node[nid]; int dst_idx = dst_memcg->kmemcg_id; struct list_lru_one *src, *dst; - bool set; /* * Since list_lru_{add,del} may be called under an IRQ-safe lock, @@ -546,11 +545,12 @@ static void memcg_drain_list_lru_node(struct list_lru *lru, int nid, dst = list_lru_from_memcg_idx(nlru, dst_idx); list_splice_init(&src->list, &dst->list); - set = (!dst->nr_items && src->nr_items); - dst->nr_items += src->nr_items; - if (set) + + if (src->nr_items) { + dst->nr_items += src->nr_items; memcg_set_shrinker_bit(dst_memcg, nid, lru_shrinker_id(lru)); - src->nr_items = 0; + src->nr_items = 0; + } spin_unlock_irq(&nlru->lock); } From e91d8d78237de8d7120c320b3645b7100848f24d Mon Sep 17 00:00:00 2001 From: Minchan Kim Date: Sat, 5 Dec 2020 22:14:51 -0800 Subject: [PATCH 172/296] mm/zsmalloc.c: drop ZSMALLOC_PGTABLE_MAPPING While I was doing zram testing, I found sometimes decompression failed since the compression buffer was corrupted. With investigation, I found below commit calls cond_resched unconditionally so it could make a problem in atomic context if the task is reschedule. 
BUG: sleeping function called from invalid context at mm/vmalloc.c:108 in_atomic(): 1, irqs_disabled(): 0, non_block: 0, pid: 946, name: memhog 3 locks held by memhog/946: #0: ffff9d01d4b193e8 (&mm->mmap_lock#2){++++}-{4:4}, at: __mm_populate+0x103/0x160 #1: ffffffffa3d53de0 (fs_reclaim){+.+.}-{0:0}, at: __alloc_pages_slowpath.constprop.0+0xa98/0x1160 #2: ffff9d01d56b8110 (&zspage->lock){.+.+}-{3:3}, at: zs_map_object+0x8e/0x1f0 CPU: 0 PID: 946 Comm: memhog Not tainted 5.9.3-00011-gc5bfc0287345-dirty #316 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.13.0-1 04/01/2014 Call Trace: unmap_kernel_range_noflush+0x2eb/0x350 unmap_kernel_range+0x14/0x30 zs_unmap_object+0xd5/0xe0 zram_bvec_rw.isra.0+0x38c/0x8e0 zram_rw_page+0x90/0x101 bdev_write_page+0x92/0xe0 __swap_writepage+0x94/0x4a0 pageout+0xe3/0x3a0 shrink_page_list+0xb94/0xd60 shrink_inactive_list+0x158/0x460 We can fix this by removing the ZSMALLOC_PGTABLE_MAPPING feature (which contains the offending calling code) from zsmalloc. Even though this option showed some amount improvement(e.g., 30%) in some arm32 platforms, it has been headache to maintain since it have abused APIs[1](e.g., unmap_kernel_range in atomic context). Since we are approaching to deprecate 32bit machines and already made the config option available for only builtin build since v5.8, lastly it has been not default option in zsmalloc, it's time to drop the option for better maintenance. 
[1] http://lore.kernel.org/linux-mm/20201105170249.387069-1-minchan@kernel.org Fixes: e47110e90584 ("mm/vunmap: add cond_resched() in vunmap_pmd_range") Signed-off-by: Minchan Kim Signed-off-by: Andrew Morton Reviewed-by: Sergey Senozhatsky Cc: Tony Lindgren Cc: Christoph Hellwig Cc: Harish Sriram Cc: Uladzislau Rezki Cc: Link: https://lkml.kernel.org/r/20201117202916.GA3856507@google.com Signed-off-by: Linus Torvalds --- arch/arm/configs/omap2plus_defconfig | 1 - include/linux/zsmalloc.h | 1 - mm/Kconfig | 13 ------- mm/zsmalloc.c | 54 ---------------------------- 4 files changed, 69 deletions(-) diff --git a/arch/arm/configs/omap2plus_defconfig b/arch/arm/configs/omap2plus_defconfig index 34793aabdb65..58df9fd79a76 100644 --- a/arch/arm/configs/omap2plus_defconfig +++ b/arch/arm/configs/omap2plus_defconfig @@ -81,7 +81,6 @@ CONFIG_PARTITION_ADVANCED=y CONFIG_BINFMT_MISC=y CONFIG_CMA=y CONFIG_ZSMALLOC=m -CONFIG_ZSMALLOC_PGTABLE_MAPPING=y CONFIG_NET=y CONFIG_PACKET=y CONFIG_UNIX=y diff --git a/include/linux/zsmalloc.h b/include/linux/zsmalloc.h index 0fdbf653b173..4807ca4d52e0 100644 --- a/include/linux/zsmalloc.h +++ b/include/linux/zsmalloc.h @@ -20,7 +20,6 @@ * zsmalloc mapping modes * * NOTE: These only make a difference when a mapped object spans pages. - * They also have no effect when ZSMALLOC_PGTABLE_MAPPING is selected. */ enum zs_mapmode { ZS_MM_RW, /* normal read-write mapping */ diff --git a/mm/Kconfig b/mm/Kconfig index d42423f884a7..390165ffbb0f 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -707,19 +707,6 @@ config ZSMALLOC returned by an alloc(). This handle must be mapped in order to access the allocated space. -config ZSMALLOC_PGTABLE_MAPPING - bool "Use page table mapping to access object in zsmalloc" - depends on ZSMALLOC=y - help - By default, zsmalloc uses a copy-based object mapping method to - access allocations that span two pages. 
However, if a particular - architecture (ex, ARM) performs VM mapping faster than copying, - then you should select this. This causes zsmalloc to use page table - mapping rather than copying for object mapping. - - You can check speed with zsmalloc benchmark: - https://github.com/spartacus06/zsmapbench - config ZSMALLOC_STAT bool "Export zsmalloc statistics" depends on ZSMALLOC diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c index 918c7b019b3d..cdfaaadea8ff 100644 --- a/mm/zsmalloc.c +++ b/mm/zsmalloc.c @@ -293,11 +293,7 @@ struct zspage { }; struct mapping_area { -#ifdef CONFIG_ZSMALLOC_PGTABLE_MAPPING - struct vm_struct *vm; /* vm area for mapping object that span pages */ -#else char *vm_buf; /* copy buffer for objects that span pages */ -#endif char *vm_addr; /* address of kmap_atomic()'ed pages */ enum zs_mapmode vm_mm; /* mapping mode */ }; @@ -1113,54 +1109,6 @@ static struct zspage *find_get_zspage(struct size_class *class) return zspage; } -#ifdef CONFIG_ZSMALLOC_PGTABLE_MAPPING -static inline int __zs_cpu_up(struct mapping_area *area) -{ - /* - * Make sure we don't leak memory if a cpu UP notification - * and zs_init() race and both call zs_cpu_up() on the same cpu - */ - if (area->vm) - return 0; - area->vm = get_vm_area(PAGE_SIZE * 2, 0); - if (!area->vm) - return -ENOMEM; - - /* - * Populate ptes in advance to avoid pte allocation with GFP_KERNEL - * in non-preemtible context of zs_map_object. 
- */ - return apply_to_page_range(&init_mm, (unsigned long)area->vm->addr, - PAGE_SIZE * 2, NULL, NULL); -} - -static inline void __zs_cpu_down(struct mapping_area *area) -{ - if (area->vm) - free_vm_area(area->vm); - area->vm = NULL; -} - -static inline void *__zs_map_object(struct mapping_area *area, - struct page *pages[2], int off, int size) -{ - unsigned long addr = (unsigned long)area->vm->addr; - - BUG_ON(map_kernel_range(addr, PAGE_SIZE * 2, PAGE_KERNEL, pages) < 0); - area->vm_addr = area->vm->addr; - return area->vm_addr + off; -} - -static inline void __zs_unmap_object(struct mapping_area *area, - struct page *pages[2], int off, int size) -{ - unsigned long addr = (unsigned long)area->vm_addr; - - unmap_kernel_range(addr, PAGE_SIZE * 2); -} - -#else /* CONFIG_ZSMALLOC_PGTABLE_MAPPING */ - static inline int __zs_cpu_up(struct mapping_area *area) { /* @@ -1241,8 +1189,6 @@ out: pagefault_enable(); } -#endif /* CONFIG_ZSMALLOC_PGTABLE_MAPPING */ - static int zs_cpu_prepare(unsigned int cpu) { struct mapping_area *area; From b11a76b37a5aa7b07c3e3eeeaae20b25475bddd3 Mon Sep 17 00:00:00 2001 From: Qian Cai Date: Sat, 5 Dec 2020 22:14:55 -0800 Subject: [PATCH 173/296] mm/swapfile: do not sleep with a spin lock held We can't call kvfree() with a spin lock held, so defer it. Fixes a might_sleep() runtime warning. 
Fixes: 873d7bcfd066 ("mm/swapfile.c: use kvzalloc for swap_info_struct allocation") Signed-off-by: Qian Cai Signed-off-by: Andrew Morton Reviewed-by: Andrew Morton Cc: Hugh Dickins Cc: Link: https://lkml.kernel.org/r/20201202151549.10350-1-qcai@redhat.com Signed-off-by: Linus Torvalds --- mm/swapfile.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/mm/swapfile.c b/mm/swapfile.c index c4a613688a17..d58361109066 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -2867,6 +2867,7 @@ late_initcall(max_swapfiles_check); static struct swap_info_struct *alloc_swap_info(void) { struct swap_info_struct *p; + struct swap_info_struct *defer = NULL; unsigned int type; int i; @@ -2895,7 +2896,7 @@ static struct swap_info_struct *alloc_swap_info(void) smp_wmb(); WRITE_ONCE(nr_swapfiles, nr_swapfiles + 1); } else { - kvfree(p); + defer = p; p = swap_info[type]; /* * Do not memset this entry: a racing procfs swap_next() @@ -2908,6 +2909,7 @@ static struct swap_info_struct *alloc_swap_info(void) plist_node_init(&p->avail_lists[i], 0); p->flags = SWP_USED; spin_unlock(&swap_lock); + kvfree(defer); spin_lock_init(&p->lock); spin_lock_init(&p->cont_lock); From 4e60340c5ca560278c938726235bc0daa5fc8c7f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Sat, 5 Dec 2020 22:14:58 -0800 Subject: [PATCH 174/296] =?UTF-8?q?mailmap:=20add=20two=20more=20addresses?= =?UTF-8?q?=20of=20Uwe=20Kleine-K=C3=B6nig?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This fixes attribution for the commits (among others) - d4097456cd1d ("video/framebuffer: move the probe func into .devinit.text in Blackfin LCD driver") - 0312e024d6cd ("mfd: mc13xxx: Add support for mc34708") Signed-off-by: Uwe Kleine-König Signed-off-by: Andrew Morton Link: https://lkml.kernel.org/r/20201127213358.3440830-1-u.kleine-koenig@pengutronix.de Signed-off-by: Linus Torvalds --- .mailmap | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.mailmap 
b/.mailmap index d9fb83d67055..225546cc8028 100644 --- a/.mailmap +++ b/.mailmap @@ -322,6 +322,8 @@ TripleX Chung Tsuneo Yoshioka Tycho Andersen Uwe Kleine-König +Uwe Kleine-König +Uwe Kleine-König Uwe Kleine-König Uwe Kleine-König Valdis Kletnieks From d8cbe8bfa7df3c680ddfd5e1eee3a5c86d8dc764 Mon Sep 17 00:00:00 2001 From: Xingxing Su Date: Sat, 5 Dec 2020 22:15:02 -0800 Subject: [PATCH 175/296] tools/testing/selftests/vm: fix build error Only x86 and PowerPC implement the pkey-xxx.h, and an error was reported when compiling protection_keys.c. Add a Arch judgment to compile "protection_keys" in the Makefile. If other arch implement this, add the arch name to the Makefile. eg: ifneq (,$(findstring $(ARCH),powerpc mips ... )) Following build errors: pkey-helpers.h:93:2: error: #error Architecture not supported #error Architecture not supported pkey-helpers.h:96:20: error: `PKEY_DISABLE_ACCESS' undeclared #define PKEY_MASK (PKEY_DISABLE_ACCESS | PKEY_DISABLE_WRITE) ^ protection_keys.c:218:45: error: `PKEY_DISABLE_WRITE' undeclared pkey_assert(flags & (PKEY_DISABLE_ACCESS | PKEY_DISABLE_WRITE)); ^ Signed-off-by: Xingxing Su Signed-off-by: Andrew Morton Cc: Shuah Khan Cc: Sandipan Das Cc: John Hubbard Cc: Dave Hansen Cc: "Kirill A. 
Shutemov" Cc: Brian Geffon Cc: Mina Almasry Link: https://lkml.kernel.org/r/1606826876-30656-1-git-send-email-suxingxing@loongson.cn Signed-off-by: Linus Torvalds --- tools/testing/selftests/vm/Makefile | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tools/testing/selftests/vm/Makefile b/tools/testing/selftests/vm/Makefile index 30873b19d04b..691893afc15d 100644 --- a/tools/testing/selftests/vm/Makefile +++ b/tools/testing/selftests/vm/Makefile @@ -60,9 +60,13 @@ ifeq ($(CAN_BUILD_X86_64),1) TEST_GEN_FILES += $(BINARIES_64) endif else + +ifneq (,$(findstring $(ARCH),powerpc)) TEST_GEN_FILES += protection_keys endif +endif + ifneq (,$(filter $(MACHINE),arm64 ia64 mips64 parisc64 ppc64 ppc64le riscv64 s390x sh64 sparc64 x86_64)) TEST_GEN_FILES += va_128TBswitch TEST_GEN_FILES += virtual_address_range From 573a259336f8c57739bdaf035aa7abbae7d9a713 Mon Sep 17 00:00:00 2001 From: Axel Rasmussen Date: Sat, 5 Dec 2020 22:15:05 -0800 Subject: [PATCH 176/296] userfaultfd: selftests: fix SIGSEGV if huge mmap fails The error handling in hugetlb_allocate_area() was incorrect for the hugetlb_shared test case. Previously the behavior was: - mmap a hugetlb area - If this fails, set the pointer to NULL, and carry on - mmap an alias of the same hugetlb fd - If this fails, munmap the original area If the original mmap failed, it's likely the second one did too. If both failed, we'd blindly try to munmap a NULL pointer, causing a SIGSEGV. Instead, "goto fail" so we return before trying to mmap the alias. This issue can be hit "in real life" by forgetting to set /proc/sys/vm/nr_hugepages (leaving it at 0), and then trying to run the hugetlb_shared test. Another small improvement is, when the original mmap fails, don't just print "it failed": perror(), so we can see *why*. 
:) Signed-off-by: Axel Rasmussen Signed-off-by: Andrew Morton Cc: Shuah Khan Cc: Peter Xu Cc: Joe Perches Cc: Mike Rapoport Cc: Andrea Arcangeli Cc: David Alan Gilbert Link: https://lkml.kernel.org/r/20201204203443.2714693-1-axelrasmussen@google.com Signed-off-by: Linus Torvalds --- tools/testing/selftests/vm/userfaultfd.c | 25 +++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/tools/testing/selftests/vm/userfaultfd.c b/tools/testing/selftests/vm/userfaultfd.c index 9b0912a01777..c4425597769a 100644 --- a/tools/testing/selftests/vm/userfaultfd.c +++ b/tools/testing/selftests/vm/userfaultfd.c @@ -206,19 +206,19 @@ static int hugetlb_release_pages(char *rel_area) return ret; } - static void hugetlb_allocate_area(void **alloc_area) { void *area_alias = NULL; char **alloc_area_alias; + *alloc_area = mmap(NULL, nr_pages * page_size, PROT_READ | PROT_WRITE, (map_shared ? MAP_SHARED : MAP_PRIVATE) | MAP_HUGETLB, huge_fd, *alloc_area == area_src ? 0 : nr_pages * page_size); if (*alloc_area == MAP_FAILED) { - fprintf(stderr, "mmap of hugetlbfs file failed\n"); - *alloc_area = NULL; + perror("mmap of hugetlbfs file failed"); + goto fail; } if (map_shared) { @@ -227,14 +227,11 @@ static void hugetlb_allocate_area(void **alloc_area) huge_fd, *alloc_area == area_src ? 
0 : nr_pages * page_size); if (area_alias == MAP_FAILED) { - if (munmap(*alloc_area, nr_pages * page_size) < 0) { - perror("hugetlb munmap"); - exit(1); - } - *alloc_area = NULL; - return; + perror("mmap of hugetlb file alias failed"); + goto fail_munmap; } } + if (*alloc_area == area_src) { huge_fd_off0 = *alloc_area; alloc_area_alias = &area_src_alias; @@ -243,6 +240,16 @@ static void hugetlb_allocate_area(void **alloc_area) } if (area_alias) *alloc_area_alias = area_alias; + + return; + +fail_munmap: + if (munmap(*alloc_area, nr_pages * page_size) < 0) { + perror("hugetlb munmap"); + exit(1); + } +fail: + *alloc_area = NULL; } static void hugetlb_alias_mapping(__u64 *start, size_t len, unsigned long offset) From 3351b16af4946fff0d46481d155fb91adb28b1f9 Mon Sep 17 00:00:00 2001 From: Alex Shi Date: Sat, 5 Dec 2020 22:15:09 -0800 Subject: [PATCH 177/296] mm/filemap: add static for function __add_to_page_cache_locked mm/filemap.c:830:14: warning: no previous prototype for `__add_to_page_cache_locked' [-Wmissing-prototypes] Signed-off-by: Alex Shi Signed-off-by: Andrew Morton Cc: Souptick Joarder Link: https://lkml.kernel.org/r/1604661895-5495-1-git-send-email-alex.shi@linux.alibaba.com Signed-off-by: Linus Torvalds --- mm/filemap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/filemap.c b/mm/filemap.c index 0b2067b3c328..331f4261d723 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -827,7 +827,7 @@ int replace_page_cache_page(struct page *old, struct page *new, gfp_t gfp_mask) } EXPORT_SYMBOL_GPL(replace_page_cache_page); -noinline int __add_to_page_cache_locked(struct page *page, +static noinline int __add_to_page_cache_locked(struct page *page, struct address_space *mapping, pgoff_t offset, gfp_t gfp, void **shadowp) From 7a5bde37983d37783161681ff7c6122dfd081791 Mon Sep 17 00:00:00 2001 From: Mike Kravetz Date: Sat, 5 Dec 2020 22:15:12 -0800 Subject: [PATCH 178/296] hugetlb_cgroup: fix offline of hugetlb cgroup with reservations Adrian 
Moreno was running a kubernetes 1.19 + containerd/docker workload using hugetlbfs. In this environment the issue is reproduced by: - Start a simple pod that uses the recently added HugePages medium feature (pod yaml attached) - Start a DPDK app. It doesn't need to run successfully (as in transfer packets) nor interact with real hardware. It seems just initializing the EAL layer (which handles hugepage reservation and locking) is enough to trigger the issue - Delete the Pod (or let it "Complete"). This would result in a kworker thread going into a tight loop (top output): 1425 root 20 0 0 0 0 R 99.7 0.0 5:22.45 kworker/28:7+cgroup_destroy 'perf top -g' reports: - 63.28% 0.01% [kernel] [k] worker_thread - 49.97% worker_thread - 52.64% process_one_work - 62.08% css_killed_work_fn - hugetlb_cgroup_css_offline 41.52% _raw_spin_lock - 2.82% _cond_resched rcu_all_qs 2.66% PageHuge - 0.57% schedule - 0.57% __schedule We are spinning in the do-while loop in hugetlb_cgroup_css_offline. Worse yet, we are holding the master cgroup lock (cgroup_mutex) while infinitely spinning. Little else can be done on the system as the cgroup_mutex can not be acquired. Do note that the issue can be reproduced by simply offlining a hugetlb cgroup containing pages with reservation counts. The loop in hugetlb_cgroup_css_offline is moving page counts from the cgroup being offlined to the parent cgroup. This is done for each hstate, and is repeated until hugetlb_cgroup_have_usage returns false. The routine moving counts (hugetlb_cgroup_move_parent) is only moving 'usage' counts. The routine hugetlb_cgroup_have_usage is checking for both 'usage' and 'reservation' counts. What to do with reservation counts when reparenting was discussed here: https://lore.kernel.org/linux-kselftest/CAHS8izMFAYTgxym-Hzb_JmkTK1N_S9tGN71uS6MFV+R7swYu5A@mail.gmail.com/ The decision was made to leave a zombie cgroup with reservation counts.
Unfortunately, the code checking reservation counts was incorrectly added to hugetlb_cgroup_have_usage. To fix the issue, simply remove the check for reservation counts. While fixing this issue, a related bug in hugetlb_cgroup_css_offline was noticed. The hstate index is not reinitialized each time through the do-while loop. Fix this as well. Fixes: 1adc4d419aa2 ("hugetlb_cgroup: add interface for charge/uncharge hugetlb reservations") Reported-by: Adrian Moreno Signed-off-by: Mike Kravetz Signed-off-by: Andrew Morton Tested-by: Adrian Moreno Reviewed-by: Shakeel Butt Cc: Mina Almasry Cc: David Rientjes Cc: Greg Thelen Cc: Sandipan Das Cc: Shuah Khan Cc: Link: https://lkml.kernel.org/r/20201203220242.158165-1-mike.kravetz@oracle.com Signed-off-by: Linus Torvalds --- mm/hugetlb_cgroup.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/mm/hugetlb_cgroup.c b/mm/hugetlb_cgroup.c index 1f87aec9ab5c..9182848dda3e 100644 --- a/mm/hugetlb_cgroup.c +++ b/mm/hugetlb_cgroup.c @@ -82,11 +82,8 @@ static inline bool hugetlb_cgroup_have_usage(struct hugetlb_cgroup *h_cg) for (idx = 0; idx < hugetlb_max_hstate; idx++) { if (page_counter_read( - hugetlb_cgroup_counter_from_cgroup(h_cg, idx)) || - page_counter_read(hugetlb_cgroup_counter_from_cgroup_rsvd( - h_cg, idx))) { + hugetlb_cgroup_counter_from_cgroup(h_cg, idx))) return true; - } } return false; } @@ -202,9 +199,10 @@ static void hugetlb_cgroup_css_offline(struct cgroup_subsys_state *css) struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(css); struct hstate *h; struct page *page; - int idx = 0; + int idx; do { + idx = 0; for_each_hstate(h) { spin_lock(&hugetlb_lock); list_for_each_entry(page, &h->hugepage_activelist, lru) From 309d08d9b3a3659ab3f239d27d4e38b670b08fc9 Mon Sep 17 00:00:00 2001 From: Liu Zixian Date: Sat, 5 Dec 2020 22:15:15 -0800 Subject: [PATCH 179/296] mm/mmap.c: fix mmap return value when vma is merged after call_mmap() On success, mmap should return the begin address of newly 
mapped area, but patch "mm: mmap: merge vma after call_mmap() if possible" set vm_start of newly merged vma to return value addr. Users of mmap will get wrong address if vma is merged after call_mmap(). We fix this by moving the assignment to addr before merging vma. We have a driver which changes vm_flags, and this bug is found by our testcases. Fixes: d70cec898324 ("mm: mmap: merge vma after call_mmap() if possible") Signed-off-by: Liu Zixian Signed-off-by: Andrew Morton Reviewed-by: Jason Gunthorpe Reviewed-by: David Hildenbrand Cc: Miaohe Lin Cc: Hongxiang Lou Cc: Hu Shiyuan Cc: Matthew Wilcox Link: https://lkml.kernel.org/r/20201203085350.22624-1-liuzixian4@huawei.com Signed-off-by: Linus Torvalds --- mm/mmap.c | 26 ++++++++++++-------------- 1 file changed, 12 insertions(+), 14 deletions(-) diff --git a/mm/mmap.c b/mm/mmap.c index d91ecb00d38c..5c8b4485860d 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -1808,6 +1808,17 @@ unsigned long mmap_region(struct file *file, unsigned long addr, if (error) goto unmap_and_free_vma; + /* Can addr have changed?? + * + * Answer: Yes, several device drivers can do it in their + * f_op->mmap method. -DaveM + * Bug: If addr is changed, prev, rb_link, rb_parent should + * be updated for vma_link() + */ + WARN_ON_ONCE(addr != vma->vm_start); + + addr = vma->vm_start; + /* If vm_flags changed after call_mmap(), we should try merge vma again * as we may succeed this time. */ @@ -1822,25 +1833,12 @@ unsigned long mmap_region(struct file *file, unsigned long addr, fput(vma->vm_file); vm_area_free(vma); vma = merge; - /* Update vm_flags and possible addr to pick up the change. We don't - * warn here if addr changed as the vma is not linked by vma_link(). - */ - addr = vma->vm_start; + /* Update vm_flags to pick up the change. */ vm_flags = vma->vm_flags; goto unmap_writable; } } - /* Can addr have changed?? - * - * Answer: Yes, several device drivers can do it in their - * f_op->mmap method. 
-DaveM - * Bug: If addr is changed, prev, rb_link, rb_parent should - * be updated for vma_link() - */ - WARN_ON_ONCE(addr != vma->vm_start); - - addr = vma->vm_start; vm_flags = vma->vm_flags; } else if (vm_flags & VM_SHARED) { error = shmem_zero_setup(vma); From 0477e92881850d44910a7e94fc2c46f96faa131f Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 6 Dec 2020 14:25:12 -0800 Subject: [PATCH 180/296] Linux 5.10-rc7 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index a2ded5029084..9ec53d947628 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 5 PATCHLEVEL = 10 SUBLEVEL = 0 -EXTRAVERSION = -rc6 +EXTRAVERSION = -rc7 NAME = Kleptomaniac Octopus # *DOCUMENTATION* From 61f54de2e9194f01874d5eda12037b0978e77519 Mon Sep 17 00:00:00 2001 From: Huazhong Tan Date: Mon, 7 Dec 2020 15:20:25 +0800 Subject: [PATCH 181/296] net: hns3: remove a misused pragma packed hclge_dbg_reg_info[] is defined as an array of packed structure accidentally. However, this array contains pointers, which are no longer aligned naturally, and cannot be relocated on PPC64. Hence, when compile-testing this driver on PPC64 with CONFIG_RELOCATABLE=y (e.g. PowerPC allyesconfig), there will be some warnings. Since each field in structure hclge_qos_pri_map_cmd and hclge_dbg_bitmap_cmd is type u8, the pragma packed is unnecessary for these two structures as well, so remove the pragma packed in hclge_debugfs.h to fix this issue, and this increases hclge_dbg_reg_info[] by 4 bytes per entry. Fixes: a582b78dfc33 ("net: hns3: code optimization for debugfs related to "dump reg"") Reported-by: Stephen Rothwell Signed-off-by: Huazhong Tan Signed-off-by: David S. 
Miller --- drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.h | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.h index a9066e6ff697..ca2ab6cf84d9 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.h @@ -35,8 +35,6 @@ #define HCLGE_DBG_DFX_SSU_2_OFFSET 12 -#pragma pack(1) - struct hclge_qos_pri_map_cmd { u8 pri0_tc : 4, pri1_tc : 4; @@ -85,8 +83,6 @@ struct hclge_dbg_reg_type_info { struct hclge_dbg_reg_common_msg reg_msg; }; -#pragma pack() - static const struct hclge_dbg_dfx_message hclge_dbg_bios_common_reg[] = { {false, "Reserved"}, {true, "BP_CPU_STATE"}, From 10c678bd0a035ac2c64a9b26b222f20556227a53 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Mon, 7 Dec 2020 15:55:40 +0800 Subject: [PATCH 182/296] udp: fix the proto value passed to ip_protocol_deliver_rcu for the segments Guillaume noticed that: for segments udp_queue_rcv_one_skb() returns the proto, and it should pass "ret" unmodified to ip_protocol_deliver_rcu(). Otherwise, with a negative value passed, it will underflow inet_protos. This can be reproduced with IPIP FOU: # ip fou add port 5555 ipproto 4 # ethtool -K eth1 rx-gro-list on Fixes: cf329aa42b66 ("udp: cope with UDP GRO packet misdirection") Reported-by: Guillaume Nault Signed-off-by: Xin Long Signed-off-by: David S.
Miller --- net/ipv4/udp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 09f0a23d1a01..9eeebd4a0054 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -2173,7 +2173,7 @@ static int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) __skb_pull(skb, skb_transport_offset(skb)); ret = udp_queue_rcv_one_skb(sk, skb); if (ret > 0) - ip_protocol_deliver_rcu(dev_net(skb->dev), skb, -ret); + ip_protocol_deliver_rcu(dev_net(skb->dev), skb, ret); } return 0; } From 4165bf015ba9454f45beaad621d16c516d5c5afe Mon Sep 17 00:00:00 2001 From: Suravee Suthikulpanit Date: Mon, 7 Dec 2020 03:19:20 -0600 Subject: [PATCH 183/296] iommu/amd: Set DTE[IntTabLen] to represent 512 IRTEs According to the AMD IOMMU spec, the commit 73db2fc595f3 ("iommu/amd: Increase interrupt remapping table limit to 512 entries") also requires the interrupt table length (IntTabLen) to be set to 9 (power of 2) in the device table mapping entry (DTE). Fixes: 73db2fc595f3 ("iommu/amd: Increase interrupt remapping table limit to 512 entries") Reported-by: Jerry Snitselaar Signed-off-by: Suravee Suthikulpanit Reviewed-by: Jerry Snitselaar Link: https://lore.kernel.org/r/20201207091920.3052-1-suravee.suthikulpanit@amd.com Signed-off-by: Will Deacon --- drivers/iommu/amd/amd_iommu_types.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/amd/amd_iommu_types.h b/drivers/iommu/amd/amd_iommu_types.h index 89647700bab2..494b42a31b7a 100644 --- a/drivers/iommu/amd/amd_iommu_types.h +++ b/drivers/iommu/amd/amd_iommu_types.h @@ -257,7 +257,7 @@ #define DTE_IRQ_REMAP_INTCTL_MASK (0x3ULL << 60) #define DTE_IRQ_TABLE_LEN_MASK (0xfULL << 1) #define DTE_IRQ_REMAP_INTCTL (2ULL << 60) -#define DTE_IRQ_TABLE_LEN (8ULL << 1) +#define DTE_IRQ_TABLE_LEN (9ULL << 1) #define DTE_IRQ_REMAP_ENABLE 1ULL #define PAGE_MODE_NONE 0x00 From 9280f726097b436c8c907825131cd346d7eb0c0f Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 4 Dec 2020 
00:18:40 +0100 Subject: [PATCH 184/296] ARM: keystone: remove SECTION_SIZE_BITS/MAX_PHYSMEM_BITS These definitions are evidently left over from the days when sparsemem settings were platform specific. This was no longer the case when the platform got merged. There was no warning in the past, but now the asm/sparsemem.h header ends up being included indirectly, causing this warning: In file included from /git/arm-soc/arch/arm/mach-keystone/keystone.c:24: arch/arm/mach-keystone/memory.h:10:9: warning: 'SECTION_SIZE_BITS' macro redefined [-Wmacro-redefined] #define SECTION_SIZE_BITS 34 ^ arch/arm/include/asm/sparsemem.h:23:9: note: previous definition is here #define SECTION_SIZE_BITS 28 ^ Clearly the definitions never had any effect here, so remove them. Signed-off-by: Arnd Bergmann Link: https://lore.kernel.org/r/20201203231847.1484900-1-arnd@kernel.org' Signed-off-by: Arnd Bergmann --- arch/arm/mach-keystone/memory.h | 3 --- 1 file changed, 3 deletions(-) diff --git a/arch/arm/mach-keystone/memory.h b/arch/arm/mach-keystone/memory.h index 9147565d0581..1b9ed1271e05 100644 --- a/arch/arm/mach-keystone/memory.h +++ b/arch/arm/mach-keystone/memory.h @@ -6,9 +6,6 @@ #ifndef __MEMORY_H #define __MEMORY_H -#define MAX_PHYSMEM_BITS 36 -#define SECTION_SIZE_BITS 34 - #define KEYSTONE_LOW_PHYS_START 0x80000000ULL #define KEYSTONE_LOW_PHYS_SIZE 0x80000000ULL /* 2G */ #define KEYSTONE_LOW_PHYS_END (KEYSTONE_LOW_PHYS_START + \ From c99055ec2c3974386f36eb648af77a8dbe887ca9 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Tue, 1 Dec 2020 23:15:15 +0200 Subject: [PATCH 185/296] MAINTAINERS: correct SoC Git address (formerly: arm-soc) The SoC Git was moved from arm/arm-soc.git to soc/soc.git. Correct the ARM Sub-architectures entry. 
Signed-off-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20201201211516.24921-1-krzk@kernel.org' Signed-off-by: Arnd Bergmann --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 6f474153dbec..715c84c25fb5 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1489,7 +1489,7 @@ F: drivers/iommu/io-pgtable-arm* ARM SUB-ARCHITECTURES L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) S: Maintained -T: git git://git.kernel.org/pub/scm/linux/kernel/git/arm/arm-soc.git +T: git git://git.kernel.org/pub/scm/linux/kernel/git/soc/soc.git F: arch/arm/mach-*/ F: arch/arm/plat-*/ From db2082700a0c7974c3a7787d50abff34b2695b4b Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Tue, 1 Dec 2020 23:15:16 +0200 Subject: [PATCH 186/296] MAINTAINERS: add a limited ARM and ARM64 SoC entry It is expected for ARM and ARM64 SoC related code to go through sub-architecture maintainers. Their addresses were therefore not documented to push patch traffic through sub-architecture maintainers. However when patches touch generic code, e.g. multi_v7_defconfig, the patch might not be picked up by them and instead should go to the SoC maintainers - Arnd and Olof. Add a minimal maintainer's entry for SoC covering only Makefile, so it will not appear on most of submissions (except new devicetree boards). It will though serve as a documentation and reference for cases when submitter does not know where to send his SoC-related patches. 
Signed-off-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20201201211516.24921-2-krzk@kernel.org' Signed-off-by: Arnd Bergmann --- MAINTAINERS | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index 715c84c25fb5..52086876ce40 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1486,6 +1486,16 @@ F: Documentation/devicetree/bindings/iommu/arm,smmu* F: drivers/iommu/arm/ F: drivers/iommu/io-pgtable-arm* +ARM AND ARM64 SoC SUB-ARCHITECTURES (COMMON PARTS) +M: Arnd Bergmann +M: Olof Johansson +M: soc@kernel.org +L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) +S: Maintained +T: git git://git.kernel.org/pub/scm/linux/kernel/git/soc/soc.git +F: arch/arm/boot/dts/Makefile +F: arch/arm64/boot/dts/Makefile + ARM SUB-ARCHITECTURES L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) S: Maintained From d9054a1ff585ba01029584ab730efc794603d68f Mon Sep 17 00:00:00 2001 From: Dongdong Wang Date: Fri, 4 Dec 2020 23:59:45 -0800 Subject: [PATCH 187/296] lwt: Disable BH too in run_lwt_bpf() The per-cpu bpf_redirect_info is shared among all skb_do_redirect() and BPF redirect helpers. Callers on RX path are all in BH context, disabling preemption is not sufficient to prevent BH interruption. In production, we observed strange packet drops because of the race condition between LWT xmit and TC ingress, and we verified this issue is fixed after we disable BH. Although this bug was technically introduced from the beginning, that is commit 3a0af8fd61f9 ("bpf: BPF for lightweight tunnel infrastructure"), at that time call_rcu() had to be call_rcu_bh() to match the RCU context. So this patch may not work well before RCU flavor consolidation has been completed around v5.0. Update the comments above the code too, as call_rcu() is now BH friendly. 
Signed-off-by: Dongdong Wang Signed-off-by: Alexei Starovoitov Reviewed-by: Cong Wang Link: https://lore.kernel.org/bpf/20201205075946.497763-1-xiyou.wangcong@gmail.com --- net/core/lwt_bpf.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/net/core/lwt_bpf.c b/net/core/lwt_bpf.c index 7d3438215f32..4f3cb7c15ddf 100644 --- a/net/core/lwt_bpf.c +++ b/net/core/lwt_bpf.c @@ -39,12 +39,11 @@ static int run_lwt_bpf(struct sk_buff *skb, struct bpf_lwt_prog *lwt, { int ret; - /* Preempt disable is needed to protect per-cpu redirect_info between - * BPF prog and skb_do_redirect(). The call_rcu in bpf_prog_put() and - * access to maps strictly require a rcu_read_lock() for protection, - * mixing with BH RCU lock doesn't work. + /* Preempt disable and BH disable are needed to protect per-cpu + * redirect_info between BPF prog and skb_do_redirect(). */ preempt_disable(); + local_bh_disable(); bpf_compute_data_pointers(skb); ret = bpf_prog_run_save_cb(lwt->prog, skb); @@ -78,6 +77,7 @@ static int run_lwt_bpf(struct sk_buff *skb, struct bpf_lwt_prog *lwt, break; } + local_bh_enable(); preempt_enable(); return ret; From e3366884b383073a7edc1bad9634412ae0a22d4e Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Fri, 4 Dec 2020 23:59:46 -0800 Subject: [PATCH 188/296] lwt_bpf: Replace preempt_disable() with migrate_disable() migrate_disable() is just a wrapper for preempt_disable() in non-RT kernel. It is safe to replace it, and RT kernel will benefit. Note that it is introduced since Feb 2020. 
Suggested-by: Alexei Starovoitov Signed-off-by: Cong Wang Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20201205075946.497763-2-xiyou.wangcong@gmail.com --- net/core/lwt_bpf.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/core/lwt_bpf.c b/net/core/lwt_bpf.c index 4f3cb7c15ddf..2f7940bcf715 100644 --- a/net/core/lwt_bpf.c +++ b/net/core/lwt_bpf.c @@ -39,10 +39,10 @@ static int run_lwt_bpf(struct sk_buff *skb, struct bpf_lwt_prog *lwt, { int ret; - /* Preempt disable and BH disable are needed to protect per-cpu + /* Migration disable and BH disable are needed to protect per-cpu * redirect_info between BPF prog and skb_do_redirect(). */ - preempt_disable(); + migrate_disable(); local_bh_disable(); bpf_compute_data_pointers(skb); ret = bpf_prog_run_save_cb(lwt->prog, skb); @@ -78,7 +78,7 @@ static int run_lwt_bpf(struct sk_buff *skb, struct bpf_lwt_prog *lwt, } local_bh_enable(); - preempt_enable(); + migrate_enable(); return ret; } From e432c04c17993011b2a2f59dcb5738e604bd552e Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Sun, 6 Dec 2020 17:32:38 +0200 Subject: [PATCH 189/296] RDMA/core: Fix empty gid table for non IB/RoCE devices The query_gid_table ioctl skips non IB/RoCE ports, which as a result returns an empty gid table for devices such as EFA which have a GID table, but are not IB/RoCE. 
Fixes: c4b4d548fabc ("RDMA/core: Introduce new GID table query API") Link: https://lore.kernel.org/r/20201206153238.34878-1-galpress@amazon.com Signed-off-by: Gal Pressman Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/cache.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c index 8017c40dd110..7989b7e1d1c0 100644 --- a/drivers/infiniband/core/cache.c +++ b/drivers/infiniband/core/cache.c @@ -1269,9 +1269,6 @@ ssize_t rdma_query_gid_table(struct ib_device *device, unsigned long flags; rdma_for_each_port(device, port_num) { - if (!rdma_ib_or_roce(device, port_num)) - continue; - table = rdma_gid_table(device, port_num); read_lock_irqsave(&table->rwlock, flags); for (i = 0; i < table->sz; i++) { From 6247e31b75308c51476e157c9964823aeefbf5dc Mon Sep 17 00:00:00 2001 From: Dong Aisheng Date: Mon, 30 Nov 2020 16:46:24 +0800 Subject: [PATCH 190/296] clk: imx: scu: fix MXC_CLK_SCU module build break This issue can be reproduced by having a kernel config with CONFIG_IMX_MBOX=m and CONFIG_MXC_CLK_SCU=m. It's caused by the Makefile wanting to build clk-scu.o and clk-imx8qxp.o as different targets but that doesn't work (e.g. MXC_CLK_SCU = y while CLK_IMX8QXP = n) "obj-$(CONFIG_MXC_CLK_SCU) += clk-imx-scu.o clk-imx-lpcg-scu.o clk-imx-scu-$(CONFIG_CLK_IMX8QXP) += clk-scu.o clk-imx8qxp.o" Having MXC_CLK_SCU=y/m while CLK_IMX8QXP=n will cause a linker problem like below: LD [M] drivers/clk/imx/clk-imx-scu.o arm-poky-linux-gnueabi-ld: no input files Make MXC_CLK_SCU be un-selectable by users so it can only be selected by the CLK_IMX8QXP option, ensuring the two symbols are built together. Drop COMPILE_TEST too because this option isn't selectable anymore. We can remove it from MXC_CLK_SCU because CLK_IMX8QXP selects MXC_CLK_SCU which already has COMPILE_TEST. 
Fixes: e0d0d4d86c766 ("clk: imx8qxp: Support building i.MX8QXP clock driver as module") Acked-by: Sebastian Andrzej Siewior Signed-off-by: Dong Aisheng Link: https://lore.kernel.org/r/20201130084624.21113-1-aisheng.dong@nxp.com [sboyd@kernel.org: Rework commit text] Signed-off-by: Stephen Boyd --- drivers/clk/imx/Kconfig | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/clk/imx/Kconfig b/drivers/clk/imx/Kconfig index 3b393cb07295..3061896503f3 100644 --- a/drivers/clk/imx/Kconfig +++ b/drivers/clk/imx/Kconfig @@ -5,8 +5,8 @@ config MXC_CLK depends on ARCH_MXC || COMPILE_TEST config MXC_CLK_SCU - tristate "IMX SCU clock" - depends on ARCH_MXC || COMPILE_TEST + tristate + depends on ARCH_MXC depends on IMX_SCU && HAVE_ARM_SMCCC config CLK_IMX1 From ceabbf94c317c6175dee6e91805fca4a6353745a Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Mon, 30 Nov 2020 09:57:43 +0100 Subject: [PATCH 191/296] clk: renesas: r9a06g032: Drop __packed for portability The R9A06G032 clock driver uses an array of packed structures to reduce kernel size. However, this array contains pointers, which are no longer aligned naturally, and cannot be relocated on PPC64. Hence when compile-testing this driver on PPC64 with CONFIG_RELOCATABLE=y (e.g. PowerPC allyesconfig), the following warnings are produced: WARNING: 136 bad relocations c000000000616be3 R_PPC64_UADDR64 .rodata+0x00000000000cf338 c000000000616bfe R_PPC64_UADDR64 .rodata+0x00000000000cf370 ... Fix this by dropping the __packed attribute from the r9a06g032_clkdesc definition, trading a small size increase for portability. This increases the 156-entry clock table by 1 byte per entry, but due to the compiler generating more efficient code for unpacked accesses, the net size increase is only 76 bytes (gcc 9.3.0 on arm32). 
Reported-by: Stephen Rothwell Fixes: 4c3d88526eba2143 ("clk: renesas: Renesas R9A06G032 clock driver") Signed-off-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/20201130085743.1656317-1-geert+renesas@glider.be Tested-by: Stephen Rothwell # PowerPC allyesconfig build Acked-by: Stephen Boyd Signed-off-by: Stephen Boyd --- drivers/clk/renesas/r9a06g032-clocks.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clk/renesas/r9a06g032-clocks.c b/drivers/clk/renesas/r9a06g032-clocks.c index d900f6bf53d0..892e91b92f2c 100644 --- a/drivers/clk/renesas/r9a06g032-clocks.c +++ b/drivers/clk/renesas/r9a06g032-clocks.c @@ -55,7 +55,7 @@ struct r9a06g032_clkdesc { u16 sel, g1, r1, g2, r2; } dual; }; -} __packed; +}; #define I_GATE(_clk, _rst, _rdy, _midle, _scon, _mirack, _mistat) \ { .gate = _clk, .reset = _rst, \ From 5eedf9fe8db23313df104576845cec5f481b9b60 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Mon, 7 Dec 2020 16:58:01 +0000 Subject: [PATCH 192/296] powerpc/mm: Fix KUAP warning by providing copy_from_kernel_nofault_allowed() Since commit c33165253492 ("powerpc: use non-set_fs based maccess routines"), userspace access is not granted anymore when using copy_from_kernel_nofault() However, kthread_probe_data() uses copy_from_kernel_nofault() to check validity of pointers. When the pointer is NULL, it points to userspace, leading to a KUAP fault and triggering the following big hammer warning many times when you request a sysrq "show task": [ 1117.202054] ------------[ cut here ]------------ [ 1117.202102] Bug: fault blocked by AP register ! 
[ 1117.202261] WARNING: CPU: 0 PID: 377 at arch/powerpc/include/asm/nohash/32/kup-8xx.h:66 do_page_fault+0x4a8/0x5ec [ 1117.202310] Modules linked in: [ 1117.202428] CPU: 0 PID: 377 Comm: sh Tainted: G W 5.10.0-rc5-01340-g83f53be2de31-dirty #4175 [ 1117.202499] NIP: c0012048 LR: c0012048 CTR: 00000000 [ 1117.202573] REGS: cacdbb88 TRAP: 0700 Tainted: G W (5.10.0-rc5-01340-g83f53be2de31-dirty) [ 1117.202625] MSR: 00021032 CR: 24082222 XER: 20000000 [ 1117.202899] [ 1117.202899] GPR00: c0012048 cacdbc40 c2929290 00000023 c092e554 00000001 c09865e8 c092e640 [ 1117.202899] GPR08: 00001032 00000000 00000000 00014efc 28082224 100d166a 100a0920 00000000 [ 1117.202899] GPR16: 100cac0c 100b0000 1080c3fc 1080d685 100d0000 100d0000 00000000 100a0900 [ 1117.202899] GPR24: 100d0000 c07892ec 00000000 c0921510 c21f4440 0000005c c0000000 cacdbc80 [ 1117.204362] NIP [c0012048] do_page_fault+0x4a8/0x5ec [ 1117.204461] LR [c0012048] do_page_fault+0x4a8/0x5ec [ 1117.204509] Call Trace: [ 1117.204609] [cacdbc40] [c0012048] do_page_fault+0x4a8/0x5ec (unreliable) [ 1117.204771] [cacdbc70] [c00112f0] handle_page_fault+0x8/0x34 [ 1117.204911] --- interrupt: 301 at copy_from_kernel_nofault+0x70/0x1c0 [ 1117.204979] NIP: c010dbec LR: c010dbac CTR: 00000001 [ 1117.205053] REGS: cacdbc80 TRAP: 0301 Tainted: G W (5.10.0-rc5-01340-g83f53be2de31-dirty) [ 1117.205104] MSR: 00009032 CR: 28082224 XER: 00000000 [ 1117.205416] DAR: 0000005c DSISR: c0000000 [ 1117.205416] GPR00: c0045948 cacdbd38 c2929290 00000001 00000017 00000017 00000027 0000000f [ 1117.205416] GPR08: c09926ec 00000000 00000000 3ffff000 24082224 [ 1117.206106] NIP [c010dbec] copy_from_kernel_nofault+0x70/0x1c0 [ 1117.206202] LR [c010dbac] copy_from_kernel_nofault+0x30/0x1c0 [ 1117.206258] --- interrupt: 301 [ 1117.206372] [cacdbd38] [c004bbb0] kthread_probe_data+0x44/0x70 (unreliable) [ 1117.206561] [cacdbd58] [c0045948] print_worker_info+0xe0/0x194 [ 1117.206717] [cacdbdb8] [c00548ac] sched_show_task+0x134/0x168 [ 1117.206851] 
[cacdbdd8] [c005a268] show_state_filter+0x70/0x100 [ 1117.206989] [cacdbe08] [c039baa0] sysrq_handle_showstate+0x14/0x24 [ 1117.207122] [cacdbe18] [c039bf18] __handle_sysrq+0xac/0x1d0 [ 1117.207257] [cacdbe48] [c039c0c0] write_sysrq_trigger+0x4c/0x74 [ 1117.207407] [cacdbe68] [c01fba48] proc_reg_write+0xb4/0x114 [ 1117.207550] [cacdbe88] [c0179968] vfs_write+0x12c/0x478 [ 1117.207686] [cacdbf08] [c0179e60] ksys_write+0x78/0x128 [ 1117.207826] [cacdbf38] [c00110d0] ret_from_syscall+0x0/0x34 [ 1117.207938] --- interrupt: c01 at 0xfd4e784 [ 1117.208008] NIP: 0fd4e784 LR: 0fe0f244 CTR: 10048d38 [ 1117.208083] REGS: cacdbf48 TRAP: 0c01 Tainted: G W (5.10.0-rc5-01340-g83f53be2de31-dirty) [ 1117.208134] MSR: 0000d032 CR: 44002222 XER: 00000000 [ 1117.208470] [ 1117.208470] GPR00: 00000004 7fc34090 77bfb4e0 00000001 1080fa40 00000002 7400000f fefefeff [ 1117.208470] GPR08: 7f7f7f7f 10048d38 1080c414 7fc343c0 00000000 [ 1117.209104] NIP [0fd4e784] 0xfd4e784 [ 1117.209180] LR [0fe0f244] 0xfe0f244 [ 1117.209236] --- interrupt: c01 [ 1117.209274] Instruction dump: [ 1117.209353] 714a4000 418200f0 73ca0001 40820084 73ca0032 408200f8 73c90040 4082ff60 [ 1117.209727] 0fe00000 3c60c082 386399f4 48013b65 <0fe00000> 80010034 3860000b 7c0803a6 [ 1117.210102] ---[ end trace 1927c0323393af3e ]--- To avoid that, copy_from_kernel_nofault_allowed() is used to check whether the address is a valid kernel address. But the default version of it returns true for any address. Provide a powerpc version of copy_from_kernel_nofault_allowed() that returns false when the address is below TASK_USER_MAX, so that copy_from_kernel_nofault() will return -ERANGE. 
Fixes: c33165253492 ("powerpc: use non-set_fs based maccess routines") Reported-by: Qian Cai Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/18bcb456d32a3e74f5ae241fd6f1580c092d07f5.1607360230.git.christophe.leroy@csgroup.eu --- arch/powerpc/mm/Makefile | 2 +- arch/powerpc/mm/maccess.c | 9 +++++++++ 2 files changed, 10 insertions(+), 1 deletion(-) create mode 100644 arch/powerpc/mm/maccess.c diff --git a/arch/powerpc/mm/Makefile b/arch/powerpc/mm/Makefile index 5e147986400d..55b4a8bd408a 100644 --- a/arch/powerpc/mm/Makefile +++ b/arch/powerpc/mm/Makefile @@ -5,7 +5,7 @@ ccflags-$(CONFIG_PPC64) := $(NO_MINIMAL_TOC) -obj-y := fault.o mem.o pgtable.o mmap.o \ +obj-y := fault.o mem.o pgtable.o mmap.o maccess.o \ init_$(BITS).o pgtable_$(BITS).o \ pgtable-frag.o ioremap.o ioremap_$(BITS).o \ init-common.o mmu_context.o drmem.o diff --git a/arch/powerpc/mm/maccess.c b/arch/powerpc/mm/maccess.c new file mode 100644 index 000000000000..fa9a7a718fc6 --- /dev/null +++ b/arch/powerpc/mm/maccess.c @@ -0,0 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#include +#include + +bool copy_from_kernel_nofault_allowed(const void *unsafe_src, size_t size) +{ + return is_kernel_addr((unsigned long)unsafe_src); +} From eb96b686fc2c601e78903cc61b6cf4588ddde013 Mon Sep 17 00:00:00 2001 From: Claudiu Manoil Date: Fri, 4 Dec 2020 19:15:05 +0200 Subject: [PATCH 193/296] enetc: Fix reporting of h/w packet counters Noticed some inconsistencies in packet statistics reporting. This patch adds the missing Tx packet counter registers to ethtool reporting and fixes the information strings for a few of them. 
Fixes: 16eb4c85c964 ("enetc: Add ethtool statistics") Signed-off-by: Claudiu Manoil Link: https://lore.kernel.org/r/20201204171505.21389-1-claudiu.manoil@nxp.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/freescale/enetc/enetc_ethtool.c | 10 +++++++--- drivers/net/ethernet/freescale/enetc/enetc_hw.h | 10 +++++++--- 2 files changed, 14 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c b/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c index 8ed1ebd5a183..89e558135432 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c +++ b/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c @@ -143,8 +143,8 @@ static const struct { { ENETC_PM0_R255, "MAC rx 128-255 byte packets" }, { ENETC_PM0_R511, "MAC rx 256-511 byte packets" }, { ENETC_PM0_R1023, "MAC rx 512-1023 byte packets" }, - { ENETC_PM0_R1518, "MAC rx 1024-1518 byte packets" }, - { ENETC_PM0_R1519X, "MAC rx 1519 to max-octet packets" }, + { ENETC_PM0_R1522, "MAC rx 1024-1522 byte packets" }, + { ENETC_PM0_R1523X, "MAC rx 1523 to max-octet packets" }, { ENETC_PM0_ROVR, "MAC rx oversized packets" }, { ENETC_PM0_RJBR, "MAC rx jabber packets" }, { ENETC_PM0_RFRG, "MAC rx fragment packets" }, @@ -163,9 +163,13 @@ static const struct { { ENETC_PM0_TBCA, "MAC tx broadcast frames" }, { ENETC_PM0_TPKT, "MAC tx packets" }, { ENETC_PM0_TUND, "MAC tx undersized packets" }, + { ENETC_PM0_T64, "MAC tx 64 byte packets" }, { ENETC_PM0_T127, "MAC tx 65-127 byte packets" }, + { ENETC_PM0_T255, "MAC tx 128-255 byte packets" }, + { ENETC_PM0_T511, "MAC tx 256-511 byte packets" }, { ENETC_PM0_T1023, "MAC tx 512-1023 byte packets" }, - { ENETC_PM0_T1518, "MAC tx 1024-1518 byte packets" }, + { ENETC_PM0_T1522, "MAC tx 1024-1522 byte packets" }, + { ENETC_PM0_T1523X, "MAC tx 1523 to max-octet packets" }, { ENETC_PM0_TCNP, "MAC tx control packets" }, { ENETC_PM0_TDFR, "MAC tx deferred packets" }, { ENETC_PM0_TMCOL, "MAC tx multiple collisions" }, diff --git 
a/drivers/net/ethernet/freescale/enetc/enetc_hw.h b/drivers/net/ethernet/freescale/enetc/enetc_hw.h index eb6bbf1113c7..4cbf1667d7ff 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_hw.h +++ b/drivers/net/ethernet/freescale/enetc/enetc_hw.h @@ -267,8 +267,8 @@ enum enetc_bdr_type {TX, RX}; #define ENETC_PM0_R255 0x8180 #define ENETC_PM0_R511 0x8188 #define ENETC_PM0_R1023 0x8190 -#define ENETC_PM0_R1518 0x8198 -#define ENETC_PM0_R1519X 0x81A0 +#define ENETC_PM0_R1522 0x8198 +#define ENETC_PM0_R1523X 0x81A0 #define ENETC_PM0_ROVR 0x81A8 #define ENETC_PM0_RJBR 0x81B0 #define ENETC_PM0_RFRG 0x81B8 @@ -287,9 +287,13 @@ enum enetc_bdr_type {TX, RX}; #define ENETC_PM0_TBCA 0x8250 #define ENETC_PM0_TPKT 0x8260 #define ENETC_PM0_TUND 0x8268 +#define ENETC_PM0_T64 0x8270 #define ENETC_PM0_T127 0x8278 +#define ENETC_PM0_T255 0x8280 +#define ENETC_PM0_T511 0x8288 #define ENETC_PM0_T1023 0x8290 -#define ENETC_PM0_T1518 0x8298 +#define ENETC_PM0_T1522 0x8298 +#define ENETC_PM0_T1523X 0x82A0 #define ENETC_PM0_TCNP 0x82C0 #define ENETC_PM0_TDFR 0x82D0 #define ENETC_PM0_TMCOL 0x82D8 From 851d0a73c90e6c8c63fef106c6c1e73df7e05d9d Mon Sep 17 00:00:00 2001 From: Joseph Huang Date: Fri, 4 Dec 2020 18:56:28 -0500 Subject: [PATCH 194/296] bridge: Fix a deadlock when enabling multicast snooping When enabling multicast snooping, bridge module deadlocks on multicast_lock if 1) IPv6 is enabled, and 2) there is an existing querier on the same L2 network. The deadlock was caused by the following sequence: While holding the lock, br_multicast_open calls br_multicast_join_snoopers, which eventually causes IP stack to (attempt to) send out a Listener Report (in igmp6_join_group). Since the destination Ethernet address is a multicast address, br_dev_xmit feeds the packet back to the bridge via br_multicast_rcv, which in turn calls br_multicast_add_group, which then deadlocks on multicast_lock. The fix is to move the call br_multicast_join_snoopers outside of the critical section. 
This works since br_multicast_join_snoopers only deals with IP and does not modify any multicast data structures of the bridge, so there's no need to hold the lock. Steps to reproduce: 1. sysctl net.ipv6.conf.all.force_mld_version=1 2. have another querier 3. ip link set dev bridge type bridge mcast_snooping 0 && \ ip link set dev bridge type bridge mcast_snooping 1 < deadlock > A typical call trace looks like the following: [ 936.251495] _raw_spin_lock+0x5c/0x68 [ 936.255221] br_multicast_add_group+0x40/0x170 [bridge] [ 936.260491] br_multicast_rcv+0x7ac/0xe30 [bridge] [ 936.265322] br_dev_xmit+0x140/0x368 [bridge] [ 936.269689] dev_hard_start_xmit+0x94/0x158 [ 936.273876] __dev_queue_xmit+0x5ac/0x7f8 [ 936.277890] dev_queue_xmit+0x10/0x18 [ 936.281563] neigh_resolve_output+0xec/0x198 [ 936.285845] ip6_finish_output2+0x240/0x710 [ 936.290039] __ip6_finish_output+0x130/0x170 [ 936.294318] ip6_output+0x6c/0x1c8 [ 936.297731] NF_HOOK.constprop.0+0xd8/0xe8 [ 936.301834] igmp6_send+0x358/0x558 [ 936.305326] igmp6_join_group.part.0+0x30/0xf0 [ 936.309774] igmp6_group_added+0xfc/0x110 [ 936.313787] __ipv6_dev_mc_inc+0x1a4/0x290 [ 936.317885] ipv6_dev_mc_inc+0x10/0x18 [ 936.321677] br_multicast_open+0xbc/0x110 [bridge] [ 936.326506] br_multicast_toggle+0xec/0x140 [bridge] Fixes: 4effd28c1245 ("bridge: join all-snoopers multicast address") Signed-off-by: Joseph Huang Acked-by: Nikolay Aleksandrov Link: https://lore.kernel.org/r/20201204235628.50653-1-Joseph.Huang@garmin.com Signed-off-by: Jakub Kicinski --- net/bridge/br_device.c | 6 ++++++ net/bridge/br_multicast.c | 34 +++++++++++++++++++++++++--------- net/bridge/br_private.h | 10 ++++++++++ 3 files changed, 41 insertions(+), 9 deletions(-) diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index 7730c8f3cb53..d3ea9d0779fb 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c @@ -177,6 +177,9 @@ static int br_dev_open(struct net_device *dev) br_stp_enable_bridge(br); br_multicast_open(br); + if 
(br_opt_get(br, BROPT_MULTICAST_ENABLED)) + br_multicast_join_snoopers(br); + return 0; } @@ -197,6 +200,9 @@ static int br_dev_stop(struct net_device *dev) br_stp_disable_bridge(br); br_multicast_stop(br); + if (br_opt_get(br, BROPT_MULTICAST_ENABLED)) + br_multicast_leave_snoopers(br); + netif_stop_queue(dev); return 0; diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index eae898c3cff7..54cb82a69056 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -3286,7 +3286,7 @@ static inline void br_ip6_multicast_join_snoopers(struct net_bridge *br) } #endif -static void br_multicast_join_snoopers(struct net_bridge *br) +void br_multicast_join_snoopers(struct net_bridge *br) { br_ip4_multicast_join_snoopers(br); br_ip6_multicast_join_snoopers(br); @@ -3317,7 +3317,7 @@ static inline void br_ip6_multicast_leave_snoopers(struct net_bridge *br) } #endif -static void br_multicast_leave_snoopers(struct net_bridge *br) +void br_multicast_leave_snoopers(struct net_bridge *br) { br_ip4_multicast_leave_snoopers(br); br_ip6_multicast_leave_snoopers(br); @@ -3336,9 +3336,6 @@ static void __br_multicast_open(struct net_bridge *br, void br_multicast_open(struct net_bridge *br) { - if (br_opt_get(br, BROPT_MULTICAST_ENABLED)) - br_multicast_join_snoopers(br); - __br_multicast_open(br, &br->ip4_own_query); #if IS_ENABLED(CONFIG_IPV6) __br_multicast_open(br, &br->ip6_own_query); @@ -3354,9 +3351,6 @@ void br_multicast_stop(struct net_bridge *br) del_timer_sync(&br->ip6_other_query.timer); del_timer_sync(&br->ip6_own_query.timer); #endif - - if (br_opt_get(br, BROPT_MULTICAST_ENABLED)) - br_multicast_leave_snoopers(br); } void br_multicast_dev_del(struct net_bridge *br) @@ -3487,6 +3481,7 @@ static void br_multicast_start_querier(struct net_bridge *br, int br_multicast_toggle(struct net_bridge *br, unsigned long val) { struct net_bridge_port *port; + bool change_snoopers = false; spin_lock_bh(&br->multicast_lock); if (!!br_opt_get(br, 
BROPT_MULTICAST_ENABLED) == !!val) @@ -3495,7 +3490,7 @@ int br_multicast_toggle(struct net_bridge *br, unsigned long val) br_mc_disabled_update(br->dev, val); br_opt_toggle(br, BROPT_MULTICAST_ENABLED, !!val); if (!br_opt_get(br, BROPT_MULTICAST_ENABLED)) { - br_multicast_leave_snoopers(br); + change_snoopers = true; goto unlock; } @@ -3506,9 +3501,30 @@ int br_multicast_toggle(struct net_bridge *br, unsigned long val) list_for_each_entry(port, &br->port_list, list) __br_multicast_enable_port(port); + change_snoopers = true; + unlock: spin_unlock_bh(&br->multicast_lock); + /* br_multicast_join_snoopers has the potential to cause + * an MLD Report/Leave to be delivered to br_multicast_rcv, + * which would in turn call br_multicast_add_group, which would + * attempt to acquire multicast_lock. This function should be + * called after the lock has been released to avoid deadlocks on + * multicast_lock. + * + * br_multicast_leave_snoopers does not have the problem since + * br_multicast_rcv first checks BROPT_MULTICAST_ENABLED, and + * returns without calling br_multicast_ipv4/6_rcv if it's not + * enabled. Moved both functions out just for symmetry. 
+ */ + if (change_snoopers) { + if (br_opt_get(br, BROPT_MULTICAST_ENABLED)) + br_multicast_join_snoopers(br); + else + br_multicast_leave_snoopers(br); + } + return 0; } diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 345118e35c42..8424464186a6 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -792,6 +792,8 @@ void br_multicast_del_port(struct net_bridge_port *port); void br_multicast_enable_port(struct net_bridge_port *port); void br_multicast_disable_port(struct net_bridge_port *port); void br_multicast_init(struct net_bridge *br); +void br_multicast_join_snoopers(struct net_bridge *br); +void br_multicast_leave_snoopers(struct net_bridge *br); void br_multicast_open(struct net_bridge *br); void br_multicast_stop(struct net_bridge *br); void br_multicast_dev_del(struct net_bridge *br); @@ -969,6 +971,14 @@ static inline void br_multicast_init(struct net_bridge *br) { } +static inline void br_multicast_join_snoopers(struct net_bridge *br) +{ +} + +static inline void br_multicast_leave_snoopers(struct net_bridge *br) +{ +} + static inline void br_multicast_open(struct net_bridge *br) { } From f55628b3e7648198e9c072b52080c5dea8678adf Mon Sep 17 00:00:00 2001 From: Jianguo Wu Date: Sat, 5 Dec 2020 15:56:33 +0800 Subject: [PATCH 195/296] mptcp: print new line in mptcp_seq_show() if mptcp isn't in use When running cat /proc/net/netstat, the output isn't terminated with a new line; it looks like this: [root@localhost ~]# cat /proc/net/netstat ... MPTcpExt: 0 0 0 0 0 0 0 0 0 0 0 0 0[root@localhost ~]# This is because in mptcp_seq_show(), if mptcp isn't in use, net->mib.mptcp_statistics is NULL, so it just puts all 0 after "MPTcpExt:" and returns, forgetting the '\n'. After this patch: [root@localhost ~]# cat /proc/net/netstat ... 
MPTcpExt: 0 0 0 0 0 0 0 0 0 0 0 0 0 [root@localhost ~]# Fixes: fc518953bc9c8d7d ("mptcp: add and use MIB counter infrastructure") Signed-off-by: Jianguo Wu Acked-by: Florian Westphal Link: https://lore.kernel.org/r/142e2fd9-58d9-bb13-fb75-951cccc2331e@163.com Signed-off-by: Jakub Kicinski --- net/mptcp/mib.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/mptcp/mib.c b/net/mptcp/mib.c index 84d119436b22..b921cbdd9aaa 100644 --- a/net/mptcp/mib.c +++ b/net/mptcp/mib.c @@ -67,6 +67,7 @@ void mptcp_seq_show(struct seq_file *seq) for (i = 0; mptcp_snmp_list[i].name; i++) seq_puts(seq, " 0"); + seq_putc(seq, '\n'); return; } From bbef72c630b522a9ffbf62dae19b59c880da6ea1 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sun, 6 Dec 2020 16:13:39 +0100 Subject: [PATCH 196/296] dpaa2-mac: Add a missing of_node_put after of_device_is_available Add an 'of_node_put()' call when a tested device node is not available. Fixes: 94ae899b2096 ("dpaa2-mac: add PCS support through the Lynx module") Signed-off-by: Christophe JAILLET Reviewed-by: Ioana Ciornei Link: https://lore.kernel.org/r/20201206151339.44306-1-christophe.jaillet@wanadoo.fr Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/freescale/dpaa2/dpaa2-mac.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-mac.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-mac.c index 90cd243070d7..828c177df03d 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-mac.c +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-mac.c @@ -269,6 +269,7 @@ static int dpaa2_pcs_create(struct dpaa2_mac *mac, if (!of_device_is_available(node)) { netdev_err(mac->net_dev, "pcs-handle node not available\n"); + of_node_put(node); return -ENODEV; } From 82ca4c922b8992013a238d65cf4e60cc33e12f36 Mon Sep 17 00:00:00 2001 From: Martin Blumenstingl Date: Sat, 5 Dec 2020 22:32:07 +0100 Subject: [PATCH 197/296] net: stmmac: dwmac-meson8b: fix mask definition of the m250_sel mux The m250_sel mux clock uses 
bit 4 in the PRG_ETH0 register. Fix this by shifting the PRG_ETH0_CLK_M250_SEL_MASK accordingly as the "mask" in struct clk_mux expects the mask relative to the "shift" field in the same struct. While here, get rid of the PRG_ETH0_CLK_M250_SEL_SHIFT macro and use __ffs() to determine it from the existing PRG_ETH0_CLK_M250_SEL_MASK macro. Fixes: 566e8251625304 ("net: stmmac: add a glue driver for the Amlogic Meson 8b / GXBB DWMAC") Signed-off-by: Martin Blumenstingl Reviewed-by: Jerome Brunet Link: https://lore.kernel.org/r/20201205213207.519341-1-martin.blumenstingl@googlemail.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c index 5afcf05bbf9c..6d6bd77bb6af 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c @@ -30,7 +30,6 @@ #define PRG_ETH0_EXT_RMII_MODE 4 /* mux to choose between fclk_div2 (bit unset) and mpll2 (bit set) */ -#define PRG_ETH0_CLK_M250_SEL_SHIFT 4 #define PRG_ETH0_CLK_M250_SEL_MASK GENMASK(4, 4) /* TX clock delay in ns = "8ns / 4 * tx_dly_val" (where 8ns are exactly one @@ -155,8 +154,9 @@ static int meson8b_init_rgmii_tx_clk(struct meson8b_dwmac *dwmac) return -ENOMEM; clk_configs->m250_mux.reg = dwmac->regs + PRG_ETH0; - clk_configs->m250_mux.shift = PRG_ETH0_CLK_M250_SEL_SHIFT; - clk_configs->m250_mux.mask = PRG_ETH0_CLK_M250_SEL_MASK; + clk_configs->m250_mux.shift = __ffs(PRG_ETH0_CLK_M250_SEL_MASK); + clk_configs->m250_mux.mask = PRG_ETH0_CLK_M250_SEL_MASK >> + clk_configs->m250_mux.shift; clk = meson8b_dwmac_register_clk(dwmac, "m250_sel", mux_parents, ARRAY_SIZE(mux_parents), &clk_mux_ops, &clk_configs->m250_mux.hw); From 3d1387b3b8f6cc1ccdbb5f0d7af24df02f4baef9 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 8 Dec 2020 07:57:13 +0100 
Subject: [PATCH 198/296] media: vidtv: fix some warnings As reported by sparse: drivers/media/test-drivers/vidtv/vidtv_ts.h:47:47: warning: array of flexible structures drivers/media/test-drivers/vidtv/vidtv_channel.c:458:54: warning: incorrect type in argument 3 (different base types) drivers/media/test-drivers/vidtv/vidtv_channel.c:458:54: expected unsigned short [usertype] service_id drivers/media/test-drivers/vidtv/vidtv_channel.c:458:54: got restricted __be16 [usertype] service_id drivers/media/test-drivers/vidtv/vidtv_s302m.c:471 vidtv_s302m_encoder_init() warn: possible memory leak of 'e' Address such warnings. Signed-off-by: Mauro Carvalho Chehab --- drivers/media/test-drivers/vidtv/vidtv_psi.h | 2 +- drivers/media/test-drivers/vidtv/vidtv_s302m.c | 4 +++- drivers/media/test-drivers/vidtv/vidtv_ts.h | 2 +- 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/media/test-drivers/vidtv/vidtv_psi.h b/drivers/media/test-drivers/vidtv/vidtv_psi.h index 6651cc91bda1..fdc825e54138 100644 --- a/drivers/media/test-drivers/vidtv/vidtv_psi.h +++ b/drivers/media/test-drivers/vidtv/vidtv_psi.h @@ -743,7 +743,7 @@ struct vidtv_psi_table_eit { struct vidtv_psi_table_eit *vidtv_psi_eit_table_init(u16 network_id, u16 transport_stream_id, - u16 service_id); + __be16 service_id); /** * struct vidtv_psi_eit_write_args - Arguments for writing an EIT section diff --git a/drivers/media/test-drivers/vidtv/vidtv_s302m.c b/drivers/media/test-drivers/vidtv/vidtv_s302m.c index ce7dd6cafc8b..d79b65854627 100644 --- a/drivers/media/test-drivers/vidtv/vidtv_s302m.c +++ b/drivers/media/test-drivers/vidtv/vidtv_s302m.c @@ -467,8 +467,10 @@ struct vidtv_encoder e->is_video_encoder = false; ctx = kzalloc(priv_sz, GFP_KERNEL); - if (!ctx) + if (!ctx) { + kfree(e); return NULL; + } e->ctx = ctx; ctx->last_duration = 0; diff --git a/drivers/media/test-drivers/vidtv/vidtv_ts.h b/drivers/media/test-drivers/vidtv/vidtv_ts.h index 10838a2b8389..f5e8e1f37f05 100644 --- 
a/drivers/media/test-drivers/vidtv/vidtv_ts.h +++ b/drivers/media/test-drivers/vidtv/vidtv_ts.h @@ -44,7 +44,7 @@ struct vidtv_mpeg_ts { u8 adaptation_field:1; u8 scrambling:2; } __packed; - struct vidtv_mpeg_ts_adaption adaption[]; + struct vidtv_mpeg_ts_adaption *adaption; } __packed; /** From 7aeb353802611a8e655e019f09a370ff682af1a6 Mon Sep 17 00:00:00 2001 From: Andrew Jeffery Date: Thu, 26 Nov 2020 17:03:37 +1030 Subject: [PATCH 199/296] pinctrl: aspeed: Fix GPIO requests on pass-through banks Commit 6726fbff19bf ("pinctrl: aspeed: Fix GPI only function problem.") fixes access to GPIO banks T and U on the AST2600. Both banks contain input-only pins and the GPIO pin function is named GPITx and GPIUx respectively. Unfortunately the fix had a negative impact on GPIO banks D and E for the AST2400 and AST2500 where the GPIO pass-through functions take similar "GPI"-style names. The net effect on the older SoCs was that when the GPIO subsystem requested a pin in banks D or E be muxed for GPIO, they were instead muxed for pass-through mode. Mistakenly muxing pass-through mode e.g. breaks booting the host on IBM's Witherspoon (AC922) platform where GPIOE0 is used for FSI. Further exploit the names in the provided expression structure to differentiate pass-through from pin-specific GPIO modes. This follow-up fix gives the expected behaviour for the following tests: Witherspoon BMC (AST2500): 1. Power-on the Witherspoon host 2. Request GPIOD1 be muxed via /sys/class/gpio/export 3. Request GPIOE1 be muxed via /sys/class/gpio/export 4. Request the balls for GPIOs E2 and E3 be muxed as GPIO pass-through ("GPIE2" mode) via a pinctrl hog in the devicetree Rainier BMC (AST2600): 5. Request GPIT0 be muxed via /sys/class/gpio/export 6. 
Request GPIU0 be muxed via /sys/class/gpio/export Together the tests demonstrate that all three pieces of functionality (general GPIOs via 1, 2 and 3, input-only GPIOs via 5 and 6, pass-through mode via 4) operate as desired across old and new SoCs. Fixes: 9b92f5c51e9a ("pinctrl: aspeed: Fix GPI only function problem.") Signed-off-by: Andrew Jeffery Tested-by: Joel Stanley Reviewed-by: Joel Stanley Cc: Billy Tsai Cc: Joel Stanley Link: https://lore.kernel.org/r/20201126063337.489927-1-andrew@aj.id.au Signed-off-by: Linus Walleij --- drivers/pinctrl/aspeed/pinctrl-aspeed.c | 74 +++++++++++++++++++++++-- drivers/pinctrl/aspeed/pinmux-aspeed.h | 7 ++- 2 files changed, 72 insertions(+), 9 deletions(-) diff --git a/drivers/pinctrl/aspeed/pinctrl-aspeed.c b/drivers/pinctrl/aspeed/pinctrl-aspeed.c index d6b849552a1e..9c65d560d48f 100644 --- a/drivers/pinctrl/aspeed/pinctrl-aspeed.c +++ b/drivers/pinctrl/aspeed/pinctrl-aspeed.c @@ -286,14 +286,76 @@ int aspeed_pinmux_set_mux(struct pinctrl_dev *pctldev, unsigned int function, static bool aspeed_expr_is_gpio(const struct aspeed_sig_expr *expr) { /* - * The signal type is GPIO if the signal name has "GPI" as a prefix. - * strncmp (rather than strcmp) is used to implement the prefix - * requirement. + * We need to differentiate between GPIO and non-GPIO signals to + * implement the gpio_request_enable() interface. For better or worse + * the ASPEED pinctrl driver uses the expression names to determine + * whether an expression will mux a pin for GPIO. * - * expr->signal might look like "GPIOB1" in the GPIO case. - * expr->signal might look like "GPIT0" in the GPI case. + * Generally we have the following - A GPIO such as B1 has: + * + * - expr->signal set to "GPIOB1" + * - expr->function set to "GPIOB1" + * + * Using this fact we can determine whether the provided expression is + * a GPIO expression by testing the signal name for the string prefix + * "GPIO". 
+ * + * However, some GPIOs are input-only, and the ASPEED datasheets name + * them differently. An input-only GPIO such as T0 has: + * + * - expr->signal set to "GPIT0" + * - expr->function set to "GPIT0" + * + * It's tempting to generalise the prefix test from "GPIO" to "GPI" to + * account for both GPIOs and GPIs, but in doing so we run aground on + * another feature: + * + * Some pins in the ASPEED BMC SoCs have a "pass-through" GPIO + * function where the input state of one pin is replicated as the + * output state of another (as if they were shorted together - a mux + * configuration that is typically enabled by hardware strapping). + * This feature allows the BMC to pass e.g. power button state through + * to the host while the BMC is yet to boot, but take control of the + * button state once the BMC has booted by muxing each pin as a + * separate, pin-specific GPIO. + * + * Conceptually this pass-through mode is a form of GPIO and is named + * as such in the datasheets, e.g. "GPID0". This naming similarity + * trips us up with the simple GPI-prefixed-signal-name scheme + * discussed above, as the pass-through configuration is not what we + * want when muxing a pin as GPIO for the GPIO subsystem. + * + * On e.g. 
the AST2400, a pass-through function "GPID0" is grouped on + * balls A18 and D16, where we have: + * + * For ball A18: + * - expr->signal set to "GPID0IN" + * - expr->function set to "GPID0" + * + * For ball D16: + * - expr->signal set to "GPID0OUT" + * - expr->function set to "GPID0" + * + * By contrast, the pin-specific GPIO expressions for the same pins are + * as follows: + * + * For ball A18: + * - expr->signal looks like "GPIOD0" + * - expr->function looks like "GPIOD0" + * + * For ball D16: + * - expr->signal looks like "GPIOD1" + * - expr->function looks like "GPIOD1" + * + * Testing both the signal _and_ function names gives us the means + * differentiate the pass-through GPIO pinmux configuration from the + * pin-specific configuration that the GPIO subsystem is after: An + * expression is a pin-specific (non-pass-through) GPIO configuration + * if the signal prefix is "GPI" and the signal name matches the + * function name. */ - return strncmp(expr->signal, "GPI", 3) == 0; + return !strncmp(expr->signal, "GPI", 3) && + !strcmp(expr->signal, expr->function); } static bool aspeed_gpio_in_exprs(const struct aspeed_sig_expr **exprs) diff --git a/drivers/pinctrl/aspeed/pinmux-aspeed.h b/drivers/pinctrl/aspeed/pinmux-aspeed.h index f86739e800c3..dba5875ff276 100644 --- a/drivers/pinctrl/aspeed/pinmux-aspeed.h +++ b/drivers/pinctrl/aspeed/pinmux-aspeed.h @@ -452,10 +452,11 @@ struct aspeed_sig_desc { * evaluation of the descriptors. * * @signal: The signal name for the priority level on the pin. If the signal - * type is GPIO, then the signal name must begin with the string - * "GPIO", e.g. GPIOA0, GPIOT4 etc. + * type is GPIO, then the signal name must begin with the + * prefix "GPI", e.g. GPIOA0, GPIT0 etc. * @function: The name of the function the signal participates in for the - * associated expression + * associated expression. For pin-specific GPIO, the function + * name must match the signal name. 
* @ndescs: The number of signal descriptors in the expression * @descs: Pointer to an array of signal descriptors that comprise the * function expression From cc00bcaa589914096edef7fb87ca5cee4a166b5c Mon Sep 17 00:00:00 2001 From: Subash Abhinov Kasiviswanathan Date: Wed, 25 Nov 2020 11:27:22 -0700 Subject: [PATCH 200/296] netfilter: x_tables: Switch synchronization to RCU When running concurrent iptables rules replacement with data, the per CPU sequence count is checked after the assignment of the new information. The sequence count is used to synchronize with the packet path without the use of any explicit locking. If there are any packets in the packet path using the table information, the sequence count is incremented to an odd value and is incremented to an even after the packet process completion. The new table value assignment is followed by a write memory barrier so every CPU should see the latest value. If the packet path has started with the old table information, the sequence counter will be odd and the iptables replacement will wait till the sequence count is even prior to freeing the old table info. However, this assumes that the new table information assignment and the memory barrier is actually executed prior to the counter check in the replacement thread. If CPU decides to execute the assignment later as there is no user of the table information prior to the sequence check, the packet path in another CPU may use the old table information. 
The replacement thread would then free the table information under it leading to a use after free in the packet processing context- Unable to handle kernel NULL pointer dereference at virtual address 000000000000008e pc : ip6t_do_table+0x5d0/0x89c lr : ip6t_do_table+0x5b8/0x89c ip6t_do_table+0x5d0/0x89c ip6table_filter_hook+0x24/0x30 nf_hook_slow+0x84/0x120 ip6_input+0x74/0xe0 ip6_rcv_finish+0x7c/0x128 ipv6_rcv+0xac/0xe4 __netif_receive_skb+0x84/0x17c process_backlog+0x15c/0x1b8 napi_poll+0x88/0x284 net_rx_action+0xbc/0x23c __do_softirq+0x20c/0x48c This could be fixed by forcing instruction order after the new table information assignment or by switching to RCU for the synchronization. Fixes: 80055dab5de0 ("netfilter: x_tables: make xt_replace_table wait until old rules are not used anymore") Reported-by: Sean Tranchetti Reported-by: kernel test robot Suggested-by: Florian Westphal Signed-off-by: Subash Abhinov Kasiviswanathan Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/x_tables.h | 5 ++- net/ipv4/netfilter/arp_tables.c | 14 ++++----- net/ipv4/netfilter/ip_tables.c | 14 ++++----- net/ipv6/netfilter/ip6_tables.c | 14 ++++----- net/netfilter/x_tables.c | 49 +++++++++--------------------- 5 files changed, 40 insertions(+), 56 deletions(-) diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index 5deb099d156d..8ebb64193757 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -227,7 +227,7 @@ struct xt_table { unsigned int valid_hooks; /* Man behind the curtain... 
*/ - struct xt_table_info *private; + struct xt_table_info __rcu *private; /* Set this to THIS_MODULE if you are a module, otherwise NULL */ struct module *me; @@ -448,6 +448,9 @@ xt_get_per_cpu_counter(struct xt_counters *cnt, unsigned int cpu) struct nf_hook_ops *xt_hook_ops_alloc(const struct xt_table *, nf_hookfn *); +struct xt_table_info +*xt_table_get_private_protected(const struct xt_table *table); + #ifdef CONFIG_COMPAT #include diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index d1e04d2b5170..563b62b76a5f 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -203,7 +203,7 @@ unsigned int arpt_do_table(struct sk_buff *skb, local_bh_disable(); addend = xt_write_recseq_begin(); - private = READ_ONCE(table->private); /* Address dependency. */ + private = rcu_access_pointer(table->private); cpu = smp_processor_id(); table_base = private->entries; jumpstack = (struct arpt_entry **)private->jumpstack[cpu]; @@ -649,7 +649,7 @@ static struct xt_counters *alloc_counters(const struct xt_table *table) { unsigned int countersize; struct xt_counters *counters; - const struct xt_table_info *private = table->private; + const struct xt_table_info *private = xt_table_get_private_protected(table); /* We need atomic snapshot of counters: rest doesn't change * (other than comefrom, which userspace doesn't care @@ -673,7 +673,7 @@ static int copy_entries_to_user(unsigned int total_size, unsigned int off, num; const struct arpt_entry *e; struct xt_counters *counters; - struct xt_table_info *private = table->private; + struct xt_table_info *private = xt_table_get_private_protected(table); int ret = 0; void *loc_cpu_entry; @@ -807,7 +807,7 @@ static int get_info(struct net *net, void __user *user, const int *len) t = xt_request_find_table_lock(net, NFPROTO_ARP, name); if (!IS_ERR(t)) { struct arpt_getinfo info; - const struct xt_table_info *private = t->private; + const struct xt_table_info *private = 
xt_table_get_private_protected(t); #ifdef CONFIG_COMPAT struct xt_table_info tmp; @@ -860,7 +860,7 @@ static int get_entries(struct net *net, struct arpt_get_entries __user *uptr, t = xt_find_table_lock(net, NFPROTO_ARP, get.name); if (!IS_ERR(t)) { - const struct xt_table_info *private = t->private; + const struct xt_table_info *private = xt_table_get_private_protected(t); if (get.size == private->size) ret = copy_entries_to_user(private->size, @@ -1017,7 +1017,7 @@ static int do_add_counters(struct net *net, sockptr_t arg, unsigned int len) } local_bh_disable(); - private = t->private; + private = xt_table_get_private_protected(t); if (private->number != tmp.num_counters) { ret = -EINVAL; goto unlock_up_free; @@ -1330,7 +1330,7 @@ static int compat_copy_entries_to_user(unsigned int total_size, void __user *userptr) { struct xt_counters *counters; - const struct xt_table_info *private = table->private; + const struct xt_table_info *private = xt_table_get_private_protected(table); void __user *pos; unsigned int size; int ret = 0; diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index f15bc21d7301..6e2851f8d3a3 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -258,7 +258,7 @@ ipt_do_table(struct sk_buff *skb, WARN_ON(!(table->valid_hooks & (1 << hook))); local_bh_disable(); addend = xt_write_recseq_begin(); - private = READ_ONCE(table->private); /* Address dependency. 
*/ + private = rcu_access_pointer(table->private); cpu = smp_processor_id(); table_base = private->entries; jumpstack = (struct ipt_entry **)private->jumpstack[cpu]; @@ -791,7 +791,7 @@ static struct xt_counters *alloc_counters(const struct xt_table *table) { unsigned int countersize; struct xt_counters *counters; - const struct xt_table_info *private = table->private; + const struct xt_table_info *private = xt_table_get_private_protected(table); /* We need atomic snapshot of counters: rest doesn't change (other than comefrom, which userspace doesn't care @@ -815,7 +815,7 @@ copy_entries_to_user(unsigned int total_size, unsigned int off, num; const struct ipt_entry *e; struct xt_counters *counters; - const struct xt_table_info *private = table->private; + const struct xt_table_info *private = xt_table_get_private_protected(table); int ret = 0; const void *loc_cpu_entry; @@ -964,7 +964,7 @@ static int get_info(struct net *net, void __user *user, const int *len) t = xt_request_find_table_lock(net, AF_INET, name); if (!IS_ERR(t)) { struct ipt_getinfo info; - const struct xt_table_info *private = t->private; + const struct xt_table_info *private = xt_table_get_private_protected(t); #ifdef CONFIG_COMPAT struct xt_table_info tmp; @@ -1018,7 +1018,7 @@ get_entries(struct net *net, struct ipt_get_entries __user *uptr, t = xt_find_table_lock(net, AF_INET, get.name); if (!IS_ERR(t)) { - const struct xt_table_info *private = t->private; + const struct xt_table_info *private = xt_table_get_private_protected(t); if (get.size == private->size) ret = copy_entries_to_user(private->size, t, uptr->entrytable); @@ -1173,7 +1173,7 @@ do_add_counters(struct net *net, sockptr_t arg, unsigned int len) } local_bh_disable(); - private = t->private; + private = xt_table_get_private_protected(t); if (private->number != tmp.num_counters) { ret = -EINVAL; goto unlock_up_free; @@ -1543,7 +1543,7 @@ compat_copy_entries_to_user(unsigned int total_size, struct xt_table *table, void __user 
*userptr) { struct xt_counters *counters; - const struct xt_table_info *private = table->private; + const struct xt_table_info *private = xt_table_get_private_protected(table); void __user *pos; unsigned int size; int ret = 0; diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 2e2119bfcf13..c4f532f4d311 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -280,7 +280,7 @@ ip6t_do_table(struct sk_buff *skb, local_bh_disable(); addend = xt_write_recseq_begin(); - private = READ_ONCE(table->private); /* Address dependency. */ + private = rcu_access_pointer(table->private); cpu = smp_processor_id(); table_base = private->entries; jumpstack = (struct ip6t_entry **)private->jumpstack[cpu]; @@ -807,7 +807,7 @@ static struct xt_counters *alloc_counters(const struct xt_table *table) { unsigned int countersize; struct xt_counters *counters; - const struct xt_table_info *private = table->private; + const struct xt_table_info *private = xt_table_get_private_protected(table); /* We need atomic snapshot of counters: rest doesn't change (other than comefrom, which userspace doesn't care @@ -831,7 +831,7 @@ copy_entries_to_user(unsigned int total_size, unsigned int off, num; const struct ip6t_entry *e; struct xt_counters *counters; - const struct xt_table_info *private = table->private; + const struct xt_table_info *private = xt_table_get_private_protected(table); int ret = 0; const void *loc_cpu_entry; @@ -980,7 +980,7 @@ static int get_info(struct net *net, void __user *user, const int *len) t = xt_request_find_table_lock(net, AF_INET6, name); if (!IS_ERR(t)) { struct ip6t_getinfo info; - const struct xt_table_info *private = t->private; + const struct xt_table_info *private = xt_table_get_private_protected(t); #ifdef CONFIG_COMPAT struct xt_table_info tmp; @@ -1035,7 +1035,7 @@ get_entries(struct net *net, struct ip6t_get_entries __user *uptr, t = xt_find_table_lock(net, AF_INET6, get.name); if (!IS_ERR(t)) { - 
struct xt_table_info *private = t->private; + struct xt_table_info *private = xt_table_get_private_protected(t); if (get.size == private->size) ret = copy_entries_to_user(private->size, t, uptr->entrytable); @@ -1189,7 +1189,7 @@ do_add_counters(struct net *net, sockptr_t arg, unsigned int len) } local_bh_disable(); - private = t->private; + private = xt_table_get_private_protected(t); if (private->number != tmp.num_counters) { ret = -EINVAL; goto unlock_up_free; @@ -1552,7 +1552,7 @@ compat_copy_entries_to_user(unsigned int total_size, struct xt_table *table, void __user *userptr) { struct xt_counters *counters; - const struct xt_table_info *private = table->private; + const struct xt_table_info *private = xt_table_get_private_protected(table); void __user *pos; unsigned int size; int ret = 0; diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index af22dbe85e2c..acce622582e3 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -1349,6 +1349,14 @@ struct xt_counters *xt_counters_alloc(unsigned int counters) } EXPORT_SYMBOL(xt_counters_alloc); +struct xt_table_info +*xt_table_get_private_protected(const struct xt_table *table) +{ + return rcu_dereference_protected(table->private, + mutex_is_locked(&xt[table->af].mutex)); +} +EXPORT_SYMBOL(xt_table_get_private_protected); + struct xt_table_info * xt_replace_table(struct xt_table *table, unsigned int num_counters, @@ -1356,7 +1364,6 @@ xt_replace_table(struct xt_table *table, int *error) { struct xt_table_info *private; - unsigned int cpu; int ret; ret = xt_jumpstack_alloc(newinfo); @@ -1366,47 +1373,20 @@ xt_replace_table(struct xt_table *table, } /* Do the substitution. */ - local_bh_disable(); - private = table->private; + private = xt_table_get_private_protected(table); /* Check inside lock: is the old number correct? 
*/ if (num_counters != private->number) { pr_debug("num_counters != table->private->number (%u/%u)\n", num_counters, private->number); - local_bh_enable(); *error = -EAGAIN; return NULL; } newinfo->initial_entries = private->initial_entries; - /* - * Ensure contents of newinfo are visible before assigning to - * private. - */ - smp_wmb(); - table->private = newinfo; - /* make sure all cpus see new ->private value */ - smp_wmb(); - - /* - * Even though table entries have now been swapped, other CPU's - * may still be using the old entries... - */ - local_bh_enable(); - - /* ... so wait for even xt_recseq on all cpus */ - for_each_possible_cpu(cpu) { - seqcount_t *s = &per_cpu(xt_recseq, cpu); - u32 seq = raw_read_seqcount(s); - - if (seq & 1) { - do { - cond_resched(); - cpu_relax(); - } while (seq == raw_read_seqcount(s)); - } - } + rcu_assign_pointer(table->private, newinfo); + synchronize_rcu(); audit_log_nfcfg(table->name, table->af, private->number, !private->number ? AUDIT_XT_OP_REGISTER : @@ -1442,12 +1422,12 @@ struct xt_table *xt_register_table(struct net *net, } /* Simplifies replace_table code. 
*/ - table->private = bootstrap; + rcu_assign_pointer(table->private, bootstrap); if (!xt_replace_table(table, 0, newinfo, &ret)) goto unlock; - private = table->private; + private = xt_table_get_private_protected(table); pr_debug("table->private->number = %u\n", private->number); /* save number of initial entries */ @@ -1470,7 +1450,8 @@ void *xt_unregister_table(struct xt_table *table) struct xt_table_info *private; mutex_lock(&xt[table->af].mutex); - private = table->private; + private = xt_table_get_private_protected(table); + RCU_INIT_POINTER(table->private, NULL); list_del(&table->list); mutex_unlock(&xt[table->af].mutex); audit_log_nfcfg(table->name, table->af, private->number, From 352ded44fbd8622670ff95e90ca732a5b036ce79 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 3 Dec 2020 11:45:17 +0300 Subject: [PATCH 201/296] drm/i915/gem: Check the correct variable in selftest There is a copy and paste bug in this code. It's supposed to check "obj2" instead of checking "obj" a second time. 
Fixes: 80f0b679d6f0 ("drm/i915: Add an implementation for i915_gem_ww_ctx locking, v2.") Signed-off-by: Dan Carpenter Reviewed-by: Chris Wilson Reviewed-by: Andi Shyti Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/8ilneOcJAjwqU4t@mwand (cherry picked from commit 14f2d7604f7ce4cb3d303aea17292d119dfafa75) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/selftests/i915_gem.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/selftests/i915_gem.c b/drivers/gpu/drm/i915/selftests/i915_gem.c index 23a6132c5f4e..412e21604a05 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem.c @@ -211,8 +211,8 @@ static int igt_gem_ww_ctx(void *arg) return PTR_ERR(obj); obj2 = i915_gem_object_create_internal(i915, PAGE_SIZE); - if (IS_ERR(obj)) { - err = PTR_ERR(obj); + if (IS_ERR(obj2)) { + err = PTR_ERR(obj2); goto put1; } From 0e124e19ce52d20b28ee9f1d5cdb22e2106bfd29 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 3 Dec 2020 10:34:32 +0000 Subject: [PATCH 202/296] drm/i915/gem: Propagate error from cancelled submit due to context closure In the course of discovering and closing many races with context closure and execbuf submission, since commit 61231f6bd056 ("drm/i915/gem: Check that the context wasn't closed during setup") we started checking that the context was not closed by another userspace thread during the execbuf ioctl. In doing so we cancelled the inflight request (by telling it to be skipped), but kept reporting success since we do submit a request, albeit one that doesn't execute. As the error is known before we return from the ioctl, we can report the error we detect immediately, rather than leave it on the fence status. With the immediate propagation of the error, it is easier for userspace to handle. 
Fixes: 61231f6bd056 ("drm/i915/gem: Check that the context wasn't closed during setup") Testcase: igt/gem_ctx_exec/basic-close-race Signed-off-by: Chris Wilson Cc: # v5.7+ Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20201203103432.31526-1-chris@chris-wilson.co.uk (cherry picked from commit ba38b79eaeaeed29d2383f122d5c711ebf5ed3d1) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c index 1904e6e5ea64..b07dc1156a0e 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c @@ -3097,7 +3097,7 @@ static void retire_requests(struct intel_timeline *tl, struct i915_request *end) break; } -static void eb_request_add(struct i915_execbuffer *eb) +static int eb_request_add(struct i915_execbuffer *eb, int err) { struct i915_request *rq = eb->request; struct intel_timeline * const tl = i915_request_timeline(rq); @@ -3118,6 +3118,7 @@ static void eb_request_add(struct i915_execbuffer *eb) /* Serialise with context_close via the add_to_timeline */ i915_request_set_error_once(rq, -ENOENT); __i915_request_skip(rq); + err = -ENOENT; /* override any transient errors */ } __i915_request_queue(rq, &attr); @@ -3127,6 +3128,8 @@ static void eb_request_add(struct i915_execbuffer *eb) retire_requests(tl, prev); mutex_unlock(&tl->mutex); + + return err; } static const i915_user_extension_fn execbuf_extensions[] = { @@ -3332,7 +3335,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, err = eb_submit(&eb, batch); err_request: i915_request_get(eb.request); - eb_request_add(&eb); + err = eb_request_add(&eb, err); if (eb.fences) signal_fence_array(&eb); From 5419d93ffd774127b195b8543b063b2b4fa5aea9 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 4 Dec 2020 15:12:31 +0000 Subject: [PATCH 203/296] 
drm/i915/gt: Ignore repeated attempts to suspend request flow across reset Before resetting the engine, we suspend the execution of the guilty request, so that we can continue execution with a new context while we slowly compress the captured error state for the guilty context. However, if the reset fails, we will promptly attempt to reset the same request again, and discover the ongoing capture. Ignore the second attempt to suspend and capture the same request. Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/1168 Fixes: 32ff621fd744 ("drm/i915/gt: Allow temporary suspension of inflight requests") Signed-off-by: Chris Wilson Cc: # v5.7+ Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20201204151234.19729-1-chris@chris-wilson.co.uk (cherry picked from commit b969540500bce60cf1cdfff5464388af32b9a553) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/gt/intel_lrc.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index 0952bf157234..03fd1fa42a75 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -2788,6 +2788,9 @@ static void __execlists_hold(struct i915_request *rq) static bool execlists_hold(struct intel_engine_cs *engine, struct i915_request *rq) { + if (i915_request_on_hold(rq)) + return false; + spin_lock_irq(&engine->active.lock); if (i915_request_completed(rq)) { /* too late! */ From 0fe8bf4d3edce7aad6c14b9d5d92ff54dc19f0ba Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 4 Dec 2020 15:12:32 +0000 Subject: [PATCH 204/296] drm/i915/gt: Cancel the preemption timeout on responding to it We currently presume that the engine reset is successful, cancelling the expired preemption timer in the process. However, engine resets can fail, leaving the timeout still pending and we will then respond to the timeout again next time the tasklet fires. 
What we want is for the failed engine reset to be promoted to a full device reset, which is kicked by the heartbeat once the engine stops processing events. Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/1168 Fixes: 3a7a92aba8fb ("drm/i915/execlists: Force preemption") Signed-off-by: Chris Wilson Cc: # v5.5+ Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20201204151234.19729-2-chris@chris-wilson.co.uk (cherry picked from commit d997e240ceecb4f732611985d3a939ad1bfc1893) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/gt/intel_lrc.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index 03fd1fa42a75..724b2cb897d3 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -3172,8 +3172,10 @@ static void execlists_submission_tasklet(unsigned long data) spin_unlock_irqrestore(&engine->active.lock, flags); /* Recheck after serialising with direct-submission */ - if (unlikely(timeout && preempt_timeout(engine))) + if (unlikely(timeout && preempt_timeout(engine))) { + cancel_timer(&engine->execlists.preempt); execlists_reset(engine, "preemption time out"); + } } } From 88c52d805eb61da99aa4607fb5131f41c0ff6bd4 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Fri, 2 Oct 2020 18:03:54 +0100 Subject: [PATCH 205/296] drm/i915: fix size_t greater or equal to zero comparison Currently the check that the unsigned size_t variable i is >= 0 is always true because the unsigned variable will never be negative, causing the loop to run forever. Fix this by changing the pre-decrement check to a zero check on i followed by a decrement of i. 
Addresses-Coverity: ("Unsigned compared against 0") Fixes: bfed6708d6c9 ("drm/i915: use vmap in shmem_pin_map") Signed-off-by: Colin Ian King Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20201002170354.94627-1-colin.king@canonical.com (cherry picked from commit e70956a2498dc81d8f2522cba074f55ae910e13c) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/gt/shmem_utils.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gt/shmem_utils.c b/drivers/gpu/drm/i915/gt/shmem_utils.c index 463af675fadd..5982b62f913d 100644 --- a/drivers/gpu/drm/i915/gt/shmem_utils.c +++ b/drivers/gpu/drm/i915/gt/shmem_utils.c @@ -73,7 +73,7 @@ void *shmem_pin_map(struct file *file) mapping_set_unevictable(file->f_mapping); return vaddr; err_page: - while (--i >= 0) + while (i--) put_page(pages[i]); kvfree(pages); return NULL; From f6cbe49be65ed800863ac5ba695555057363f9c2 Mon Sep 17 00:00:00 2001 From: Manasi Navare Date: Fri, 4 Dec 2020 12:58:04 -0800 Subject: [PATCH 206/296] drm/i915/display/dp: Compute the correct slice count for VDSC on DP This patch fixes the slice count computation algorithm for calculating the slice count based on Peak pixel rate and the max slice width allowed on the DSC engines. We need to ensure slice count > min slice count req as per DP spec based on peak pixel rate and that it is greater than min slice count based on the max slice width advertised by DPCD. So use max of these two. In the prev patch we were using min of these 2 causing it to violate the max slice width limitation causing a blank screen on 8K@60. 
Fixes: d9218c8f6cf4 ("drm/i915/dp: Add helpers for Compressed BPP and Slice Count for DSC") Cc: Ankit Nautiyal Cc: Jani Nikula Cc: # v5.0+ Signed-off-by: Manasi Navare Reviewed-by: Ankit Nautiyal Link: https://patchwork.freedesktop.org/patch/msgid/20201204205804.25225-1-manasi.d.navare@intel.com (cherry picked from commit d371d6ea92ad2a47f42bbcaa786ee5f6069c9c14) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/display/intel_dp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c index bf1e9cf1c0f3..9bc59fd2f95f 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.c +++ b/drivers/gpu/drm/i915/display/intel_dp.c @@ -573,7 +573,7 @@ static u8 intel_dp_dsc_get_slice_count(struct intel_dp *intel_dp, return 0; } /* Also take into account max slice width */ - min_slice_count = min_t(u8, min_slice_count, + min_slice_count = max_t(u8, min_slice_count, DIV_ROUND_UP(mode_hdisplay, max_slice_width)); From 7c5c15dffe1e3c42f44735ce9552afb7207f1584 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 27 Nov 2020 10:25:40 +0000 Subject: [PATCH 207/296] drm/i915/gt: Declare gen9 has 64 mocs entries! We checked the table size against a hardcoded number of entries, and that number was excluding the special mocs registers at the end. 
Fixes: 777a7717d60c ("drm/i915/gt: Program mocs:63 for cache eviction on gen9") Signed-off-by: Chris Wilson Cc: # v4.3+ Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20201127102540.13117-1-chris@chris-wilson.co.uk (cherry picked from commit 444fbf5d7058099447c5366ba8bb60d610aeb44b) Signed-off-by: Rodrigo Vivi [backported and updated the Fixes sha] --- drivers/gpu/drm/i915/gt/intel_mocs.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.c b/drivers/gpu/drm/i915/gt/intel_mocs.c index 4f74706967fd..413dadfac2d1 100644 --- a/drivers/gpu/drm/i915/gt/intel_mocs.c +++ b/drivers/gpu/drm/i915/gt/intel_mocs.c @@ -59,8 +59,7 @@ struct drm_i915_mocs_table { #define _L3_CACHEABILITY(value) ((value) << 4) /* Helper defines */ -#define GEN9_NUM_MOCS_ENTRIES 62 /* 62 out of 64 - 63 & 64 are reserved. */ -#define GEN11_NUM_MOCS_ENTRIES 64 /* 63-64 are reserved, but configured. */ +#define GEN9_NUM_MOCS_ENTRIES 64 /* 63-64 are reserved, but configured. 
*/ /* (e)LLC caching options */ /* @@ -328,11 +327,11 @@ static unsigned int get_mocs_settings(const struct drm_i915_private *i915, if (INTEL_GEN(i915) >= 12) { table->size = ARRAY_SIZE(tgl_mocs_table); table->table = tgl_mocs_table; - table->n_entries = GEN11_NUM_MOCS_ENTRIES; + table->n_entries = GEN9_NUM_MOCS_ENTRIES; } else if (IS_GEN(i915, 11)) { table->size = ARRAY_SIZE(icl_mocs_table); table->table = icl_mocs_table; - table->n_entries = GEN11_NUM_MOCS_ENTRIES; + table->n_entries = GEN9_NUM_MOCS_ENTRIES; } else if (IS_GEN9_BC(i915) || IS_CANNONLAKE(i915)) { table->size = ARRAY_SIZE(skl_mocs_table); table->n_entries = GEN9_NUM_MOCS_ENTRIES; From 932c60558109a9131e54dacfda6070147fd1cdfb Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Fri, 4 Dec 2020 15:20:01 -0800 Subject: [PATCH 208/296] tools/bpftool: Fix PID fetching with a lot of results In case of having so many PID results that they don't fit into a single page (4096 bytes), bpftool will erroneously conclude that it got corrupted data due to 4096 not being a multiple of struct pid_iter_entry, so the last entry will be partially truncated. Fix this by sizing the buffer to fit exactly N entries with no truncation in the middle of a record. 
Fixes: d53dee3fe013 ("tools/bpftool: Show info for processes holding BPF map/prog/link/btf FDs") Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20201204232002.3589803-1-andrii@kernel.org --- tools/bpf/bpftool/pids.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/bpf/bpftool/pids.c b/tools/bpf/bpftool/pids.c index df7d8ec76036..477e55d59c34 100644 --- a/tools/bpf/bpftool/pids.c +++ b/tools/bpf/bpftool/pids.c @@ -89,9 +89,9 @@ libbpf_print_none(__maybe_unused enum libbpf_print_level level, int build_obj_refs_table(struct obj_refs_table *table, enum bpf_obj_type type) { - char buf[4096]; - struct pid_iter_bpf *skel; struct pid_iter_entry *e; + char buf[4096 / sizeof(*e) * sizeof(*e)]; + struct pid_iter_bpf *skel; int err, ret, fd = -1, i; libbpf_print_fn_t default_print; From 007ab5345545aba2f9cbe4c096cc35d2fd3275ac Mon Sep 17 00:00:00 2001 From: Jarod Wilson Date: Sat, 5 Dec 2020 12:22:29 -0500 Subject: [PATCH 209/296] bonding: fix feature flag setting at init time Don't try to adjust XFRM support flags if the bond device isn't yet registered. Bad things can currently happen when netdev_change_features() is called without having wanted_features fully filled in yet. This code runs both on post-module-load mode changes, as well as at module init time, and when run at module init time, it is before register_netdevice() has been called and filled in wanted_features. The empty wanted_features led to features also getting emptied out, which was definitely not the intended behavior, so prevent that from happening. 
Originally, I'd hoped to stop adjusting wanted_features at all in the bonding driver, as it's documented as being something only the network core should touch, but we actually do need to do this to properly update both the features and wanted_features fields when changing the bond type, or we get to a situation where ethtool sees: esp-hw-offload: off [requested on] I do think we should be using netdev_update_features instead of netdev_change_features here though, so we only send notifiers when the features actually changed. Fixes: a3b658cfb664 ("bonding: allow xfrm offload setup post-module-load") Reported-by: Ivan Vecera Suggested-by: Ivan Vecera Cc: Jay Vosburgh Cc: Veaceslav Falico Cc: Andy Gospodarek Signed-off-by: Jarod Wilson Link: https://lore.kernel.org/r/20201205172229.576587-1-jarod@redhat.com Signed-off-by: Jakub Kicinski --- drivers/net/bonding/bond_options.c | 22 +++++++++++++++------- include/net/bonding.h | 2 -- 2 files changed, 15 insertions(+), 9 deletions(-) diff --git a/drivers/net/bonding/bond_options.c b/drivers/net/bonding/bond_options.c index 9abfaae1c6f7..a4e4e15f574d 100644 --- a/drivers/net/bonding/bond_options.c +++ b/drivers/net/bonding/bond_options.c @@ -745,6 +745,19 @@ const struct bond_option *bond_opt_get(unsigned int option) return &bond_opts[option]; } +static void bond_set_xfrm_features(struct net_device *bond_dev, u64 mode) +{ + if (!IS_ENABLED(CONFIG_XFRM_OFFLOAD)) + return; + + if (mode == BOND_MODE_ACTIVEBACKUP) + bond_dev->wanted_features |= BOND_XFRM_FEATURES; + else + bond_dev->wanted_features &= ~BOND_XFRM_FEATURES; + + netdev_update_features(bond_dev); +} + static int bond_option_mode_set(struct bonding *bond, const struct bond_opt_value *newval) { @@ -767,13 +780,8 @@ static int bond_option_mode_set(struct bonding *bond, if (newval->value == BOND_MODE_ALB) bond->params.tlb_dynamic_lb = 1; -#ifdef CONFIG_XFRM_OFFLOAD - if (newval->value == BOND_MODE_ACTIVEBACKUP) - bond->dev->wanted_features |= BOND_XFRM_FEATURES; - else 
- bond->dev->wanted_features &= ~BOND_XFRM_FEATURES; - netdev_change_features(bond->dev); -#endif /* CONFIG_XFRM_OFFLOAD */ + if (bond->dev->reg_state == NETREG_REGISTERED) + bond_set_xfrm_features(bond->dev, newval->value); /* don't cache arp_validate between modes */ bond->params.arp_validate = BOND_ARP_VALIDATE_NONE; diff --git a/include/net/bonding.h b/include/net/bonding.h index d9d0ff3b0ad3..adc3da776970 100644 --- a/include/net/bonding.h +++ b/include/net/bonding.h @@ -86,10 +86,8 @@ #define bond_for_each_slave_rcu(bond, pos, iter) \ netdev_for_each_lower_private_rcu((bond)->dev, pos, iter) -#ifdef CONFIG_XFRM_OFFLOAD #define BOND_XFRM_FEATURES (NETIF_F_HW_ESP | NETIF_F_HW_ESP_TX_CSUM | \ NETIF_F_GSO_ESP) -#endif /* CONFIG_XFRM_OFFLOAD */ #ifdef CONFIG_NET_POLL_CONTROLLER extern atomic_t netpoll_block_tx; From 917d80d376ffbaa9725fde9e3c0282f63643f278 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 8 Dec 2020 18:25:53 +0100 Subject: [PATCH 210/296] netfilter: nft_dynset: fix timeouts later than 23 days Use nf_msecs_to_jiffies64 and nf_jiffies64_to_msecs as provided by 8e1102d5a159 ("netfilter: nf_tables: support timeouts larger than 23 days"), otherwise ruleset listing breaks. 
Fixes: a8b1e36d0d1d ("netfilter: nft_dynset: fix element timeout for HZ != 1000") Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 4 ++++ net/netfilter/nf_tables_api.c | 4 ++-- net/netfilter/nft_dynset.c | 8 +++++--- 3 files changed, 11 insertions(+), 5 deletions(-) diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 55b4cadf290a..c1c0a4ff92ae 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -1524,4 +1524,8 @@ void __init nft_chain_route_init(void); void nft_chain_route_fini(void); void nf_tables_trans_destroy_flush_work(void); + +int nf_msecs_to_jiffies64(const struct nlattr *nla, u64 *result); +__be64 nf_jiffies64_to_msecs(u64 input); + #endif /* _NET_NF_TABLES_H */ diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 23abf1578594..c2f59879a48d 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -3719,7 +3719,7 @@ cont: return 0; } -static int nf_msecs_to_jiffies64(const struct nlattr *nla, u64 *result) +int nf_msecs_to_jiffies64(const struct nlattr *nla, u64 *result) { u64 ms = be64_to_cpu(nla_get_be64(nla)); u64 max = (u64)(~((u64)0)); @@ -3733,7 +3733,7 @@ static int nf_msecs_to_jiffies64(const struct nlattr *nla, u64 *result) return 0; } -static __be64 nf_jiffies64_to_msecs(u64 input) +__be64 nf_jiffies64_to_msecs(u64 input) { return cpu_to_be64(jiffies64_to_msecs(input)); } diff --git a/net/netfilter/nft_dynset.c b/net/netfilter/nft_dynset.c index 64ca13a1885b..9af4f93c7f0e 100644 --- a/net/netfilter/nft_dynset.c +++ b/net/netfilter/nft_dynset.c @@ -157,8 +157,10 @@ static int nft_dynset_init(const struct nft_ctx *ctx, if (tb[NFTA_DYNSET_TIMEOUT] != NULL) { if (!(set->flags & NFT_SET_TIMEOUT)) return -EINVAL; - timeout = msecs_to_jiffies(be64_to_cpu(nla_get_be64( - tb[NFTA_DYNSET_TIMEOUT]))); + + err = nf_msecs_to_jiffies64(tb[NFTA_DYNSET_TIMEOUT], &timeout); + if (err) + return err; } priv->sreg_key 
= nft_parse_register(tb[NFTA_DYNSET_SREG_KEY]); @@ -267,7 +269,7 @@ static int nft_dynset_dump(struct sk_buff *skb, const struct nft_expr *expr) if (nla_put_string(skb, NFTA_DYNSET_SET_NAME, priv->set->name)) goto nla_put_failure; if (nla_put_be64(skb, NFTA_DYNSET_TIMEOUT, - cpu_to_be64(jiffies_to_msecs(priv->timeout)), + nf_jiffies64_to_msecs(priv->timeout), NFTA_DYNSET_PAD)) goto nla_put_failure; if (priv->expr && nft_expr_dump(skb, NFTA_DYNSET_EXPR, priv->expr)) From 42f1c27120906a54e73101a7d6a12f58813f6a9f Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 8 Dec 2020 18:57:02 +0100 Subject: [PATCH 211/296] netfilter: nftables: comment indirect serialization of commit_mutex with rtnl_mutex Add an explicit comment in the code to describe the indirect serialization of the holders of the commit_mutex with the rtnl_mutex. Commit 90d2723c6d4c ("netfilter: nf_tables: do not hold reference on netdevice from preparation phase") already describes this, but a comment in this case is better for reference. Reported-by: Vladimir Oltean Reviewed-by: Vladimir Oltean Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index c2f59879a48d..9a080767667b 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -1723,6 +1723,10 @@ static struct nft_hook *nft_netdev_hook_alloc(struct net *net, } nla_strlcpy(ifname, attr, IFNAMSIZ); + /* nf_tables_netdev_event() is called under rtnl_mutex, this is + * indirectly serializing all the other holders of the commit_mutex with + * the rtnl_mutex. 
+ */ dev = __dev_get_by_name(net, ifname); if (!dev) { err = -ENOENT; From 6220e48d9640538ff700f2e7d24c2f9277555fd6 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 8 Dec 2020 16:37:47 -0500 Subject: [PATCH 212/296] [regression fix] really dumb fuckup in sparc64 __csum_partial_copy() changes ~0U is -1, not 1 Reported-by: Anatoly Pugachev Tested-by: Anatoly Pugachev Fixes: fdf8bee96f9a "sparc64: propagate the calling convention changes down to __csum_partial_copy_...()" X-brown-paperbag: yes Signed-off-by: Al Viro --- arch/sparc/lib/csum_copy.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/sparc/lib/csum_copy.S b/arch/sparc/lib/csum_copy.S index 0c0268e77155..d839956407a7 100644 --- a/arch/sparc/lib/csum_copy.S +++ b/arch/sparc/lib/csum_copy.S @@ -71,7 +71,7 @@ FUNC_NAME: /* %o0=src, %o1=dst, %o2=len */ LOAD(prefetch, %o0 + 0x000, #n_reads) xor %o0, %o1, %g1 - mov 1, %o3 + mov -1, %o3 clr %o4 andcc %g1, 0x3, %g0 bne,pn %icc, 95f From 9d14edfdeabf37d8d8f045e63e5873712aadcd6b Mon Sep 17 00:00:00 2001 From: Fugang Duan Date: Mon, 7 Dec 2020 18:51:37 +0800 Subject: [PATCH 213/296] net: stmmac: increase the timeout for dma reset Current timeout value is not enough for gmac5 dma reset on imx8mp platform, increase the timeout range. Signed-off-by: Fugang Duan Signed-off-by: Joakim Zhang Signed-off-by: David S. 
Miller --- drivers/net/ethernet/stmicro/stmmac/dwmac4_lib.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_lib.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_lib.c index 6e30d7eb4983..0b4ee2dbb691 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_lib.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_lib.c @@ -22,7 +22,7 @@ int dwmac4_dma_reset(void __iomem *ioaddr) return readl_poll_timeout(ioaddr + DMA_BUS_MODE, value, !(value & DMA_BUS_MODE_SFT_RESET), - 10000, 100000); + 10000, 1000000); } void dwmac4_set_rx_tail_ptr(void __iomem *ioaddr, u32 tail_ptr, u32 chan) From 36d18b5664ef617ccf4da266291d2f2342fab89d Mon Sep 17 00:00:00 2001 From: Fugang Duan Date: Mon, 7 Dec 2020 18:51:38 +0800 Subject: [PATCH 214/296] net: stmmac: start phylink instance before stmmac_hw_setup() Start phylink instance and resume back the PHY to supply RX clock to MAC before MAC layer initialization by calling .stmmac_hw_setup(), since DMA reset depends on the RX clock, otherwise DMA reset cost maximum timeout value then finally timeout. Fixes: 74371272f97f ("net: stmmac: Convert to phylink and remove phylib logic") Signed-off-by: Fugang Duan Signed-off-by: Joakim Zhang Signed-off-by: David S. 
Miller --- .../net/ethernet/stmicro/stmmac/stmmac_main.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index ba45fe237512..0cef414f1289 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -5247,6 +5247,14 @@ int stmmac_resume(struct device *dev) return ret; } + if (!device_may_wakeup(priv->device) || !priv->plat->pmt) { + rtnl_lock(); + phylink_start(priv->phylink); + /* We may have called phylink_speed_down before */ + phylink_speed_up(priv->phylink); + rtnl_unlock(); + } + rtnl_lock(); mutex_lock(&priv->lock); @@ -5265,14 +5273,6 @@ int stmmac_resume(struct device *dev) mutex_unlock(&priv->lock); rtnl_unlock(); - if (!device_may_wakeup(priv->device) || !priv->plat->pmt) { - rtnl_lock(); - phylink_start(priv->phylink); - /* We may have called phylink_speed_down before */ - phylink_speed_up(priv->phylink); - rtnl_unlock(); - } - phylink_mac_change(priv->phylink, true); netif_device_attach(ndev); From 4ec236c7c51f89abb0224a4da4a6b77f9beb6600 Mon Sep 17 00:00:00 2001 From: Fugang Duan Date: Mon, 7 Dec 2020 18:51:39 +0800 Subject: [PATCH 215/296] net: stmmac: free tx skb buffer in stmmac_resume() When running a suspend/resume test, a WARN_ON() log dump is triggered from the stmmac_xmit() function; the code logic is: entry = tx_q->cur_tx; first_entry = entry; WARN_ON(tx_q->tx_skbuff[first_entry]); In the normal case, tx_q->tx_skbuff[tx_q->cur_tx] should be NULL because the skb should be handled and freed in stmmac_tx_clean(). But stmmac_resume() resets the queue parameters as shown below, so skb buffers may not be freed. tx_q->cur_tx = 0; tx_q->dirty_tx = 0; So free tx skb buffer in stmmac_resume() to avoid warning and memory leak.
log: [ 46.139824] ------------[ cut here ]------------ [ 46.144453] WARNING: CPU: 0 PID: 0 at drivers/net/ethernet/stmicro/stmmac/stmmac_main.c:3235 stmmac_xmit+0x7a0/0x9d0 [ 46.154969] Modules linked in: crct10dif_ce vvcam(O) flexcan can_dev [ 46.161328] CPU: 0 PID: 0 Comm: swapper/0 Tainted: G O 5.4.24-2.1.0+g2ad925d15481 #1 [ 46.170369] Hardware name: NXP i.MX8MPlus EVK board (DT) [ 46.175677] pstate: 80000005 (Nzcv daif -PAN -UAO) [ 46.180465] pc : stmmac_xmit+0x7a0/0x9d0 [ 46.184387] lr : dev_hard_start_xmit+0x94/0x158 [ 46.188913] sp : ffff800010003cc0 [ 46.192224] x29: ffff800010003cc0 x28: ffff000177e2a100 [ 46.197533] x27: ffff000176ef0840 x26: ffff000176ef0090 [ 46.202842] x25: 0000000000000000 x24: 0000000000000000 [ 46.208151] x23: 0000000000000003 x22: ffff8000119ddd30 [ 46.213460] x21: ffff00017636f000 x20: ffff000176ef0cc0 [ 46.218769] x19: 0000000000000003 x18: 0000000000000000 [ 46.224078] x17: 0000000000000000 x16: 0000000000000000 [ 46.229386] x15: 0000000000000079 x14: 0000000000000000 [ 46.234695] x13: 0000000000000003 x12: 0000000000000003 [ 46.240003] x11: 0000000000000010 x10: 0000000000000010 [ 46.245312] x9 : ffff00017002b140 x8 : 0000000000000000 [ 46.250621] x7 : ffff00017636f000 x6 : 0000000000000010 [ 46.255930] x5 : 0000000000000001 x4 : ffff000176ef0000 [ 46.261238] x3 : 0000000000000003 x2 : 00000000ffffffff [ 46.266547] x1 : ffff000177e2a000 x0 : 0000000000000000 [ 46.271856] Call trace: [ 46.274302] stmmac_xmit+0x7a0/0x9d0 [ 46.277874] dev_hard_start_xmit+0x94/0x158 [ 46.282056] sch_direct_xmit+0x11c/0x338 [ 46.285976] __qdisc_run+0x118/0x5f0 [ 46.289549] net_tx_action+0x110/0x198 [ 46.293297] __do_softirq+0x120/0x23c [ 46.296958] irq_exit+0xb8/0xd8 [ 46.300098] __handle_domain_irq+0x64/0xb8 [ 46.304191] gic_handle_irq+0x5c/0x148 [ 46.307936] el1_irq+0xb8/0x180 [ 46.311076] cpuidle_enter_state+0x84/0x360 [ 46.315256] cpuidle_enter+0x34/0x48 [ 46.318829] call_cpuidle+0x18/0x38 [ 46.322314] do_idle+0x1e0/0x280 [ 46.325539] 
cpu_startup_entry+0x24/0x40 [ 46.329460] rest_init+0xd4/0xe0 [ 46.332687] arch_call_rest_init+0xc/0x14 [ 46.336695] start_kernel+0x420/0x44c [ 46.340353] ---[ end trace bc1ee695123cbacd ]--- Fixes: 47dd7a540b8a0 ("net: add support for STMicroelectronics Ethernet controllers.") Signed-off-by: Fugang Duan Signed-off-by: Joakim Zhang Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 0cef414f1289..7452f3c1cab9 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -1533,6 +1533,19 @@ static void dma_free_tx_skbufs(struct stmmac_priv *priv, u32 queue) stmmac_free_tx_buffer(priv, queue, i); } +/** + * stmmac_free_tx_skbufs - free TX skb buffers + * @priv: private structure + */ +static void stmmac_free_tx_skbufs(struct stmmac_priv *priv) +{ + u32 tx_queue_cnt = priv->plat->tx_queues_to_use; + u32 queue; + + for (queue = 0; queue < tx_queue_cnt; queue++) + dma_free_tx_skbufs(priv, queue); +} + /** * free_dma_rx_desc_resources - free RX dma desc resources * @priv: private structure @@ -5260,6 +5273,7 @@ int stmmac_resume(struct device *dev) stmmac_reset_queues_param(priv); + stmmac_free_tx_skbufs(priv); stmmac_clear_descriptors(priv); stmmac_hw_setup(ndev, false); From 5f58591323bf3f342920179f24515935c4b5fd60 Mon Sep 17 00:00:00 2001 From: Fugang Duan Date: Mon, 7 Dec 2020 18:51:40 +0800 Subject: [PATCH 216/296] net: stmmac: delete the eee_ctrl_timer after napi disabled There is a chance that the eee_ctrl_timer is re-enabled and fired from the napi callback after the timer has been deleted in .stmmac_release(), which leads to EEE registers being accessed in the timer function after the clocks are disabled and then causes a system hang. This issue was found while running suspend/resume and reboot stress tests.
It is safe to delete the timer after napi disabled and disable lpi mode. Fixes: d765955d2ae0b ("stmmac: add the Energy Efficient Ethernet support") Signed-off-by: Fugang Duan Signed-off-by: Joakim Zhang Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 7452f3c1cab9..d2521ebb8217 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -2908,9 +2908,6 @@ static int stmmac_release(struct net_device *dev) struct stmmac_priv *priv = netdev_priv(dev); u32 chan; - if (priv->eee_enabled) - del_timer_sync(&priv->eee_ctrl_timer); - if (device_may_wakeup(priv->device)) phylink_speed_down(priv->phylink, false); /* Stop and disconnect the PHY */ @@ -2929,6 +2926,11 @@ static int stmmac_release(struct net_device *dev) if (priv->lpi_irq > 0) free_irq(priv->lpi_irq, dev); + if (priv->eee_enabled) { + priv->tx_path_in_lpi_mode = false; + del_timer_sync(&priv->eee_ctrl_timer); + } + /* Stop TX/RX DMA and clear the descriptors */ stmmac_stop_all_dma(priv); @@ -5155,6 +5157,11 @@ int stmmac_suspend(struct device *dev) for (chan = 0; chan < priv->plat->tx_queues_to_use; chan++) del_timer_sync(&priv->tx_queue[chan].txtimer); + if (priv->eee_enabled) { + priv->tx_path_in_lpi_mode = false; + del_timer_sync(&priv->eee_ctrl_timer); + } + /* Stop TX/RX DMA */ stmmac_stop_all_dma(priv); From f119cc9818eb33b66e977ad3af75aef6500bbdc3 Mon Sep 17 00:00:00 2001 From: Fugang Duan Date: Mon, 7 Dec 2020 18:51:41 +0800 Subject: [PATCH 217/296] net: stmmac: overwrite the dma_cap.addr64 according to HW design The current IP register MAC_HW_Feature1[ADDR64] only defines 32/40/64 bit width, but some SOCs support others like i.MX8MP support 34 bits but it maps to 40 bits width in MAC_HW_Feature1[ADDR64]. 
So overwrite dma_cap.addr64 according to HW real design. Fixes: 94abdad6974a ("net: ethernet: dwmac: add ethernet glue logic for NXP imx8 chip") Signed-off-by: Fugang Duan Signed-off-by: Joakim Zhang Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/dwmac-imx.c | 9 +-------- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 8 ++++++++ include/linux/stmmac.h | 1 + 3 files changed, 10 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-imx.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-imx.c index efef5476a577..223f69da7e95 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-imx.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-imx.c @@ -246,13 +246,7 @@ static int imx_dwmac_probe(struct platform_device *pdev) goto err_parse_dt; } - ret = dma_set_mask_and_coherent(&pdev->dev, - DMA_BIT_MASK(dwmac->ops->addr_width)); - if (ret) { - dev_err(&pdev->dev, "DMA mask set failed\n"); - goto err_dma_mask; - } - + plat_dat->addr64 = dwmac->ops->addr_width; plat_dat->init = imx_dwmac_init; plat_dat->exit = imx_dwmac_exit; plat_dat->fix_mac_speed = imx_dwmac_fix_speed; @@ -272,7 +266,6 @@ static int imx_dwmac_probe(struct platform_device *pdev) err_dwmac_init: err_drv_probe: imx_dwmac_exit(pdev, plat_dat->bsp_priv); -err_dma_mask: err_parse_dt: err_match_data: stmmac_remove_config_dt(pdev, plat_dat); diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index d2521ebb8217..c33db79cdd0a 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -4945,6 +4945,14 @@ int stmmac_dvr_probe(struct device *device, dev_info(priv->device, "SPH feature enabled\n"); } + /* The current IP register MAC_HW_Feature1[ADDR64] only define + * 32/40/64 bit width, but some SOC support others like i.MX8MP + * support 34 bits but it map to 40 bits width in MAC_HW_Feature1[ADDR64]. 
+ * So overwrite dma_cap.addr64 according to HW real design. + */ + if (priv->plat->addr64) + priv->dma_cap.addr64 = priv->plat->addr64; + if (priv->dma_cap.addr64) { ret = dma_set_mask_and_coherent(device, DMA_BIT_MASK(priv->dma_cap.addr64)); diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h index 628e28903b8b..15ca6b4167cc 100644 --- a/include/linux/stmmac.h +++ b/include/linux/stmmac.h @@ -170,6 +170,7 @@ struct plat_stmmacenet_data { int unicast_filter_entries; int tx_fifo_size; int rx_fifo_size; + u32 addr64; u32 rx_queues_to_use; u32 tx_queues_to_use; u8 rx_sched_algorithm; From 1a0e1943d8798cb3241fb5edb9a836af1611b60a Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Tue, 8 Dec 2020 15:00:36 -0800 Subject: [PATCH 218/296] Revert "scsi: megaraid_sas: Added support for shared host tagset for cpuhotplug" This reverts commit 103fbf8e4020845e4fcf63819288cedb092a3c91. It turns out that it causes long boot-time latencies (to the point of timeouts and failed boots). The cause is the increase in request queues, and a fix for that is queued up for 5.11, but we're reverting this commit that triggered the problem for now. Reported-and-tested-by: John Garry Reported-and-tested-by: Julia Lawall Reported-by: Qian Cai Acked-by: Jens Axboe Acked-by: Martin K. 
Petersen Link: https://lore.kernel.org/linux-scsi/fe3dff7dae4494e5a88caffbb4d877bbf472dceb.camel@redhat.com/ Link: https://lore.kernel.org/lkml/alpine.DEB.2.22.394.2012081813310.2680@hadrien/ Link: https://lore.kernel.org/linux-block/20201203012638.543321-1-ming.lei@redhat.com/ Signed-off-by: Linus Torvalds --- drivers/scsi/megaraid/megaraid_sas_base.c | 39 --------------------- drivers/scsi/megaraid/megaraid_sas_fusion.c | 29 +++++++-------- 2 files changed, 13 insertions(+), 55 deletions(-) diff --git a/drivers/scsi/megaraid/megaraid_sas_base.c b/drivers/scsi/megaraid/megaraid_sas_base.c index 41cd66fc7d81..e158d3d62056 100644 --- a/drivers/scsi/megaraid/megaraid_sas_base.c +++ b/drivers/scsi/megaraid/megaraid_sas_base.c @@ -37,7 +37,6 @@ #include #include #include -#include #include #include @@ -114,10 +113,6 @@ unsigned int enable_sdev_max_qd; module_param(enable_sdev_max_qd, int, 0444); MODULE_PARM_DESC(enable_sdev_max_qd, "Enable sdev max qd as can_queue. Default: 0"); -int host_tagset_enable = 1; -module_param(host_tagset_enable, int, 0444); -MODULE_PARM_DESC(host_tagset_enable, "Shared host tagset enable/disable Default: enable(1)"); - MODULE_LICENSE("GPL"); MODULE_VERSION(MEGASAS_VERSION); MODULE_AUTHOR("megaraidlinux.pdl@broadcom.com"); @@ -3124,19 +3119,6 @@ megasas_bios_param(struct scsi_device *sdev, struct block_device *bdev, return 0; } -static int megasas_map_queues(struct Scsi_Host *shost) -{ - struct megasas_instance *instance; - - instance = (struct megasas_instance *)shost->hostdata; - - if (shost->nr_hw_queues == 1) - return 0; - - return blk_mq_pci_map_queues(&shost->tag_set.map[HCTX_TYPE_DEFAULT], - instance->pdev, instance->low_latency_index_start); -} - static void megasas_aen_polling(struct work_struct *work); /** @@ -3445,7 +3427,6 @@ static struct scsi_host_template megasas_template = { .eh_timed_out = megasas_reset_timer, .shost_attrs = megaraid_host_attrs, .bios_param = megasas_bios_param, - .map_queues = megasas_map_queues, 
.change_queue_depth = scsi_change_queue_depth, .max_segment_size = 0xffffffff, }; @@ -6827,26 +6808,6 @@ static int megasas_io_attach(struct megasas_instance *instance) host->max_lun = MEGASAS_MAX_LUN; host->max_cmd_len = 16; - /* Use shared host tagset only for fusion adaptors - * if there are managed interrupts (smp affinity enabled case). - * Single msix_vectors in kdump, so shared host tag is also disabled. - */ - - host->host_tagset = 0; - host->nr_hw_queues = 1; - - if ((instance->adapter_type != MFI_SERIES) && - (instance->msix_vectors > instance->low_latency_index_start) && - host_tagset_enable && - instance->smp_affinity_enable) { - host->host_tagset = 1; - host->nr_hw_queues = instance->msix_vectors - - instance->low_latency_index_start; - } - - dev_info(&instance->pdev->dev, - "Max firmware commands: %d shared with nr_hw_queues = %d\n", - instance->max_fw_cmds, host->nr_hw_queues); /* * Notify the mid-layer about the new controller */ diff --git a/drivers/scsi/megaraid/megaraid_sas_fusion.c b/drivers/scsi/megaraid/megaraid_sas_fusion.c index fd607287608e..b0c01cf0428f 100644 --- a/drivers/scsi/megaraid/megaraid_sas_fusion.c +++ b/drivers/scsi/megaraid/megaraid_sas_fusion.c @@ -359,29 +359,24 @@ megasas_get_msix_index(struct megasas_instance *instance, { int sdev_busy; - /* TBD - if sml remove device_busy in future, driver - * should track counter in internal structure. 
- */ - sdev_busy = atomic_read(&scmd->device->device_busy); + /* nr_hw_queue = 1 for MegaRAID */ + struct blk_mq_hw_ctx *hctx = + scmd->device->request_queue->queue_hw_ctx[0]; + + sdev_busy = atomic_read(&hctx->nr_active); if (instance->perf_mode == MR_BALANCED_PERF_MODE && - sdev_busy > (data_arms * MR_DEVICE_HIGH_IOPS_DEPTH)) { + sdev_busy > (data_arms * MR_DEVICE_HIGH_IOPS_DEPTH)) cmd->request_desc->SCSIIO.MSIxIndex = mega_mod64((atomic64_add_return(1, &instance->high_iops_outstanding) / MR_HIGH_IOPS_BATCH_COUNT), instance->low_latency_index_start); - } else if (instance->msix_load_balance) { + else if (instance->msix_load_balance) cmd->request_desc->SCSIIO.MSIxIndex = (mega_mod64(atomic64_add_return(1, &instance->total_io_count), instance->msix_vectors)); - } else if (instance->host->nr_hw_queues > 1) { - u32 tag = blk_mq_unique_tag(scmd->request); - - cmd->request_desc->SCSIIO.MSIxIndex = blk_mq_unique_tag_to_hwq(tag) + - instance->low_latency_index_start; - } else { + else cmd->request_desc->SCSIIO.MSIxIndex = instance->reply_map[raw_smp_processor_id()]; - } } /** @@ -961,6 +956,9 @@ megasas_alloc_cmds_fusion(struct megasas_instance *instance) if (megasas_alloc_cmdlist_fusion(instance)) goto fail_exit; + dev_info(&instance->pdev->dev, "Configured max firmware commands: %d\n", + instance->max_fw_cmds); + /* The first 256 bytes (SMID 0) is not used. 
Don't add to the cmd list */ io_req_base = fusion->io_request_frames + MEGA_MPI2_RAID_DEFAULT_IO_FRAME_SIZE; io_req_base_phys = fusion->io_request_frames_phys + MEGA_MPI2_RAID_DEFAULT_IO_FRAME_SIZE; @@ -1104,9 +1102,8 @@ megasas_ioc_init_fusion(struct megasas_instance *instance) MR_HIGH_IOPS_QUEUE_COUNT) && cur_intr_coalescing) instance->perf_mode = MR_BALANCED_PERF_MODE; - dev_info(&instance->pdev->dev, "Performance mode :%s (latency index = %d)\n", - MEGASAS_PERF_MODE_2STR(instance->perf_mode), - instance->low_latency_index_start); + dev_info(&instance->pdev->dev, "Performance mode :%s\n", + MEGASAS_PERF_MODE_2STR(instance->perf_mode)); instance->fw_sync_cache_support = (scratch_pad_1 & MR_CAN_HANDLE_SYNC_CACHE_OFFSET) ? 1 : 0; From 0398ba9e5a4b5675aa571e0445689d3c2e499c2d Mon Sep 17 00:00:00 2001 From: Cengiz Can Date: Mon, 7 Dec 2020 11:14:24 +0300 Subject: [PATCH 219/296] net: tipc: prevent possible null deref of link `tipc_node_apply_property` does a null check on a `tipc_link_entry` pointer but also accesses the same pointer out of the null check block. This triggers a warning on Coverity Static Analyzer because we're implying that `e->link` can BE null. Move "Update MTU for node link entry" line into if block to make sure that we're not in a state that `e->link` is null. Signed-off-by: Cengiz Can Signed-off-by: David S. 
Miller --- net/tipc/node.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/tipc/node.c b/net/tipc/node.c index c95d037fde51..83978d5dae59 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -2181,9 +2181,11 @@ void tipc_node_apply_property(struct net *net, struct tipc_bearer *b, &xmitq); else if (prop == TIPC_NLA_PROP_MTU) tipc_link_set_mtu(e->link, b->mtu); + + /* Update MTU for node link entry */ + e->mtu = tipc_link_mss(e->link); } - /* Update MTU for node link entry */ - e->mtu = tipc_link_mss(e->link); + tipc_node_write_unlock(n); tipc_bearer_xmit(net, bearer_id, &xmitq, &e->maddr, NULL); } From 4cb682964706deffb4861f0a91329ab3a705039f Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 8 Dec 2020 23:52:03 +0000 Subject: [PATCH 220/296] afs: Fix memory leak when mounting with multiple source parameters There's a memory leak in afs_parse_source() whereby multiple source= parameters overwrite fc->source in the fs_context struct without freeing the previously recorded source. Fix this by only permitting a single source parameter and rejecting with an error all subsequent ones. 
This was caught by syzbot with the kernel memory leak detector, showing something like the following trace: unreferenced object 0xffff888114375440 (size 32): comm "repro", pid 5168, jiffies 4294923723 (age 569.948s) backtrace: slab_post_alloc_hook+0x42/0x79 __kmalloc_track_caller+0x125/0x16a kmemdup_nul+0x24/0x3c vfs_parse_fs_string+0x5a/0xa1 generic_parse_monolithic+0x9d/0xc5 do_new_mount+0x10d/0x15a do_mount+0x5f/0x8e __do_sys_mount+0xff/0x127 do_syscall_64+0x2d/0x3a entry_SYSCALL_64_after_hwframe+0x44/0xa9 Fixes: 13fcc6837049 ("afs: Add fs_context support") Reported-by: syzbot+86dc6632faaca40133ab@syzkaller.appspotmail.com Signed-off-by: David Howells cc: Randy Dunlap Signed-off-by: Linus Torvalds --- fs/afs/super.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/afs/super.c b/fs/afs/super.c index 6c5900df6aa5..e38bb1e7a4d2 100644 --- a/fs/afs/super.c +++ b/fs/afs/super.c @@ -230,6 +230,9 @@ static int afs_parse_source(struct fs_context *fc, struct fs_parameter *param) _enter(",%s", name); + if (fc->source) + return invalf(fc, "kAFS: Multiple sources not supported"); + if (!name) { printk(KERN_ERR "kAFS: no volume name specified\n"); return -EINVAL; From cc6596fc7295e9dcd78156ed42f9f8e1221f7530 Mon Sep 17 00:00:00 2001 From: Zhang Changzhong Date: Tue, 8 Dec 2020 09:53:42 +0800 Subject: [PATCH 221/296] net: ll_temac: Fix potential NULL dereference in temac_probe() platform_get_resource() may fail and in this case a NULL dereference will occur. Fix it to use devm_platform_ioremap_resource() instead of calling platform_get_resource() and devm_ioremap(). This is detected by Coccinelle semantic patch. @@ expression pdev, res, n, t, e, e1, e2; @@ res = \(platform_get_resource\|platform_get_resource_byname\)(pdev, t, n); + if (!res) + return -EINVAL; ... 
when != res == NULL e = devm_ioremap(e1, res->start, e2); Fixes: 8425c41d1ef7 ("net: ll_temac: Extend support to non-device-tree platforms") Signed-off-by: Zhang Changzhong Acked-by: Esben Haabendal Signed-off-by: David S. Miller --- drivers/net/ethernet/xilinx/ll_temac_main.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/xilinx/ll_temac_main.c b/drivers/net/ethernet/xilinx/ll_temac_main.c index 60c199fcb91e..030185301014 100644 --- a/drivers/net/ethernet/xilinx/ll_temac_main.c +++ b/drivers/net/ethernet/xilinx/ll_temac_main.c @@ -1351,7 +1351,6 @@ static int temac_probe(struct platform_device *pdev) struct device_node *temac_np = dev_of_node(&pdev->dev), *dma_np; struct temac_local *lp; struct net_device *ndev; - struct resource *res; const void *addr; __be32 *p; bool little_endian; @@ -1500,13 +1499,11 @@ static int temac_probe(struct platform_device *pdev) of_node_put(dma_np); } else if (pdata) { /* 2nd memory resource specifies DMA registers */ - res = platform_get_resource(pdev, IORESOURCE_MEM, 1); - lp->sdma_regs = devm_ioremap(&pdev->dev, res->start, - resource_size(res)); - if (!lp->sdma_regs) { + lp->sdma_regs = devm_platform_ioremap_resource(pdev, 1); + if (IS_ERR(lp->sdma_regs)) { dev_err(&pdev->dev, "could not map DMA registers\n"); - return -ENOMEM; + return PTR_ERR(lp->sdma_regs); } if (pdata->dma_little_endian) { lp->dma_in = temac_dma_in32_le; From 72d05c00d7ecda85df29abd046da7e41cc071c17 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 8 Dec 2020 08:21:31 -0800 Subject: [PATCH 222/296] tcp: select sane initial rcvq_space.space for big MSS Before commit a337531b942b ("tcp: up initial rmem to 128KB and SYN rwin to around 64KB") small tcp_rmem[1] values were overridden by tcp_fixup_rcvbuf() to accommodate various MSS. This is no longer the case, and Hazem Mohamed Abuelfotoh reported that DRS would not work for MTU 9000 endpoints receiving regular (1500 bytes) frames. 
Root cause is that tcp_init_buffer_space() uses tp->rcv_wnd for upper limit of rcvq_space.space computation, while it can select later a smaller value for tp->rcv_ssthresh and tp->window_clamp. ss -temoi on receiver would show : skmem:(r0,rb131072,t0,tb46080,f0,w0,o0,bl0,d0) rcv_space:62496 rcv_ssthresh:56596 This means that TCP can not increase its window in tcp_grow_window(), and that DRS can never kick. Fix this by making sure that rcvq_space.space is not bigger than number of bytes that can be held in TCP receive queue. People unable/unwilling to change their kernel can work around this issue by selecting a bigger tcp_rmem[1] value as in : echo "4096 196608 6291456" >/proc/sys/net/ipv4/tcp_rmem Based on an initial report and patch from Hazem Mohamed Abuelfotoh https://lore.kernel.org/netdev/20201204180622.14285-1-abuehaze@amazon.com/ Fixes: a337531b942b ("tcp: up initial rmem to 128KB and SYN rwin to around 64KB") Fixes: 041a14d26715 ("tcp: start receiver buffer autotuning sooner") Reported-by: Hazem Mohamed Abuelfotoh Signed-off-by: Eric Dumazet Acked-by: Soheil Hassas Yeganeh Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 389d1b340248..ef4bdb038a4b 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -510,7 +510,6 @@ static void tcp_init_buffer_space(struct sock *sk) if (!(sk->sk_userlocks & SOCK_SNDBUF_LOCK)) tcp_sndbuf_expand(sk); - tp->rcvq_space.space = min_t(u32, tp->rcv_wnd, TCP_INIT_CWND * tp->advmss); tcp_mstamp_refresh(tp); tp->rcvq_space.time = tp->tcp_mstamp; tp->rcvq_space.seq = tp->copied_seq; @@ -534,6 +533,8 @@ static void tcp_init_buffer_space(struct sock *sk) tp->rcv_ssthresh = min(tp->rcv_ssthresh, tp->window_clamp); tp->snd_cwnd_stamp = tcp_jiffies32; + tp->rcvq_space.space = min3(tp->rcv_ssthresh, tp->rcv_wnd, + (u32)TCP_INIT_CWND * tp->advmss); } /* 4. 
Recalculate window clamp after socket hit its memory bounds. */ From b62527005d46d52b4733cbc57f2f9b514b673ed9 Mon Sep 17 00:00:00 2001 From: KP Singh Date: Tue, 8 Dec 2020 22:49:00 +0100 Subject: [PATCH 223/296] bpf, doc: Update KP's email in MAINTAINERS Helps me use a single account to sign off and send patches use appropriate email redirection without needing to update MAINTAINERS. Signed-off-by: KP Singh Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20201208214900.80684-1-kpsingh@kernel.org --- MAINTAINERS | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 98f0bd050ff5..ba63f61e4ef1 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3237,7 +3237,7 @@ R: Martin KaFai Lau R: Song Liu R: Yonghong Song R: John Fastabend -R: KP Singh +R: KP Singh L: netdev@vger.kernel.org L: bpf@vger.kernel.org S: Supported @@ -3356,7 +3356,7 @@ F: arch/x86/net/ X: arch/x86/net/bpf_jit_comp32.c BPF LSM (Security Audit and Enforcement using BPF) -M: KP Singh +M: KP Singh R: Florent Revest R: Brendan Jackman L: bpf@vger.kernel.org From 0e830d2872cf6e75ef6619edd23050ddf3673358 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 1 Dec 2020 17:44:58 -0500 Subject: [PATCH 224/296] drm/amdgpu/powerplay: parse fan table for CI asics Set up all the parameters required for SMU fan control if supported. 
Bug: https://bugzilla.kernel.org/show_bug.cgi?id=201539 Acked-by: Evan Quan Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- .../amd/pm/powerplay/hwmgr/processpptables.c | 103 +++++++++++++++++- 1 file changed, 102 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/processpptables.c b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/processpptables.c index 719597c5d27d..6606511891e3 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/processpptables.c +++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/processpptables.c @@ -24,6 +24,8 @@ #include #include #include +#include + #include #include "processpptables.h" #include @@ -984,6 +986,8 @@ static int init_thermal_controller( struct pp_hwmgr *hwmgr, const ATOM_PPLIB_POWERPLAYTABLE *powerplay_table) { + struct amdgpu_device *adev = hwmgr->adev; + hwmgr->thermal_controller.ucType = powerplay_table->sThermalController.ucType; hwmgr->thermal_controller.ucI2cLine = @@ -1008,7 +1012,104 @@ static int init_thermal_controller( ATOM_PP_THERMALCONTROLLER_NONE != hwmgr->thermal_controller.ucType, PHM_PlatformCaps_ThermalController); - hwmgr->thermal_controller.use_hw_fan_control = 1; + if (powerplay_table->usTableSize >= sizeof(ATOM_PPLIB_POWERPLAYTABLE3)) { + const ATOM_PPLIB_POWERPLAYTABLE3 *powerplay_table3 = + (const ATOM_PPLIB_POWERPLAYTABLE3 *)powerplay_table; + + if (0 == le16_to_cpu(powerplay_table3->usFanTableOffset)) { + hwmgr->thermal_controller.use_hw_fan_control = 1; + return 0; + } else { + const ATOM_PPLIB_FANTABLE *fan_table = + (const ATOM_PPLIB_FANTABLE *)(((unsigned long)powerplay_table) + + le16_to_cpu(powerplay_table3->usFanTableOffset)); + + if (1 <= fan_table->ucFanTableFormat) { + hwmgr->thermal_controller.advanceFanControlParameters.ucTHyst = + fan_table->ucTHyst; + hwmgr->thermal_controller.advanceFanControlParameters.usTMin = + le16_to_cpu(fan_table->usTMin); + hwmgr->thermal_controller.advanceFanControlParameters.usTMed = + le16_to_cpu(fan_table->usTMed); + 
hwmgr->thermal_controller.advanceFanControlParameters.usTHigh = + le16_to_cpu(fan_table->usTHigh); + hwmgr->thermal_controller.advanceFanControlParameters.usPWMMin = + le16_to_cpu(fan_table->usPWMMin); + hwmgr->thermal_controller.advanceFanControlParameters.usPWMMed = + le16_to_cpu(fan_table->usPWMMed); + hwmgr->thermal_controller.advanceFanControlParameters.usPWMHigh = + le16_to_cpu(fan_table->usPWMHigh); + hwmgr->thermal_controller.advanceFanControlParameters.usTMax = 10900; + hwmgr->thermal_controller.advanceFanControlParameters.ulCycleDelay = 100000; + + phm_cap_set(hwmgr->platform_descriptor.platformCaps, + PHM_PlatformCaps_MicrocodeFanControl); + } + + if (2 <= fan_table->ucFanTableFormat) { + const ATOM_PPLIB_FANTABLE2 *fan_table2 = + (const ATOM_PPLIB_FANTABLE2 *)(((unsigned long)powerplay_table) + + le16_to_cpu(powerplay_table3->usFanTableOffset)); + hwmgr->thermal_controller.advanceFanControlParameters.usTMax = + le16_to_cpu(fan_table2->usTMax); + } + + if (3 <= fan_table->ucFanTableFormat) { + const ATOM_PPLIB_FANTABLE3 *fan_table3 = + (const ATOM_PPLIB_FANTABLE3 *) (((unsigned long)powerplay_table) + + le16_to_cpu(powerplay_table3->usFanTableOffset)); + + hwmgr->thermal_controller.advanceFanControlParameters.ucFanControlMode = + fan_table3->ucFanControlMode; + + if ((3 == fan_table->ucFanTableFormat) && + (0x67B1 == adev->pdev->device)) + hwmgr->thermal_controller.advanceFanControlParameters.usDefaultMaxFanPWM = + 47; + else + hwmgr->thermal_controller.advanceFanControlParameters.usDefaultMaxFanPWM = + le16_to_cpu(fan_table3->usFanPWMMax); + + hwmgr->thermal_controller.advanceFanControlParameters.usDefaultFanOutputSensitivity = + 4836; + hwmgr->thermal_controller.advanceFanControlParameters.usFanOutputSensitivity = + le16_to_cpu(fan_table3->usFanOutputSensitivity); + } + + if (6 <= fan_table->ucFanTableFormat) { + const ATOM_PPLIB_FANTABLE4 *fan_table4 = + (const ATOM_PPLIB_FANTABLE4 *)(((unsigned long)powerplay_table) + + 
le16_to_cpu(powerplay_table3->usFanTableOffset)); + + phm_cap_set(hwmgr->platform_descriptor.platformCaps, + PHM_PlatformCaps_FanSpeedInTableIsRPM); + + hwmgr->thermal_controller.advanceFanControlParameters.usDefaultMaxFanRPM = + le16_to_cpu(fan_table4->usFanRPMMax); + } + + if (7 <= fan_table->ucFanTableFormat) { + const ATOM_PPLIB_FANTABLE5 *fan_table5 = + (const ATOM_PPLIB_FANTABLE5 *)(((unsigned long)powerplay_table) + + le16_to_cpu(powerplay_table3->usFanTableOffset)); + + if (0x67A2 == adev->pdev->device || + 0x67A9 == adev->pdev->device || + 0x67B9 == adev->pdev->device) { + phm_cap_set(hwmgr->platform_descriptor.platformCaps, + PHM_PlatformCaps_GeminiRegulatorFanControlSupport); + hwmgr->thermal_controller.advanceFanControlParameters.usFanCurrentLow = + le16_to_cpu(fan_table5->usFanCurrentLow); + hwmgr->thermal_controller.advanceFanControlParameters.usFanCurrentHigh = + le16_to_cpu(fan_table5->usFanCurrentHigh); + hwmgr->thermal_controller.advanceFanControlParameters.usFanRPMLow = + le16_to_cpu(fan_table5->usFanRPMLow); + hwmgr->thermal_controller.advanceFanControlParameters.usFanRPMHigh = + le16_to_cpu(fan_table5->usFanRPMHigh); + } + } + } + } return 0; } From a68a0262abdaa251e12c53715f48e698a18ef402 Mon Sep 17 00:00:00 2001 From: Minchan Kim Date: Tue, 8 Dec 2020 20:57:18 -0800 Subject: [PATCH 225/296] mm/madvise: remove racy mm ownership check Jann spotted the security hole due to race of mm ownership check. If the task is sharing the mm_struct but goes through execve() before mm_access(), it could skip process_madvise_behavior_valid check. That makes *any advice hint* to reach into the remote process. This patch removes the mm ownership check. With it, it will lose the ability that local process could give *any* advice hint with vector interface for some reason (e.g., performance). Since there is no concrete example in upstream yet, it would be better to remove the ability at this moment and need to review when such new advice comes up.
Fixes: ecb8ac8b1f14 ("mm/madvise: introduce process_madvise() syscall: an external memory hinting API") Reported-by: Jann Horn Suggested-by: Jann Horn Signed-off-by: Minchan Kim Signed-off-by: Linus Torvalds --- mm/madvise.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/mm/madvise.c b/mm/madvise.c index a8d8d48a57fe..13f5677b9322 100644 --- a/mm/madvise.c +++ b/mm/madvise.c @@ -1204,8 +1204,7 @@ SYSCALL_DEFINE5(process_madvise, int, pidfd, const struct iovec __user *, vec, goto put_pid; } - if (task->mm != current->mm && - !process_madvise_behavior_valid(behavior)) { + if (!process_madvise_behavior_valid(behavior)) { ret = -EINVAL; goto release_task; } From e8873c0afd34beb67ec492cd648dd0095b911f65 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Tue, 8 Dec 2020 20:24:03 +0200 Subject: [PATCH 226/296] pinctrl: intel: Actually disable Tx and Rx buffers on GPIO request Mistakenly the buffers (input and output) become enabled together for a short period of time during GPIO request. This is problematic, because instead of initial motive to disable them in the commit af7e3eeb84e2 ("pinctrl: intel: Disable input and output buffer when switching to GPIO"), the driven value on the pin, which might be used as an IRQ line, brings firmwares of some touch pads to an awkward state that needs a full power off to recover. Fix this, as stated in the culprit commit, by disabling the buffers. 
Fixes: af7e3eeb84e2 ("pinctrl: intel: Disable input and output buffer when switching to GPIO") BugLink: https://bugzilla.kernel.org/show_bug.cgi?id=210497 Reported-by: Pierre-Louis Bossart Signed-off-by: Andy Shevchenko Acked-by: Mika Westerberg Tested-by: Pierre-Louis Bossart Tested-by: Kai-Heng Feng Link: https://lore.kernel.org/r/20201208182403.40435-1-andriy.shevchenko@linux.intel.com Signed-off-by: Linus Walleij --- drivers/pinctrl/intel/pinctrl-intel.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/pinctrl/intel/pinctrl-intel.c b/drivers/pinctrl/intel/pinctrl-intel.c index 1c10ab184783..b6ef1911c1dd 100644 --- a/drivers/pinctrl/intel/pinctrl-intel.c +++ b/drivers/pinctrl/intel/pinctrl-intel.c @@ -442,8 +442,8 @@ static void intel_gpio_set_gpio_mode(void __iomem *padcfg0) value |= PADCFG0_PMODE_GPIO; /* Disable input and output buffers */ - value &= ~PADCFG0_GPIORXDIS; - value &= ~PADCFG0_GPIOTXDIS; + value |= PADCFG0_GPIORXDIS; + value |= PADCFG0_GPIOTXDIS; /* Disable SCI/SMI/NMI generation */ value &= ~(PADCFG0_GPIROUTIOXAPIC | PADCFG0_GPIROUTSCI); From 263ade7166a2e589c5b605272690c155c0637dcb Mon Sep 17 00:00:00 2001 From: Chunyan Zhang Date: Wed, 9 Dec 2020 13:51:06 +0800 Subject: [PATCH 227/296] gpio: eic-sprd: break loop when getting NULL device resource EIC controllers have a varying number of banks on different Spreadtrum SoCs, and each bank has its own base address. The loop that gets their base addresses in the driver should break as soon as the resource returned by platform_get_resource() is NULL. The later ones would be all NULL even if the loop continues.
Fixes: 25518e024e3a ("gpio: Add Spreadtrum EIC driver support") Signed-off-by: Chunyan Zhang Link: https://lore.kernel.org/r/20201209055106.840100-1-zhang.lyra@gmail.com Signed-off-by: Linus Walleij --- drivers/gpio/gpio-eic-sprd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpio/gpio-eic-sprd.c b/drivers/gpio/gpio-eic-sprd.c index ad61daf6c212..865ab2b34fdd 100644 --- a/drivers/gpio/gpio-eic-sprd.c +++ b/drivers/gpio/gpio-eic-sprd.c @@ -598,7 +598,7 @@ static int sprd_eic_probe(struct platform_device *pdev) */ res = platform_get_resource(pdev, IORESOURCE_MEM, i); if (!res) - continue; + break; sprd_eic->base[i] = devm_ioremap_resource(&pdev->dev, res); if (IS_ERR(sprd_eic->base[i])) From 2d94b20b95b009eec1a267dcf026b01af627c0cd Mon Sep 17 00:00:00 2001 From: Brett Mastbergen Date: Tue, 8 Dec 2020 16:39:24 -0500 Subject: [PATCH 228/296] netfilter: nft_ct: Remove confirmation check for NFT_CT_ID Since commit 656c8e9cc1ba ("netfilter: conntrack: Use consistent ct id hash calculation") the ct id will not change from initialization to confirmation. Removing the confirmation check allows for things like adding an element to a 'typeof ct id' set in prerouting upon reception of the first packet of a new connection, and then being able to reference that set consistently both before and after the connection is confirmed. 
Fixes: 656c8e9cc1ba ("netfilter: conntrack: Use consistent ct id hash calculation") Signed-off-by: Brett Mastbergen Acked-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_ct.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c index 322bd674963e..a1b0aac46e9e 100644 --- a/net/netfilter/nft_ct.c +++ b/net/netfilter/nft_ct.c @@ -177,8 +177,6 @@ static void nft_ct_get_eval(const struct nft_expr *expr, } #endif case NFT_CT_ID: - if (!nf_ct_is_confirmed(ct)) - goto err; *dest = nf_ct_get_id(ct); return; default: From 578b6c487899179fed730e710ffec0b069917971 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 3 Dec 2020 16:06:26 -0500 Subject: [PATCH 229/296] drm/amdgpu/display: set num_crtc earlier To avoid a recently added warning: Bogus possible_crtcs: [ENCODER:65:TMDS-65] possible_crtcs=0xf (full crtc mask=0x7) WARNING: CPU: 3 PID: 439 at drivers/gpu/drm/drm_mode_config.c:617 drm_mode_config_validate+0x178/0x200 [drm] In this case the warning is harmless, but confusing to users.
Fixes: 0df108237433 ("drm: Validate encoder->possible_crtcs") Bug: https://bugzilla.kernel.org/show_bug.cgi?id=209123 Reviewed-by: Daniel Vetter Reviewed-by: Nicholas Kazlauskas Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 9b6809f309f4..0f7749e9424d 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -1058,9 +1058,6 @@ static int amdgpu_dm_init(struct amdgpu_device *adev) goto error; } - /* Update the actual used number of crtc */ - adev->mode_info.num_crtc = adev->dm.display_indexes_num; - /* create fake encoders for MST */ dm_dp_create_fake_mst_encoders(adev); @@ -3251,6 +3248,10 @@ static int amdgpu_dm_initialize_drm_device(struct amdgpu_device *adev) enum dc_connection_type new_connection_type = dc_connection_none; const struct dc_plane_cap *plane; + dm->display_indexes_num = dm->dc->caps.max_streams; + /* Update the actual used number of crtc */ + adev->mode_info.num_crtc = adev->dm.display_indexes_num; + link_cnt = dm->dc->caps.max_links; if (amdgpu_dm_mode_config_init(dm->adev)) { DRM_ERROR("DM: Failed to initialize mode config\n"); @@ -3312,8 +3313,6 @@ static int amdgpu_dm_initialize_drm_device(struct amdgpu_device *adev) goto fail; } - dm->display_indexes_num = dm->dc->caps.max_streams; - /* loops over all connectors on the board */ for (i = 0; i < link_cnt; i++) { struct dc_link *link = NULL; From 2343e9d2c5a94459b9de92649f1650e36eb79a10 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 4 Dec 2020 00:06:43 +0100 Subject: [PATCH 230/296] drm/amdgpu: fix debugfs creation/removal, again There is still a warning when CONFIG_DEBUG_FS is disabled: drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c:1145:13: error: 'amdgpu_ras_debugfs_create_ctrl_node' 
defined but not used [-Werror=unused-function] 1145 | static void amdgpu_ras_debugfs_create_ctrl_node(struct amdgpu_device *adev) Change the code again to make the compiler actually drop this code but not warn about it. Fixes: ae2bf61ff39e ("drm/amdgpu: guard ras debugfs creation/removal based on CONFIG_DEBUG_FS") Reviewed-by: Tao Zhou Signed-off-by: Arnd Bergmann Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 13 +++++-------- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h | 6 ------ 2 files changed, 5 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index 4e36551ab50b..82cd8e55595a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -1172,7 +1172,7 @@ static void amdgpu_ras_debugfs_create_ctrl_node(struct amdgpu_device *adev) con->dir, &con->disable_ras_err_cnt_harvest); } -void amdgpu_ras_debugfs_create(struct amdgpu_device *adev, +static void amdgpu_ras_debugfs_create(struct amdgpu_device *adev, struct ras_fs_if *head) { struct amdgpu_ras *con = amdgpu_ras_get_context(adev); @@ -1194,7 +1194,6 @@ void amdgpu_ras_debugfs_create(struct amdgpu_device *adev, void amdgpu_ras_debugfs_create_all(struct amdgpu_device *adev) { -#if defined(CONFIG_DEBUG_FS) struct amdgpu_ras *con = amdgpu_ras_get_context(adev); struct ras_manager *obj; struct ras_fs_if fs_info; @@ -1203,7 +1202,7 @@ void amdgpu_ras_debugfs_create_all(struct amdgpu_device *adev) * it won't be called in resume path, no need to check * suspend and gpu reset status */ - if (!con) + if (!IS_ENABLED(CONFIG_DEBUG_FS) || !con) return; amdgpu_ras_debugfs_create_ctrl_node(adev); @@ -1217,10 +1216,9 @@ void amdgpu_ras_debugfs_create_all(struct amdgpu_device *adev) amdgpu_ras_debugfs_create(adev, &fs_info); } } -#endif } -void amdgpu_ras_debugfs_remove(struct amdgpu_device *adev, +static void amdgpu_ras_debugfs_remove(struct amdgpu_device *adev, struct ras_common_if *head) 
{ struct ras_manager *obj = amdgpu_ras_find_obj(adev, head); @@ -1234,7 +1232,6 @@ void amdgpu_ras_debugfs_remove(struct amdgpu_device *adev, static void amdgpu_ras_debugfs_remove_all(struct amdgpu_device *adev) { -#if defined(CONFIG_DEBUG_FS) struct amdgpu_ras *con = amdgpu_ras_get_context(adev); struct ras_manager *obj, *tmp; @@ -1243,7 +1240,6 @@ static void amdgpu_ras_debugfs_remove_all(struct amdgpu_device *adev) } con->dir = NULL; -#endif } /* debugfs end */ @@ -1291,7 +1287,8 @@ static int amdgpu_ras_fs_init(struct amdgpu_device *adev) static int amdgpu_ras_fs_fini(struct amdgpu_device *adev) { - amdgpu_ras_debugfs_remove_all(adev); + if (IS_ENABLED(CONFIG_DEBUG_FS)) + amdgpu_ras_debugfs_remove_all(adev); amdgpu_ras_sysfs_remove_all(adev); return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h index 6b8d7bb83bb3..ec398ed7deb8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h @@ -607,14 +607,8 @@ int amdgpu_ras_sysfs_create(struct amdgpu_device *adev, int amdgpu_ras_sysfs_remove(struct amdgpu_device *adev, struct ras_common_if *head); -void amdgpu_ras_debugfs_create(struct amdgpu_device *adev, - struct ras_fs_if *head); - void amdgpu_ras_debugfs_create_all(struct amdgpu_device *adev); -void amdgpu_ras_debugfs_remove(struct amdgpu_device *adev, - struct ras_common_if *head); - int amdgpu_ras_error_query(struct amdgpu_device *adev, struct ras_query_if *info); From c2ffe78b8b1354603a7d5afb719b2a6dfbb582da Mon Sep 17 00:00:00 2001 From: Chris Park Date: Tue, 24 Nov 2020 20:11:25 -0500 Subject: [PATCH 231/296] drm/amd/display: Prevent bandwidth overflow [Why] At very high pixel clock, bandwidth calculation exceeds 32 bit size and overflow value. This causes the resulting selection of link rate to be inaccurate. [How] Change order of operation and use fixed point to deal with integer accuracy. Also address bug found when forcing link rate. 
Signed-off-by: Chris Park Reviewed-by: Wenjing Liu Acked-by: Eryk Brol Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc_link.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link.c b/drivers/gpu/drm/amd/display/dc/core/dc_link.c index fec87a2e210c..5b0cedfa824a 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link.c @@ -3394,10 +3394,13 @@ uint32_t dc_bandwidth_in_kbps_from_timing( { uint32_t bits_per_channel = 0; uint32_t kbps; + struct fixed31_32 link_bw_kbps; if (timing->flags.DSC) { - kbps = (timing->pix_clk_100hz * timing->dsc_cfg.bits_per_pixel); - kbps = kbps / 160 + ((kbps % 160) ? 1 : 0); + link_bw_kbps = dc_fixpt_from_int(timing->pix_clk_100hz); + link_bw_kbps = dc_fixpt_div_int(link_bw_kbps, 160); + link_bw_kbps = dc_fixpt_mul_int(link_bw_kbps, timing->dsc_cfg.bits_per_pixel); + kbps = dc_fixpt_ceil(link_bw_kbps); return kbps; } From 369b7ebe1792b620b7a9404e7b71daaae13ebfd6 Mon Sep 17 00:00:00 2001 From: Sung Lee Date: Mon, 23 Nov 2020 15:38:54 -0500 Subject: [PATCH 232/296] drm/amd/display: Add wm table for Renoir [Why] Without additional HostVM Latency, Renoir takes 2us longer to exit self-refresh. This causes underflow in certain cases. [How] Add table for Renoir with updated sr exit latencies for WM set A. 
Signed-off-by: Sung Lee Reviewed-by: Yongqiang Sun Reviewed-by: Roman Li Acked-by: Eryk Brol Signed-off-by: Alex Deucher --- .../amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c | 93 ++++++++++++++++++- 1 file changed, 89 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c index c001307b0a59..6b431db146cd 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c @@ -579,7 +579,7 @@ static struct clk_bw_params rn_bw_params = { }; -static struct wm_table ddr4_wm_table = { +static struct wm_table ddr4_wm_table_gs = { .entries = { { .wm_inst = WM_A, @@ -616,7 +616,7 @@ static struct wm_table ddr4_wm_table = { } }; -static struct wm_table lpddr4_wm_table = { +static struct wm_table lpddr4_wm_table_gs = { .entries = { { .wm_inst = WM_A, @@ -690,6 +690,80 @@ static struct wm_table lpddr4_wm_table_with_disabled_ppt = { } }; +static struct wm_table ddr4_wm_table_rn = { + .entries = { + { + .wm_inst = WM_A, + .wm_type = WM_TYPE_PSTATE_CHG, + .pstate_latency_us = 11.72, + .sr_exit_time_us = 9.09, + .sr_enter_plus_exit_time_us = 10.14, + .valid = true, + }, + { + .wm_inst = WM_B, + .wm_type = WM_TYPE_PSTATE_CHG, + .pstate_latency_us = 11.72, + .sr_exit_time_us = 10.12, + .sr_enter_plus_exit_time_us = 11.48, + .valid = true, + }, + { + .wm_inst = WM_C, + .wm_type = WM_TYPE_PSTATE_CHG, + .pstate_latency_us = 11.72, + .sr_exit_time_us = 10.12, + .sr_enter_plus_exit_time_us = 11.48, + .valid = true, + }, + { + .wm_inst = WM_D, + .wm_type = WM_TYPE_PSTATE_CHG, + .pstate_latency_us = 11.72, + .sr_exit_time_us = 10.12, + .sr_enter_plus_exit_time_us = 11.48, + .valid = true, + }, + } +}; + +static struct wm_table lpddr4_wm_table_rn = { + .entries = { + { + .wm_inst = WM_A, + .wm_type = WM_TYPE_PSTATE_CHG, + .pstate_latency_us = 11.65333, + .sr_exit_time_us = 7.32, + .sr_enter_plus_exit_time_us = 8.38, + 
.valid = true, + }, + { + .wm_inst = WM_B, + .wm_type = WM_TYPE_PSTATE_CHG, + .pstate_latency_us = 11.65333, + .sr_exit_time_us = 9.82, + .sr_enter_plus_exit_time_us = 11.196, + .valid = true, + }, + { + .wm_inst = WM_C, + .wm_type = WM_TYPE_PSTATE_CHG, + .pstate_latency_us = 11.65333, + .sr_exit_time_us = 9.89, + .sr_enter_plus_exit_time_us = 11.24, + .valid = true, + }, + { + .wm_inst = WM_D, + .wm_type = WM_TYPE_PSTATE_CHG, + .pstate_latency_us = 11.65333, + .sr_exit_time_us = 9.748, + .sr_enter_plus_exit_time_us = 11.102, + .valid = true, + }, + } +}; + static unsigned int find_dcfclk_for_voltage(struct dpm_clocks *clock_table, unsigned int voltage) { int i; @@ -771,6 +845,11 @@ void rn_clk_mgr_construct( struct dc_debug_options *debug = &ctx->dc->debug; struct dpm_clocks clock_table = { 0 }; enum pp_smu_status status = 0; + int is_green_sardine = 0; + +#if defined(CONFIG_DRM_AMD_DC_DCN) + is_green_sardine = ASICREV_IS_GREEN_SARDINE(ctx->asic_id.hw_internal_rev); +#endif clk_mgr->base.ctx = ctx; clk_mgr->base.funcs = &dcn21_funcs; @@ -811,10 +890,16 @@ void rn_clk_mgr_construct( if (clk_mgr->periodic_retraining_disabled) { rn_bw_params.wm_table = lpddr4_wm_table_with_disabled_ppt; } else { - rn_bw_params.wm_table = lpddr4_wm_table; + if (is_green_sardine) + rn_bw_params.wm_table = lpddr4_wm_table_gs; + else + rn_bw_params.wm_table = lpddr4_wm_table_rn; } } else { - rn_bw_params.wm_table = ddr4_wm_table; + if (is_green_sardine) + rn_bw_params.wm_table = ddr4_wm_table_gs; + else + rn_bw_params.wm_table = ddr4_wm_table_rn; } /* Saved clocks configured at boot for debug purposes */ rn_dump_clk_registers(&clk_mgr->base.boot_snapshot, &clk_mgr->base, &log_info); From 6896887b8676d8fb445c85ea56333b9661a6a8aa Mon Sep 17 00:00:00 2001 From: "Stanley.Yang" Date: Mon, 7 Dec 2020 14:38:33 +0800 Subject: [PATCH 233/296] drm/amdgpu: fix sdma instance fw version and feature version init each sdma instance fw_version and feature_version should be set right value when asic type 
isn't between CHIP_SIENNA_CICHLID and CHIP_DIMGREY_CAVEFISH Signed-off-by: Stanley.Yang Reviewed-by: Tao Zhou Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c index 9f3952723c63..2a485052e3ab 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c @@ -186,7 +186,7 @@ static int sdma_v5_2_init_microcode(struct amdgpu_device *adev) if (err) goto out; - err = sdma_v5_2_init_inst_ctx(&adev->sdma.instance[0]); + err = sdma_v5_2_init_inst_ctx(&adev->sdma.instance[i]); if (err) goto out; } From ab6e4e9de8dd7febfdf6719741f10dc6693d8ce9 Mon Sep 17 00:00:00 2001 From: Felix Kuehling Date: Tue, 8 Dec 2020 12:23:15 -0500 Subject: [PATCH 234/296] drm/amdkfd: Fix leak in dmabuf import Release dmabuf reference before returning from kfd_ioctl_import_dmabuf. amdgpu_amdkfd_gpuvm_import_dmabuf takes a reference to the underlying GEM BO and doesn't keep the reference to the dmabuf wrapper.
Signed-off-by: Felix Kuehling Reviewed-by: Kent Russell Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index 222f1df1a6b6..8cc51cec988a 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -1736,6 +1736,7 @@ static int kfd_ioctl_import_dmabuf(struct file *filep, } mutex_unlock(&p->mutex); + dma_buf_put(dmabuf); args->handle = MAKE_HANDLE(args->gpu_id, idr_handle); @@ -1745,6 +1746,7 @@ err_free: amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->kgd, (struct kgd_mem *)mem, NULL); err_unlock: mutex_unlock(&p->mutex); + dma_buf_put(dmabuf); return r; } From c9918d1f63a3e77ec20997a77c997a6fa7282f2f Mon Sep 17 00:00:00 2001 From: Changfeng Date: Mon, 7 Dec 2020 15:42:29 +0800 Subject: [PATCH 235/296] drm/amd/pm: update smu10.h WORKLOAD_PPLIB setting for raven When using the old WORKLOAD_PPLIB setting in smu10.h, there is a problem that it is not able to switch to max gpu clk during compute workload. The WORKLOAD_PPLIB setting needs to be updated to fix this issue.
Signed-off-by: Changfeng Reviewed-by: Huang Rui Acked-by: Alex Deucher Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/pm/inc/smu10.h | 14 ++++++-------- .../gpu/drm/amd/pm/powerplay/hwmgr/smu10_hwmgr.c | 9 +++------ 2 files changed, 9 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/inc/smu10.h b/drivers/gpu/drm/amd/pm/inc/smu10.h index b96520528240..9e837a5014c5 100644 --- a/drivers/gpu/drm/amd/pm/inc/smu10.h +++ b/drivers/gpu/drm/amd/pm/inc/smu10.h @@ -136,14 +136,12 @@ #define FEATURE_CORE_CSTATES_MASK (1 << FEATURE_CORE_CSTATES_BIT) /* Workload bits */ -#define WORKLOAD_DEFAULT_BIT 0 -#define WORKLOAD_PPLIB_FULL_SCREEN_3D_BIT 1 -#define WORKLOAD_PPLIB_POWER_SAVING_BIT 2 -#define WORKLOAD_PPLIB_VIDEO_BIT 3 -#define WORKLOAD_PPLIB_VR_BIT 4 -#define WORKLOAD_PPLIB_COMPUTE_BIT 5 -#define WORKLOAD_PPLIB_CUSTOM_BIT 6 -#define WORKLOAD_PPLIB_COUNT 7 +#define WORKLOAD_PPLIB_FULL_SCREEN_3D_BIT 0 +#define WORKLOAD_PPLIB_VIDEO_BIT 2 +#define WORKLOAD_PPLIB_VR_BIT 3 +#define WORKLOAD_PPLIB_COMPUTE_BIT 4 +#define WORKLOAD_PPLIB_CUSTOM_BIT 5 +#define WORKLOAD_PPLIB_COUNT 6 typedef struct { /* MP1_EXT_SCRATCH0 */ diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu10_hwmgr.c b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu10_hwmgr.c index cf60f3992303..e6f40ee9f313 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu10_hwmgr.c +++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu10_hwmgr.c @@ -1297,15 +1297,9 @@ static int conv_power_profile_to_pplib_workload(int power_profile) int pplib_workload = 0; switch (power_profile) { - case PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT: - pplib_workload = WORKLOAD_DEFAULT_BIT; - break; case PP_SMC_POWER_PROFILE_FULLSCREEN3D: pplib_workload = WORKLOAD_PPLIB_FULL_SCREEN_3D_BIT; break; - case PP_SMC_POWER_PROFILE_POWERSAVING: - pplib_workload = WORKLOAD_PPLIB_POWER_SAVING_BIT; - break; case PP_SMC_POWER_PROFILE_VIDEO: pplib_workload = WORKLOAD_PPLIB_VIDEO_BIT; break; @@ -1315,6 +1309,9 @@ static int 
conv_power_profile_to_pplib_workload(int power_profile) case PP_SMC_POWER_PROFILE_COMPUTE: pplib_workload = WORKLOAD_PPLIB_COMPUTE_BIT; break; + case PP_SMC_POWER_PROFILE_CUSTOM: + pplib_workload = WORKLOAD_PPLIB_CUSTOM_BIT; + break; } return pplib_workload; From 157fe68d74c2ad2db438c91af9ed3d3a51de4ed7 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 7 Dec 2020 13:12:29 -0500 Subject: [PATCH 236/296] drm/amdgpu: fix size calculation with stolen vga memory MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If we need to keep the stolen vga memory, make sure it is at least as big as the legacy vga size. Acked-by: Christian König Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c index 36604d751d62..3e4892b7b7d3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c @@ -499,6 +499,9 @@ void amdgpu_gmc_get_vbios_allocations(struct amdgpu_device *adev) else size = amdgpu_gmc_get_vbios_fb_size(adev); + if (adev->mman.keep_stolen_vga_memory) + size = max(size, (unsigned)AMDGPU_VBIOS_VGA_ALLOCATION); + /* set to 0 if the pre-OS buffer uses up most of vram */ if ((adev->gmc.real_vram_size - size) < (8 * 1024 * 1024)) size = 0; From ab43234d0bafcf2accd9db4fc4d193180b752f94 Mon Sep 17 00:00:00 2001 From: Andrey Grodzovsky Date: Tue, 8 Dec 2020 15:16:15 -0500 Subject: [PATCH 237/296] drm/amdgpu: Initialise drm_gem_object_funcs for imported BOs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For BOs imported from outside of amdgpu, setting of amdgpu_gem_object_funcs was missing in amdgpu_dma_buf_create_obj. Fix by refactoring BO creation and amdgpu_gem_object_funcs setting into single function called from both code paths. 
Fixes: d693def4fd1c ("drm: Remove obsolete GEM and PRIME callbacks from struct drm_driver") v2: Use amdgpu_gem_object_create() directly v3: fix warning Reviewed-by: Christian König Signed-off-by: Andrey Grodzovsky Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c | 8 +++- drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 41 ++++++++++++--------- 2 files changed, 29 insertions(+), 20 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c index 957934926b24..1b56dbc1f304 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c @@ -459,6 +459,7 @@ amdgpu_dma_buf_create_obj(struct drm_device *dev, struct dma_buf *dma_buf) struct amdgpu_device *adev = drm_to_adev(dev); struct amdgpu_bo *bo; struct amdgpu_bo_param bp; + struct drm_gem_object *gobj; int ret; memset(&bp, 0, sizeof(bp)); @@ -469,17 +470,20 @@ amdgpu_dma_buf_create_obj(struct drm_device *dev, struct dma_buf *dma_buf) bp.type = ttm_bo_type_sg; bp.resv = resv; dma_resv_lock(resv, NULL); - ret = amdgpu_bo_create(adev, &bp, &bo); + ret = amdgpu_gem_object_create(adev, dma_buf->size, PAGE_SIZE, + AMDGPU_GEM_DOMAIN_CPU, + 0, ttm_bo_type_sg, resv, &gobj); if (ret) goto error; + bo = gem_to_amdgpu_bo(gobj); bo->allowed_domains = AMDGPU_GEM_DOMAIN_GTT; bo->preferred_domains = AMDGPU_GEM_DOMAIN_GTT; if (dma_buf->ops != &amdgpu_dmabuf_ops) bo->prime_shared_count = 1; dma_resv_unlock(resv); - return &bo->tbo.base; + return gobj; error: dma_resv_unlock(resv); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c index 7e8265da9f25..e8c76bd8c501 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c @@ -66,26 +66,12 @@ int amdgpu_gem_object_create(struct amdgpu_device *adev, unsigned long size, bp.type = type; bp.resv = resv; bp.preferred_domain = initial_domain; -retry: bp.flags =
flags; bp.domain = initial_domain; r = amdgpu_bo_create(adev, &bp, &bo); - if (r) { - if (r != -ERESTARTSYS) { - if (flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED) { - flags &= ~AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED; - goto retry; - } - - if (initial_domain == AMDGPU_GEM_DOMAIN_VRAM) { - initial_domain |= AMDGPU_GEM_DOMAIN_GTT; - goto retry; - } - DRM_DEBUG("Failed to allocate GEM object (%ld, %d, %u, %d)\n", - size, initial_domain, alignment, r); - } + if (r) return r; - } + *obj = &bo->tbo.base; return 0; @@ -225,7 +211,7 @@ int amdgpu_gem_create_ioctl(struct drm_device *dev, void *data, uint64_t size = args->in.bo_size; struct dma_resv *resv = NULL; struct drm_gem_object *gobj; - uint32_t handle; + uint32_t handle, initial_domain; int r; /* reject invalid gem flags */ @@ -269,9 +255,28 @@ int amdgpu_gem_create_ioctl(struct drm_device *dev, void *data, resv = vm->root.base.bo->tbo.base.resv; } +retry: + initial_domain = (u32)(0xffffffff & args->in.domains); r = amdgpu_gem_object_create(adev, size, args->in.alignment, - (u32)(0xffffffff & args->in.domains), + initial_domain, flags, ttm_bo_type_device, resv, &gobj); + if (r) { + if (r != -ERESTARTSYS) { + if (flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED) { + flags &= ~AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED; + goto retry; + } + + if (initial_domain == AMDGPU_GEM_DOMAIN_VRAM) { + initial_domain |= AMDGPU_GEM_DOMAIN_GTT; + goto retry; + } + DRM_DEBUG("Failed to allocate GEM object (%llu, %d, %llu, %d)\n", + size, initial_domain, args->in.alignment, r); + } + return r; + } + if (flags & AMDGPU_GEM_CREATE_VM_ALWAYS_VALID) { if (!r) { struct amdgpu_bo *abo = gem_to_amdgpu_bo(gobj); From 998f17296234aa8d3676b4a13962eb39f4ad24e0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= Date: Wed, 9 Dec 2020 14:57:37 +0100 Subject: [PATCH 238/296] xdp: Remove the xdp_attachment_flags_ok() callback MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since commit 
7f0a838254bd ("bpf, xdp: Maintain info on attached XDP BPF programs in net_device"), the XDP program attachment info is now maintained in the core code. This interacts badly with the xdp_attachment_flags_ok() check that prevents unloading an XDP program with different load flags than it was loaded with. In practice, two kinds of failures are seen: - An XDP program loaded without specifying a mode (and which then ends up in driver mode) cannot be unloaded if the program mode is specified on unload. - The dev_xdp_uninstall() hook always calls the driver callback with the mode set to the type of the program but an empty flags argument, which means the flags_ok() check prevents the program from being removed, leading to bpf prog reference leaks. The original reason this check was added was to avoid ambiguity when multiple programs were loaded. With the way the checks are done in the core now, this is quite simple to enforce in the core code, so let's add a check there and get rid of the xdp_attachment_flags_ok() callback entirely. 
Fixes: 7f0a838254bd ("bpf, xdp: Maintain info on attached XDP BPF programs in net_device") Signed-off-by: Toke Høiland-Jørgensen Signed-off-by: Daniel Borkmann Acked-by: Jakub Kicinski Link: https://lore.kernel.org/bpf/160752225751.110217.10267659521308669050.stgit@toke.dk --- .../ethernet/netronome/nfp/nfp_net_common.c | 6 ----- drivers/net/ethernet/ti/cpsw_priv.c | 3 --- drivers/net/netdevsim/bpf.c | 3 --- include/net/xdp.h | 2 -- net/core/dev.c | 22 +++++++++++++++++-- net/core/xdp.c | 12 ---------- 6 files changed, 20 insertions(+), 28 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c index b150da43adb2..437226866ce8 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c @@ -3562,9 +3562,6 @@ static int nfp_net_xdp_setup_drv(struct nfp_net *nn, struct netdev_bpf *bpf) struct nfp_net_dp *dp; int err; - if (!xdp_attachment_flags_ok(&nn->xdp, bpf)) - return -EBUSY; - if (!prog == !nn->dp.xdp_prog) { WRITE_ONCE(nn->dp.xdp_prog, prog); xdp_attachment_setup(&nn->xdp, bpf); @@ -3593,9 +3590,6 @@ static int nfp_net_xdp_setup_hw(struct nfp_net *nn, struct netdev_bpf *bpf) { int err; - if (!xdp_attachment_flags_ok(&nn->xdp_hw, bpf)) - return -EBUSY; - err = nfp_app_xdp_offload(nn->app, nn, bpf->prog, bpf->extack); if (err) return err; diff --git a/drivers/net/ethernet/ti/cpsw_priv.c b/drivers/net/ethernet/ti/cpsw_priv.c index 31c5e36ff706..424e644724e4 100644 --- a/drivers/net/ethernet/ti/cpsw_priv.c +++ b/drivers/net/ethernet/ti/cpsw_priv.c @@ -1265,9 +1265,6 @@ static int cpsw_xdp_prog_setup(struct cpsw_priv *priv, struct netdev_bpf *bpf) if (!priv->xdpi.prog && !prog) return 0; - if (!xdp_attachment_flags_ok(&priv->xdpi, bpf)) - return -EBUSY; - WRITE_ONCE(priv->xdp_prog, prog); xdp_attachment_setup(&priv->xdpi, bpf); diff --git a/drivers/net/netdevsim/bpf.c b/drivers/net/netdevsim/bpf.c index 2e90512f3bbe..85546664bdd5 
100644 --- a/drivers/net/netdevsim/bpf.c +++ b/drivers/net/netdevsim/bpf.c @@ -190,9 +190,6 @@ nsim_xdp_set_prog(struct netdevsim *ns, struct netdev_bpf *bpf, { int err; - if (!xdp_attachment_flags_ok(xdp, bpf)) - return -EBUSY; - if (bpf->command == XDP_SETUP_PROG && !ns->bpf_xdpdrv_accept) { NSIM_EA(bpf->extack, "driver XDP disabled in DebugFS"); return -EOPNOTSUPP; diff --git a/include/net/xdp.h b/include/net/xdp.h index 3814fb631d52..9dab2bc6f187 100644 --- a/include/net/xdp.h +++ b/include/net/xdp.h @@ -240,8 +240,6 @@ struct xdp_attachment_info { }; struct netdev_bpf; -bool xdp_attachment_flags_ok(struct xdp_attachment_info *info, - struct netdev_bpf *bpf); void xdp_attachment_setup(struct xdp_attachment_info *info, struct netdev_bpf *bpf); diff --git a/net/core/dev.c b/net/core/dev.c index 8588ade790cb..38412e70f761 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -8917,6 +8917,17 @@ static struct bpf_prog *dev_xdp_prog(struct net_device *dev, return dev->xdp_state[mode].prog; } +static u8 dev_xdp_prog_count(struct net_device *dev) +{ + u8 count = 0; + int i; + + for (i = 0; i < __MAX_XDP_MODE; i++) + if (dev->xdp_state[i].prog || dev->xdp_state[i].link) + count++; + return count; +} + u32 dev_xdp_prog_id(struct net_device *dev, enum bpf_xdp_mode mode) { struct bpf_prog *prog = dev_xdp_prog(dev, mode); @@ -9007,6 +9018,7 @@ static int dev_xdp_attach(struct net_device *dev, struct netlink_ext_ack *extack struct bpf_xdp_link *link, struct bpf_prog *new_prog, struct bpf_prog *old_prog, u32 flags) { + unsigned int num_modes = hweight32(flags & XDP_FLAGS_MODES); struct bpf_prog *cur_prog; enum bpf_xdp_mode mode; bpf_op_t bpf_op; @@ -9022,11 +9034,17 @@ static int dev_xdp_attach(struct net_device *dev, struct netlink_ext_ack *extack NL_SET_ERR_MSG(extack, "Invalid XDP flags for BPF link attachment"); return -EINVAL; } - /* just one XDP mode bit should be set, zero defaults to SKB mode */ - if (hweight32(flags & XDP_FLAGS_MODES) > 1) { + /* just one XDP mode bit 
should be set, zero defaults to drv/skb mode */ + if (num_modes > 1) { NL_SET_ERR_MSG(extack, "Only one XDP mode flag can be set"); return -EINVAL; } + /* avoid ambiguity if offload + drv/skb mode progs are both loaded */ + if (!num_modes && dev_xdp_prog_count(dev) > 1) { + NL_SET_ERR_MSG(extack, + "More than one program loaded, unset mode is ambiguous"); + return -EINVAL; + } /* old_prog != NULL implies XDP_FLAGS_REPLACE is set */ if (old_prog && !(flags & XDP_FLAGS_REPLACE)) { NL_SET_ERR_MSG(extack, "XDP_FLAGS_REPLACE is not specified"); diff --git a/net/core/xdp.c b/net/core/xdp.c index 491ad569a79c..d900cebc0acd 100644 --- a/net/core/xdp.c +++ b/net/core/xdp.c @@ -403,18 +403,6 @@ void __xdp_release_frame(void *data, struct xdp_mem_info *mem) } EXPORT_SYMBOL_GPL(__xdp_release_frame); -bool xdp_attachment_flags_ok(struct xdp_attachment_info *info, - struct netdev_bpf *bpf) -{ - if (info->prog && (bpf->flags ^ info->flags) & XDP_FLAGS_MODES) { - NL_SET_ERR_MSG(bpf->extack, - "program loaded with different flags"); - return false; - } - return true; -} -EXPORT_SYMBOL_GPL(xdp_attachment_flags_ok); - void xdp_attachment_setup(struct xdp_attachment_info *info, struct netdev_bpf *bpf) { From 0b5b6e747c86e57b7ebd64ccb84314a227ccfcc2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= Date: Wed, 9 Dec 2020 14:57:38 +0100 Subject: [PATCH 239/296] selftests/bpf/test_offload.py: Remove check for program load flags match MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since we just removed the xdp_attachment_flags_ok() callback, also remove the check for it in test_offload.py, and replace it with a test for the new ambiguity-avoid check when multiple programs are loaded. 
Fixes: 7f0a838254bd ("bpf, xdp: Maintain info on attached XDP BPF programs in net_device") Signed-off-by: Toke Høiland-Jørgensen Signed-off-by: Daniel Borkmann Acked-by: Jakub Kicinski Link: https://lore.kernel.org/bpf/160752225858.110217.13036901876869496246.stgit@toke.dk --- tools/testing/selftests/bpf/test_offload.py | 22 +++++---------------- 1 file changed, 5 insertions(+), 17 deletions(-) diff --git a/tools/testing/selftests/bpf/test_offload.py b/tools/testing/selftests/bpf/test_offload.py index 43c9cda199b8..becd27b2f4ba 100755 --- a/tools/testing/selftests/bpf/test_offload.py +++ b/tools/testing/selftests/bpf/test_offload.py @@ -716,13 +716,11 @@ def test_multi_prog(simdev, sim, obj, modename, modeid): fail(ret == 0, "Replaced one of programs without -force") check_extack(err, "XDP program already attached.", args) - if modename == "" or modename == "drv": - othermode = "" if modename == "drv" else "drv" - start_test("Test multi-attachment XDP - detach...") - ret, _, err = sim.unset_xdp(othermode, force=True, - fail=False, include_stderr=True) - fail(ret == 0, "Removed program with a bad mode") - check_extack(err, "program loaded with different flags.", args) + start_test("Test multi-attachment XDP - remove without mode...") + ret, _, err = sim.unset_xdp("", force=True, + fail=False, include_stderr=True) + fail(ret == 0, "Removed program without a mode flag") + check_extack(err, "More than one program loaded, unset mode is ambiguous.", args) sim.unset_xdp("offload") xdp = sim.ip_link_show(xdp=True)["xdp"] @@ -1001,16 +999,6 @@ try: check_extack(err, "native and generic XDP can't be active at the same time.", args) - ret, _, err = sim.set_xdp(obj, "", force=True, - fail=False, include_stderr=True) - fail(ret == 0, "Replaced XDP program with a program in different mode") - check_extack(err, "program loaded with different flags.", args) - - start_test("Test XDP prog remove with bad flags...") - ret, _, err = sim.unset_xdp("", force=True, - fail=False, 
include_stderr=True) - fail(ret == 0, "Removed program with a bad mode") - check_extack(err, "program loaded with different flags.", args) start_test("Test MTU restrictions...") ret, _ = sim.set_mtu(9000, fail=False) From e4ff5aa469403462091eb22e2b0843b894167e10 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= Date: Wed, 9 Dec 2020 14:57:39 +0100 Subject: [PATCH 240/296] netdevsim: Add debugfs toggle to reject BPF programs in verifier MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This adds a new debugfs toggle ('bpf_bind_verifier_accept') that can be used to make netdevsim reject BPF programs from being accepted by the verifier. If this toggle (which defaults to true) is set to false, nsim_bpf_verify_insn() will return EOPNOTSUPP on the last instruction (after outputting the 'Hello from netdevsim' verifier message). This makes it possible to check the verification callback in the driver from test_offload.py in selftests, since the verifier now clears the verifier log on a successful load, hiding the message from the driver. 
Fixes: 6f8a57ccf851 ("bpf: Make verifier log more relevant by default") Signed-off-by: Toke Høiland-Jørgensen Signed-off-by: Daniel Borkmann Acked-by: Jakub Kicinski Link: https://lore.kernel.org/bpf/160752225964.110217.12584017165318065332.stgit@toke.dk --- drivers/net/netdevsim/bpf.c | 12 ++++++++++-- drivers/net/netdevsim/netdevsim.h | 1 + 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/drivers/net/netdevsim/bpf.c b/drivers/net/netdevsim/bpf.c index 85546664bdd5..90aafb56f140 100644 --- a/drivers/net/netdevsim/bpf.c +++ b/drivers/net/netdevsim/bpf.c @@ -63,15 +63,20 @@ static int nsim_bpf_verify_insn(struct bpf_verifier_env *env, int insn_idx, int prev_insn) { struct nsim_bpf_bound_prog *state; + int ret = 0; state = env->prog->aux->offload->dev_priv; if (state->nsim_dev->bpf_bind_verifier_delay && !insn_idx) msleep(state->nsim_dev->bpf_bind_verifier_delay); - if (insn_idx == env->prog->len - 1) + if (insn_idx == env->prog->len - 1) { pr_vlog(env, "Hello from netdevsim!\n"); - return 0; + if (!state->nsim_dev->bpf_bind_verifier_accept) + ret = -EOPNOTSUPP; + } + + return ret; } static int nsim_bpf_finalize(struct bpf_verifier_env *env) @@ -595,6 +600,9 @@ int nsim_bpf_dev_init(struct nsim_dev *nsim_dev) &nsim_dev->bpf_bind_accept); debugfs_create_u32("bpf_bind_verifier_delay", 0600, nsim_dev->ddir, &nsim_dev->bpf_bind_verifier_delay); + nsim_dev->bpf_bind_verifier_accept = true; + debugfs_create_bool("bpf_bind_verifier_accept", 0600, nsim_dev->ddir, + &nsim_dev->bpf_bind_verifier_accept); return 0; } diff --git a/drivers/net/netdevsim/netdevsim.h b/drivers/net/netdevsim/netdevsim.h index 827fc80f50a0..c4e7ad2a1964 100644 --- a/drivers/net/netdevsim/netdevsim.h +++ b/drivers/net/netdevsim/netdevsim.h @@ -189,6 +189,7 @@ struct nsim_dev { struct dentry *take_snapshot; struct bpf_offload_dev *bpf_dev; bool bpf_bind_accept; + bool bpf_bind_verifier_accept; u32 bpf_bind_verifier_delay; struct dentry *ddir_bpf_bound_progs; u32 prog_id_gen; From 
d8b5e76ae4e02908d000397597c6bc2868362fbb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= Date: Wed, 9 Dec 2020 14:57:40 +0100 Subject: [PATCH 241/296] selftests/bpf/test_offload.py: Only check verifier log on verification fails MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since commit 6f8a57ccf851 ("bpf: Make verifier log more relevant by default"), the verifier discards log messages for successfully-verified programs. This broke test_offload.py which is looking for a verification message from the driver callback. Change test_offload.py to use the toggle in netdevsim to make the verification fail before looking for the verification message. Fixes: 6f8a57ccf851 ("bpf: Make verifier log more relevant by default") Signed-off-by: Toke Høiland-Jørgensen Signed-off-by: Daniel Borkmann Acked-by: Jakub Kicinski Link: https://lore.kernel.org/bpf/160752226069.110217.12370824996153348073.stgit@toke.dk --- tools/testing/selftests/bpf/test_offload.py | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/bpf/test_offload.py b/tools/testing/selftests/bpf/test_offload.py index becd27b2f4ba..61527b43f067 100755 --- a/tools/testing/selftests/bpf/test_offload.py +++ b/tools/testing/selftests/bpf/test_offload.py @@ -911,11 +911,18 @@ try: sim.tc_flush_filters() + start_test("Test TC offloads failure...") + sim.dfs["dev/bpf_bind_verifier_accept"] = 0 + ret, _, err = sim.cls_bpf_add_filter(obj, verbose=True, skip_sw=True, + fail=False, include_stderr=True) + fail(ret == 0, "TC filter did not reject with TC offloads enabled") + check_verifier_log(err, "[netdevsim] Hello from netdevsim!") + sim.dfs["dev/bpf_bind_verifier_accept"] = 1 + start_test("Test TC offloads work...") ret, _, err = sim.cls_bpf_add_filter(obj, verbose=True, skip_sw=True, fail=False, include_stderr=True) fail(ret != 0, "TC filter did not load with TC offloads enabled") - 
check_verifier_log(err, "[netdevsim] Hello from netdevsim!") start_test("Test TC offload basics...") dfs = simdev.dfs_get_bound_progs(expected=1) @@ -1032,6 +1039,15 @@ try: rm("/sys/fs/bpf/offload") sim.wait_for_flush() + start_test("Test XDP load failure...") + sim.dfs["dev/bpf_bind_verifier_accept"] = 0 + ret, _, err = bpftool_prog_load("sample_ret0.o", "/sys/fs/bpf/offload", + dev=sim['ifname'], fail=False, include_stderr=True) + fail(ret == 0, "verifier should fail on load") + check_verifier_log(err, "[netdevsim] Hello from netdevsim!") + sim.dfs["dev/bpf_bind_verifier_accept"] = 1 + sim.wait_for_flush() + start_test("Test XDP offload...") _, _, err = sim.set_xdp(obj, "offload", verbose=True, include_stderr=True) ipl = sim.ip_link_show(xdp=True) @@ -1039,7 +1055,6 @@ try: progs = bpftool_prog_list(expected=1) prog = progs[0] fail(link_xdp["id"] != prog["id"], "Loaded program has wrong ID") - check_verifier_log(err, "[netdevsim] Hello from netdevsim!") start_test("Test XDP offload is device bound...") dfs = simdev.dfs_get_bound_progs(expected=1) From 852c2ee338f0ac6026458615b624e1c496142cf6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= Date: Wed, 9 Dec 2020 14:57:41 +0100 Subject: [PATCH 242/296] selftests/bpf/test_offload.py: Fix expected case of extack messages MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 7f0a838254bd ("bpf, xdp: Maintain info on attached XDP BPF programs in net_device") changed the case of some of the extack messages being returned when attaching of XDP programs failed. This broke test_offload.py, so let's fix the test to reflect this. 
Fixes: 7f0a838254bd ("bpf, xdp: Maintain info on attached XDP BPF programs in net_device") Signed-off-by: Toke Høiland-Jørgensen Signed-off-by: Daniel Borkmann Acked-by: Jakub Kicinski Link: https://lore.kernel.org/bpf/160752226175.110217.11214100824416344952.stgit@toke.dk --- tools/testing/selftests/bpf/test_offload.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/bpf/test_offload.py b/tools/testing/selftests/bpf/test_offload.py index 61527b43f067..51a5e4d939cc 100755 --- a/tools/testing/selftests/bpf/test_offload.py +++ b/tools/testing/selftests/bpf/test_offload.py @@ -1004,7 +1004,7 @@ try: fail=False, include_stderr=True) fail(ret == 0, "Replaced XDP program with a program in different mode") check_extack(err, - "native and generic XDP can't be active at the same time.", + "Native and generic XDP can't be active at the same time.", args) start_test("Test MTU restrictions...") @@ -1035,7 +1035,7 @@ try: offload = bpf_pinned("/sys/fs/bpf/offload") ret, _, err = sim.set_xdp(offload, "drv", fail=False, include_stderr=True) fail(ret == 0, "attached offloaded XDP program to drv") - check_extack(err, "using device-bound program without HW_MODE flag is not supported.", args) + check_extack(err, "Using device-bound program without HW_MODE flag is not supported.", args) rm("/sys/fs/bpf/offload") sim.wait_for_flush() From 766e62b7fcd2cf1d43e6594ba37c659dc48f7ddb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= Date: Wed, 9 Dec 2020 14:57:42 +0100 Subject: [PATCH 243/296] selftests/bpf/test_offload.py: Reset ethtool features after failed setting MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When setting the ethtool feature flag fails (as expected for the test), the kernel now tracks that the feature was requested to be 'off' and refuses to subsequently disable it again. 
So reset it back to 'on' so a subsequent disable (that's not supposed to fail) can succeed. Fixes: 417ec26477a5 ("selftests/bpf: add offload test based on netdevsim") Signed-off-by: Toke Høiland-Jørgensen Signed-off-by: Daniel Borkmann Acked-by: Jakub Kicinski Link: https://lore.kernel.org/bpf/160752226280.110217.10696241563705667871.stgit@toke.dk --- tools/testing/selftests/bpf/test_offload.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/testing/selftests/bpf/test_offload.py b/tools/testing/selftests/bpf/test_offload.py index 51a5e4d939cc..2128fbd8414b 100755 --- a/tools/testing/selftests/bpf/test_offload.py +++ b/tools/testing/selftests/bpf/test_offload.py @@ -946,6 +946,7 @@ try: start_test("Test disabling TC offloads is rejected while filters installed...") ret, _ = sim.set_ethtool_tc_offloads(False, fail=False) fail(ret == 0, "Driver should refuse to disable TC offloads with filters installed...") + sim.set_ethtool_tc_offloads(True) start_test("Test qdisc removal frees things...") sim.tc_flush_filters() From 8158cad13435639cd4962fb88970960f880ef6d9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= Date: Wed, 9 Dec 2020 14:57:43 +0100 Subject: [PATCH 244/296] selftests/bpf/test_offload.py: Filter bpftool internal map when counting maps MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A few of the tests in test_offload.py expect to see a certain number of maps created, and check this by counting the number of maps returned by bpftool. There is already a filter that will remove any maps already there at the beginning of the test, but bpftool now creates a map for the PID iterator rodata on each invocation, which makes the map count wrong. Fix this by also filtering the pid_iter.rodata map by name when counting.
Fixes: d53dee3fe013 ("tools/bpftool: Show info for processes holding BPF map/prog/link/btf FDs") Signed-off-by: Toke Høiland-Jørgensen Signed-off-by: Daniel Borkmann Acked-by: Jakub Kicinski Link: https://lore.kernel.org/bpf/160752226387.110217.9887866138149423444.stgit@toke.dk --- tools/testing/selftests/bpf/test_offload.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/tools/testing/selftests/bpf/test_offload.py b/tools/testing/selftests/bpf/test_offload.py index 2128fbd8414b..b99bb8ed3ed4 100755 --- a/tools/testing/selftests/bpf/test_offload.py +++ b/tools/testing/selftests/bpf/test_offload.py @@ -184,9 +184,7 @@ def bpftool_prog_list(expected=None, ns=""): def bpftool_map_list(expected=None, ns=""): _, maps = bpftool("map show", JSON=True, ns=ns, fail=True) # Remove the base maps - for m in base_maps: - if m in maps: - maps.remove(m) + maps = [m for m in maps if m not in base_maps and m.get('name') not in base_map_names] if expected is not None: if len(maps) != expected: fail(True, "%d BPF maps loaded, expected %d" % @@ -770,6 +768,9 @@ ret, progs = bpftool("prog", fail=False) skip(ret != 0, "bpftool not installed") base_progs = progs _, base_maps = bpftool("map") +base_map_names = [ + 'pid_iter.rodata' # created on each bpftool invocation +] # Check netdevsim ret, out = cmd("modprobe netdevsim", fail=False) From 323a391a220c4a234cb1e678689d7f4c3b73f863 Mon Sep 17 00:00:00 2001 From: Oliver Hartkopp Date: Fri, 4 Dec 2020 14:35:07 +0100 Subject: [PATCH 245/296] can: isotp: isotp_setsockopt(): block setsockopt on bound sockets The isotp socket can be widely configured in its behaviour regarding addressing types, fill-ups, receive pattern tests and link layer length. Usually all these settings need to be fixed before bind() and can not be changed afterwards. This patch adds a check to enforce the common usage pattern. 
Fixes: e057dd3fc20f ("can: add ISO 15765-2:2016 transport protocol") Signed-off-by: Oliver Hartkopp Tested-by: Thomas Wagner Link: https://lore.kernel.org/r/20201203140604.25488-2-socketcan@hartkopp.net Signed-off-by: Marc Kleine-Budde Link: https://lore.kernel.org/r/20201204133508.742120-3-mkl@pengutronix.de Signed-off-by: Jakub Kicinski --- net/can/isotp.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/can/isotp.c b/net/can/isotp.c index d78ab13bd8be..26bdc3c20b7e 100644 --- a/net/can/isotp.c +++ b/net/can/isotp.c @@ -1157,6 +1157,9 @@ static int isotp_setsockopt(struct socket *sock, int level, int optname, if (level != SOL_CAN_ISOTP) return -EINVAL; + if (so->bound) + return -EISCONN; + switch (optname) { case CAN_ISOTP_OPTS: if (optlen != sizeof(struct can_isotp_options)) From 340b940ea0ed12d9adbb8f72dea17d516b2019e8 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Fri, 4 Dec 2020 08:42:05 +0200 Subject: [PATCH 246/296] RDMA/cm: Fix an attempt to use non-valid pointer when cleaning timewait MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If cm_create_timewait_info() fails, the timewait_info pointer will contain an error value and will be used in cm_remove_remote() later. 
general protection fault, probably for non-canonical address 0xdffffc0000000024: 0000 [#1] SMP KASAN PTI KASAN: null-ptr-deref in range [0x0000000000000120-0x0000000000000127] CPU: 2 PID: 12446 Comm: syz-executor.3 Not tainted 5.10.0-rc5-5d4c0742a60e #27 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 RIP: 0010:cm_remove_remote.isra.0+0x24/0x170 drivers/infiniband/core/cm.c:978 Code: 84 00 00 00 00 00 41 54 55 53 48 89 fb 48 8d ab 2d 01 00 00 e8 7d bf 4b fe 48 89 ea 48 b8 00 00 00 00 00 fc ff df 48 c1 ea 03 <0f> b6 04 02 48 89 ea 83 e2 07 38 d0 7f 08 84 c0 0f 85 fc 00 00 00 RSP: 0018:ffff888013127918 EFLAGS: 00010006 RAX: dffffc0000000000 RBX: fffffffffffffff4 RCX: ffffc9000a18b000 RDX: 0000000000000024 RSI: ffffffff82edc573 RDI: fffffffffffffff4 RBP: 0000000000000121 R08: 0000000000000001 R09: ffffed1002624f1d R10: 0000000000000003 R11: ffffed1002624f1c R12: ffff888107760c70 R13: ffff888107760c40 R14: fffffffffffffff4 R15: ffff888107760c9c FS: 00007fe1ffcc1700(0000) GS:ffff88811a600000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000001b2ff21000 CR3: 000000010f504001 CR4: 0000000000370ee0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: cm_destroy_id+0x189/0x15b0 drivers/infiniband/core/cm.c:1155 cma_connect_ib drivers/infiniband/core/cma.c:4029 [inline] rdma_connect_locked+0x1100/0x17c0 drivers/infiniband/core/cma.c:4107 rdma_connect+0x2a/0x40 drivers/infiniband/core/cma.c:4140 ucma_connect+0x277/0x340 drivers/infiniband/core/ucma.c:1069 ucma_write+0x236/0x2f0 drivers/infiniband/core/ucma.c:1724 vfs_write+0x220/0x830 fs/read_write.c:603 ksys_write+0x1df/0x240 fs/read_write.c:658 do_syscall_64+0x33/0x40 arch/x86/entry/common.c:46 entry_SYSCALL_64_after_hwframe+0x44/0xa9 Fixes: a977049dacde ("[PATCH] IB: Add the kernel CM implementation") Link:
https://lore.kernel.org/r/20201204064205.145795-1-leon@kernel.org Reviewed-by: Maor Gottlieb Reported-by: Amit Matityahu Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/cm.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index 012156624b82..5afd142fe8c7 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -1522,6 +1522,7 @@ int ib_send_cm_req(struct ib_cm_id *cm_id, id.local_id); if (IS_ERR(cm_id_priv->timewait_info)) { ret = PTR_ERR(cm_id_priv->timewait_info); + cm_id_priv->timewait_info = NULL; goto out; } @@ -2114,6 +2115,7 @@ static int cm_req_handler(struct cm_work *work) id.local_id); if (IS_ERR(cm_id_priv->timewait_info)) { ret = PTR_ERR(cm_id_priv->timewait_info); + cm_id_priv->timewait_info = NULL; goto destroy; } cm_id_priv->timewait_info->work.remote_id = cm_id_priv->id.remote_id; From 387270cb0b4035491c4812effd8b5af0e385a66c Mon Sep 17 00:00:00 2001 From: Zhen Lei Date: Mon, 7 Dec 2020 16:47:52 +0800 Subject: [PATCH 247/296] ARM: dts: mmp2-olpc-xo-1-75: clear the warnings when make dtbs The check_spi_bus_bridge() in scripts/dtc/checks.c requires that a node with the "spi-slave" property must have "#address-cells = <0>" and "#size-cells = <0>". But currently both "#address-cells" and "#size-cells" properties are deleted, so the corresponding default values are 2 and 1. As a result, the check fails and the warnings below are displayed.
arch/arm/boot/dts/mmp2.dtsi:472.23-480.6: Warning (spi_bus_bridge): \ /soc/apb@d4000000/spi@d4037000: incorrect #address-cells for SPI bus also defined at arch/arm/boot/dts/mmp2-olpc-xo-1-75.dts:225.7-237.3 arch/arm/boot/dts/mmp2.dtsi:472.23-480.6: Warning (spi_bus_bridge): \ /soc/apb@d4000000/spi@d4037000: incorrect #size-cells for SPI bus also defined at arch/arm/boot/dts/mmp2-olpc-xo-1-75.dts:225.7-237.3 arch/arm/boot/dts/mmp2-olpc-xo-1-75.dtb: Warning (spi_bus_reg): \ Failed prerequisite 'spi_bus_bridge' Because the value of "#size-cells" is already defined as zero in the node "ssp3: spi@d4037000" in arch/arm/boot/dts/mmp2.dtsi, we only need to explicitly add "#address-cells = <0>" and leave "#size-cells" unchanged. Signed-off-by: Zhen Lei Link: https://lore.kernel.org/r/20201207084752.1665-2-thunder.leizhen@huawei.com Signed-off-by: Arnd Bergmann --- arch/arm/boot/dts/mmp2-olpc-xo-1-75.dts | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/arm/boot/dts/mmp2-olpc-xo-1-75.dts b/arch/arm/boot/dts/mmp2-olpc-xo-1-75.dts index adde62d6fce7..342304f5653a 100644 --- a/arch/arm/boot/dts/mmp2-olpc-xo-1-75.dts +++ b/arch/arm/boot/dts/mmp2-olpc-xo-1-75.dts @@ -223,8 +223,7 @@ }; &ssp3 { - /delete-property/ #address-cells; - /delete-property/ #size-cells; + #address-cells = <0>; spi-slave; status = "okay"; ready-gpios = <&gpio 125 GPIO_ACTIVE_HIGH>; From 69fe24d1d80feac4289778582cf0a15256d59baf Mon Sep 17 00:00:00 2001 From: Zou Wei Date: Tue, 1 Dec 2020 19:51:53 +0800 Subject: [PATCH 248/296] firmware: xilinx: Mark pm_api_features_map with static keyword Fix the following sparse warning: drivers/firmware/xilinx/zynqmp.c:32:1: warning: symbol 'pm_api_features_map' was not declared. Should it be static?
Signed-off-by: Zou Wei Link: https://lore.kernel.org/r/1606823513-121578-1-git-send-email-zou_wei@huawei.com Signed-off-by: Michal Simek Signed-off-by: Arnd Bergmann --- drivers/firmware/xilinx/zynqmp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/firmware/xilinx/zynqmp.c b/drivers/firmware/xilinx/zynqmp.c index d08ac824c993..fd95edeb702b 100644 --- a/drivers/firmware/xilinx/zynqmp.c +++ b/drivers/firmware/xilinx/zynqmp.c @@ -29,7 +29,7 @@ #define PM_API_FEATURE_CHECK_MAX_ORDER 7 static bool feature_check_enabled; -DEFINE_HASHTABLE(pm_api_features_map, PM_API_FEATURE_CHECK_MAX_ORDER); +static DEFINE_HASHTABLE(pm_api_features_map, PM_API_FEATURE_CHECK_MAX_ORDER); /** * struct pm_api_feature_data - PM API Feature data From c02bd115b1d25931159f89c7d9bf47a30f5d4b41 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 9 Dec 2020 14:39:56 -0800 Subject: [PATCH 249/296] Revert "geneve: pull IP header before ECN decapsulation" This reverts commit 4179b00c04d1 ("geneve: pull IP header before ECN decapsulation"). Eric says: "network header should have been pulled already before hitting geneve_rx()". Let's revert the syzbot fix since it's causing more harm than good, and revisit. 
Suggested-by: Eric Dumazet Reported-by: Jianlin Shi Fixes: 4179b00c04d1 ("geneve: pull IP header before ECN decapsulation") Link: https://bugzilla.kernel.org/show_bug.cgi?id=210569 Link: https://lore.kernel.org/netdev/CANn89iJVWfb=2i7oU1=D55rOyQnBbbikf+Mc6XHMkY7YX-yGEw@mail.gmail.com/ Signed-off-by: Jakub Kicinski --- drivers/net/geneve.c | 20 ++++---------------- 1 file changed, 4 insertions(+), 16 deletions(-) diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c index 8ae9ce2014a4..1426bfc009bc 100644 --- a/drivers/net/geneve.c +++ b/drivers/net/geneve.c @@ -257,21 +257,11 @@ static void geneve_rx(struct geneve_dev *geneve, struct geneve_sock *gs, skb_dst_set(skb, &tun_dst->dst); /* Ignore packet loops (and multicast echo) */ - if (ether_addr_equal(eth_hdr(skb)->h_source, geneve->dev->dev_addr)) - goto rx_error; - - switch (skb_protocol(skb, true)) { - case htons(ETH_P_IP): - if (pskb_may_pull(skb, sizeof(struct iphdr))) - goto rx_error; - break; - case htons(ETH_P_IPV6): - if (pskb_may_pull(skb, sizeof(struct ipv6hdr))) - goto rx_error; - break; - default: - goto rx_error; + if (ether_addr_equal(eth_hdr(skb)->h_source, geneve->dev->dev_addr)) { + geneve->dev->stats.rx_errors++; + goto drop; } + oiph = skb_network_header(skb); skb_reset_network_header(skb); @@ -308,8 +298,6 @@ static void geneve_rx(struct geneve_dev *geneve, struct geneve_sock *gs, dev_sw_netstats_rx_add(geneve->dev, len); return; -rx_error: - geneve->dev->stats.rx_errors++; drop: /* Consume bad packet */ kfree_skb(skb); From cfb33e174fa25d9d830683a1e1b22850546103b5 Mon Sep 17 00:00:00 2001 From: Sven Auhagen Date: Wed, 11 Nov 2020 18:04:48 +0100 Subject: [PATCH 250/296] igb: XDP xmit back fix error code The igb XDP xmit back function should only return defined error codes. 
Fixes: 9cbc948b5a20 ("igb: add XDP support") Reported-by: Dan Carpenter Acked-by: Maciej Fijalkowski Signed-off-by: Sven Auhagen Tested-by: Sandeep Penigalapati Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/igb/igb_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index 5fc2c381da55..08cc6f59aa2e 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -2910,7 +2910,7 @@ static int igb_xdp_xmit_back(struct igb_adapter *adapter, struct xdp_buff *xdp) */ tx_ring = adapter->xdp_prog ? igb_xdp_tx_queue_mapping(adapter) : NULL; if (unlikely(!tx_ring)) - return -ENXIO; + return IGB_XDP_CONSUMED; nq = txring_txq(tx_ring); __netif_tx_lock(nq, cpu); From b829ec1a66bc3dda4b01ab4c57d41ad1a1f82fed Mon Sep 17 00:00:00 2001 From: Sven Auhagen Date: Wed, 11 Nov 2020 18:04:49 +0100 Subject: [PATCH 251/296] igb: take VLAN double header into account Increase the packet header padding to include double VLAN tagging. This patch uses a macro for this. 
Fixes: 9cbc948b5a20 ("igb: add XDP support") Suggested-by: Maciej Fijalkowski Reviewed-by: Maciej Fijalkowski Acked-by: Maciej Fijalkowski Signed-off-by: Sven Auhagen Tested-by: Sandeep Penigalapati Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/igb/igb.h | 5 +++++ drivers/net/ethernet/intel/igb/igb_main.c | 7 +++---- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/intel/igb/igb.h b/drivers/net/ethernet/intel/igb/igb.h index 0286d2fceee4..aaa954aae574 100644 --- a/drivers/net/ethernet/intel/igb/igb.h +++ b/drivers/net/ethernet/intel/igb/igb.h @@ -138,6 +138,8 @@ struct vf_mac_filter { /* this is the size past which hardware will drop packets when setting LPE=0 */ #define MAXIMUM_ETHERNET_VLAN_SIZE 1522 +#define IGB_ETH_PKT_HDR_PAD (ETH_HLEN + ETH_FCS_LEN + (VLAN_HLEN * 2)) + /* Supported Rx Buffer Sizes */ #define IGB_RXBUFFER_256 256 #define IGB_RXBUFFER_1536 1536 @@ -247,6 +249,9 @@ enum igb_tx_flags { #define IGB_SFF_ADDRESSING_MODE 0x4 #define IGB_SFF_8472_UNSUP 0x00 +/* TX resources are shared between XDP and netstack + * and we need to tag the buffer type to distinguish them + */ enum igb_tx_buf_type { IGB_TYPE_SKB = 0, IGB_TYPE_XDP, diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index 08cc6f59aa2e..0a9198037b98 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -2826,7 +2826,7 @@ static int igb_setup_tc(struct net_device *dev, enum tc_setup_type type, static int igb_xdp_setup(struct net_device *dev, struct bpf_prog *prog) { - int i, frame_size = dev->mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN; + int i, frame_size = dev->mtu + IGB_ETH_PKT_HDR_PAD; struct igb_adapter *adapter = netdev_priv(dev); bool running = netif_running(dev); struct bpf_prog *old_prog; @@ -3950,8 +3950,7 @@ static int igb_sw_init(struct igb_adapter *adapter) /* set default work limits */ adapter->tx_work_limit = IGB_DEFAULT_TX_WORK; - 
adapter->max_frame_size = netdev->mtu + ETH_HLEN + ETH_FCS_LEN + - VLAN_HLEN; + adapter->max_frame_size = netdev->mtu + IGB_ETH_PKT_HDR_PAD; adapter->min_frame_size = ETH_ZLEN + ETH_FCS_LEN; spin_lock_init(&adapter->nfc_lock); @@ -6491,7 +6490,7 @@ static void igb_get_stats64(struct net_device *netdev, static int igb_change_mtu(struct net_device *netdev, int new_mtu) { struct igb_adapter *adapter = netdev_priv(netdev); - int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN; + int max_frame = new_mtu + IGB_ETH_PKT_HDR_PAD; if (adapter->xdp_prog) { int i; From 2e2bb5594ca0a5885dc93055ab0f9b5fbcdaa403 Mon Sep 17 00:00:00 2001 From: Sven Auhagen Date: Wed, 11 Nov 2020 18:04:50 +0100 Subject: [PATCH 252/296] igb: XDP extack message on error Add an extack error message when the RX buffer size is too small for the frame size. Fixes: 9cbc948b5a20 ("igb: add XDP support") Suggested-by: Maciej Fijalkowski Reviewed-by: Maciej Fijalkowski Acked-by: Maciej Fijalkowski Signed-off-by: Sven Auhagen Tested-by: Sandeep Penigalapati Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/igb/igb_main.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index 0a9198037b98..a0a310a75cc5 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -2824,20 +2824,25 @@ static int igb_setup_tc(struct net_device *dev, enum tc_setup_type type, } } -static int igb_xdp_setup(struct net_device *dev, struct bpf_prog *prog) +static int igb_xdp_setup(struct net_device *dev, struct netdev_bpf *bpf) { int i, frame_size = dev->mtu + IGB_ETH_PKT_HDR_PAD; struct igb_adapter *adapter = netdev_priv(dev); + struct bpf_prog *prog = bpf->prog, *old_prog; bool running = netif_running(dev); - struct bpf_prog *old_prog; bool need_reset; /* verify igb ring attributes are sufficient for XDP */ for (i = 0; i < adapter->num_rx_queues; i++) { 
struct igb_ring *ring = adapter->rx_ring[i]; - if (frame_size > igb_rx_bufsz(ring)) + if (frame_size > igb_rx_bufsz(ring)) { + NL_SET_ERR_MSG_MOD(bpf->extack, + "The RX buffer size is too small for the frame size"); + netdev_warn(dev, "XDP RX buffer size %d is too small for the frame size %d\n", + igb_rx_bufsz(ring), frame_size); return -EINVAL; + } } old_prog = xchg(&adapter->xdp_prog, prog); @@ -2869,7 +2874,7 @@ static int igb_xdp(struct net_device *dev, struct netdev_bpf *xdp) { switch (xdp->command) { case XDP_SETUP_PROG: - return igb_xdp_setup(dev, xdp->prog); + return igb_xdp_setup(dev, xdp); default: return -EINVAL; } @@ -6499,7 +6504,9 @@ static int igb_change_mtu(struct net_device *netdev, int new_mtu) struct igb_ring *ring = adapter->rx_ring[i]; if (max_frame > igb_rx_bufsz(ring)) { - netdev_warn(adapter->netdev, "Requested MTU size is not supported with XDP\n"); + netdev_warn(adapter->netdev, + "Requested MTU size is not supported with XDP. Max frame size is %d\n", + max_frame); return -EINVAL; } } From 681429dba99249546dda160e266e56035a2d750b Mon Sep 17 00:00:00 2001 From: Sven Auhagen Date: Wed, 11 Nov 2020 18:04:51 +0100 Subject: [PATCH 253/296] igb: skb add metasize for xdp add metasize if it is set in xdp Fixes: 9cbc948b5a20 ("igb: add XDP support") Suggested-by: Maciej Fijalkowski Reviewed-by: Maciej Fijalkowski Acked-by: Maciej Fijalkowski Signed-off-by: Sven Auhagen Tested-by: Sandeep Penigalapati Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/igb/igb_main.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index a0a310a75cc5..8e412aaba0e3 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -8357,6 +8357,7 @@ static struct sk_buff *igb_build_skb(struct igb_ring *rx_ring, SKB_DATA_ALIGN(xdp->data_end - xdp->data_hard_start); #endif + unsigned int metasize = xdp->data - xdp->data_meta; struct 
sk_buff *skb; /* prefetch first cache line of first page */ @@ -8371,6 +8372,9 @@ static struct sk_buff *igb_build_skb(struct igb_ring *rx_ring, skb_reserve(skb, xdp->data - xdp->data_hard_start); __skb_put(skb, xdp->data_end - xdp->data); + if (metasize) + skb_metadata_set(skb, metasize); + /* pull timestamp out of packet data */ if (igb_test_staterr(rx_desc, E1000_RXDADV_STAT_TSIP)) { igb_ptp_rx_pktstamp(rx_ring->q_vector, skb->data, skb); From 3eca859008a75a4ad363db65b0fe83be1a3d5ad1 Mon Sep 17 00:00:00 2001 From: Sven Auhagen Date: Wed, 11 Nov 2020 18:04:52 +0100 Subject: [PATCH 254/296] igb: use xdp_do_flush Since it is a new XDP implementation change xdp_do_flush_map to xdp_do_flush. Fixes: 9cbc948b5a20 ("igb: add XDP support") Suggested-by: Maciej Fijalkowski Reviewed-by: Maciej Fijalkowski Acked-by: Maciej Fijalkowski Signed-off-by: Sven Auhagen Tested-by: Sandeep Penigalapati Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/igb/igb_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index 8e412aaba0e3..af6ace6c0f87 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -8781,7 +8781,7 @@ static int igb_clean_rx_irq(struct igb_q_vector *q_vector, const int budget) rx_ring->skb = skb; if (xdp_xmit & IGB_XDP_REDIR) - xdp_do_flush_map(); + xdp_do_flush(); if (xdp_xmit & IGB_XDP_TX) { struct igb_ring *tx_ring = igb_xdp_tx_queue_mapping(adapter); From ec107e775d84392b35db46f6c3baa441e074042e Mon Sep 17 00:00:00 2001 From: Sven Auhagen Date: Wed, 11 Nov 2020 18:04:53 +0100 Subject: [PATCH 255/296] igb: avoid transmit queue timeout in xdp path Since we share the transmit queue with the network stack, it is possible that we run into a transmit queue timeout. This will reset the queue. This happens under high load when XDP is using the transmit queue pretty much exclusively. 
netdev_start_xmit() sets the trans_start variable of the transmit queue to jiffies which is later utilized by dev_watchdog(), so to avoid timeout, let stack know that XDP xmit happened by bumping the trans_start within XDP Tx routines to jiffies. Fixes: 9cbc948b5a20 ("igb: add XDP support") Acked-by: Maciej Fijalkowski Signed-off-by: Sven Auhagen Tested-by: Sandeep Penigalapati Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/igb/igb_main.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index af6ace6c0f87..0d343d050973 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -2919,6 +2919,8 @@ static int igb_xdp_xmit_back(struct igb_adapter *adapter, struct xdp_buff *xdp) nq = txring_txq(tx_ring); __netif_tx_lock(nq, cpu); + /* Avoid transmit queue timeout since we share it with the slow path */ + nq->trans_start = jiffies; ret = igb_xmit_xdp_ring(adapter, tx_ring, xdpf); __netif_tx_unlock(nq); @@ -2951,6 +2953,9 @@ static int igb_xdp_xmit(struct net_device *dev, int n, nq = txring_txq(tx_ring); __netif_tx_lock(nq, cpu); + /* Avoid transmit queue timeout since we share it with the slow path */ + nq->trans_start = jiffies; + for (i = 0; i < n; i++) { struct xdp_frame *xdpf = frames[i]; int err; From 75aab4e10ae6a4593a60f66d13de755d4e91f400 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20T=C3=B6pel?= Date: Tue, 25 Aug 2020 19:27:34 +0200 Subject: [PATCH 256/296] i40e: avoid premature Rx buffer reuse MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The page recycle code, incorrectly, relied on that a page fragment could not be freed inside xdp_do_redirect(). This assumption leads to that page fragments that are used by the stack/XDP redirect can be reused and overwritten. To avoid this, store the page count prior invoking xdp_do_redirect(). 
Longer explanation:

Intel NICs have a recycle mechanism. The main idea is that a page is
split into two parts. One part is owned by the driver, one part might
be owned by someone else, such as the stack.

t0: Page is allocated, and put on the Rx ring
              +---------------
used by NIC ->| upper buffer
(rx_buffer)   +---------------
              | lower buffer
              +---------------
  page count  == USHRT_MAX
  rx_buffer->pagecnt_bias == USHRT_MAX

t1: Buffer is received, and passed to the stack (e.g.)
              +---------------
              | upper buff (skb)
              +---------------
used by NIC ->| lower buffer
(rx_buffer)   +---------------
  page count  == USHRT_MAX
  rx_buffer->pagecnt_bias == USHRT_MAX - 1

t2: Buffer is received, and redirected
              +---------------
              | upper buff (skb)
              +---------------
used by NIC ->| lower buffer
(rx_buffer)   +---------------

Now, prior to calling xdp_do_redirect():
  page count  == USHRT_MAX
  rx_buffer->pagecnt_bias == USHRT_MAX - 2

This means that buffer *cannot* be flipped/reused, because the skb is
still using it.

The problem arises when xdp_do_redirect() actually frees the
segment. Then we get:
  page count  == USHRT_MAX - 1
  rx_buffer->pagecnt_bias == USHRT_MAX - 2

From a recycle perspective, the buffer can be flipped and reused,
which means that the skb data area is passed to the Rx HW ring!

To work around this, the page count is stored prior to calling
xdp_do_redirect().

Note that this is not optimal, since the NIC could actually reuse the
"lower buffer" again. However, then we need to track whether
XDP_REDIRECT consumed the buffer or not.
Fixes: d9314c474d4f ("i40e: add support for XDP_REDIRECT") Reported-and-analyzed-by: Li RongQing Signed-off-by: Björn Töpel Tested-by: George Kuruvinakunnel Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/i40e/i40e_txrx.c | 27 +++++++++++++++------ 1 file changed, 20 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c index d43ce13a93c9..3f5825fa67c9 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c @@ -1850,6 +1850,7 @@ static inline bool i40e_page_is_reusable(struct page *page) * the adapter for another receive * * @rx_buffer: buffer containing the page + * @rx_buffer_pgcnt: buffer page refcount pre xdp_do_redirect() call * * If page is reusable, rx_buffer->page_offset is adjusted to point to * an unused region in the page. @@ -1872,7 +1873,8 @@ static inline bool i40e_page_is_reusable(struct page *page) * * In either case, if the page is reusable its refcount is increased. **/ -static bool i40e_can_reuse_rx_page(struct i40e_rx_buffer *rx_buffer) +static bool i40e_can_reuse_rx_page(struct i40e_rx_buffer *rx_buffer, + int rx_buffer_pgcnt) { unsigned int pagecnt_bias = rx_buffer->pagecnt_bias; struct page *page = rx_buffer->page; @@ -1883,7 +1885,7 @@ static bool i40e_can_reuse_rx_page(struct i40e_rx_buffer *rx_buffer) #if (PAGE_SIZE < 8192) /* if we are only owner of page we can reuse it */ - if (unlikely((page_count(page) - pagecnt_bias) > 1)) + if (unlikely((rx_buffer_pgcnt - pagecnt_bias) > 1)) return false; #else #define I40E_LAST_OFFSET \ @@ -1942,16 +1944,24 @@ static void i40e_add_rx_frag(struct i40e_ring *rx_ring, * i40e_get_rx_buffer - Fetch Rx buffer and synchronize data for use * @rx_ring: rx descriptor ring to transact packets on * @size: size of buffer to add to skb + * @rx_buffer_pgcnt: buffer page refcount * * This function will pull an Rx buffer from the ring and synchronize it * for use by the CPU. 
*/ static struct i40e_rx_buffer *i40e_get_rx_buffer(struct i40e_ring *rx_ring, - const unsigned int size) + const unsigned int size, + int *rx_buffer_pgcnt) { struct i40e_rx_buffer *rx_buffer; rx_buffer = i40e_rx_bi(rx_ring, rx_ring->next_to_clean); + *rx_buffer_pgcnt = +#if (PAGE_SIZE < 8192) + page_count(rx_buffer->page); +#else + 0; +#endif prefetch_page_address(rx_buffer->page); /* we are reusing so sync this buffer for CPU use */ @@ -2102,14 +2112,16 @@ static struct sk_buff *i40e_build_skb(struct i40e_ring *rx_ring, * i40e_put_rx_buffer - Clean up used buffer and either recycle or free * @rx_ring: rx descriptor ring to transact packets on * @rx_buffer: rx buffer to pull data from + * @rx_buffer_pgcnt: rx buffer page refcount pre xdp_do_redirect() call * * This function will clean up the contents of the rx_buffer. It will * either recycle the buffer or unmap it and free the associated resources. */ static void i40e_put_rx_buffer(struct i40e_ring *rx_ring, - struct i40e_rx_buffer *rx_buffer) + struct i40e_rx_buffer *rx_buffer, + int rx_buffer_pgcnt) { - if (i40e_can_reuse_rx_page(rx_buffer)) { + if (i40e_can_reuse_rx_page(rx_buffer, rx_buffer_pgcnt)) { /* hand second half of page back to the ring */ i40e_reuse_rx_page(rx_ring, rx_buffer); } else { @@ -2336,6 +2348,7 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget) while (likely(total_rx_packets < (unsigned int)budget)) { struct i40e_rx_buffer *rx_buffer; union i40e_rx_desc *rx_desc; + int rx_buffer_pgcnt; unsigned int size; u64 qword; @@ -2378,7 +2391,7 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget) break; i40e_trace(clean_rx_irq, rx_ring, rx_desc, skb); - rx_buffer = i40e_get_rx_buffer(rx_ring, size); + rx_buffer = i40e_get_rx_buffer(rx_ring, size, &rx_buffer_pgcnt); /* retrieve a buffer from the ring */ if (!skb) { @@ -2421,7 +2434,7 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget) break; } - i40e_put_rx_buffer(rx_ring, rx_buffer); + 
i40e_put_rx_buffer(rx_ring, rx_buffer, rx_buffer_pgcnt); cleaned_count++; if (i40e_is_non_eop(rx_ring, rx_desc, skb)) From a06316dc87bdc000f7f39a315476957af2ba0f05 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20T=C3=B6pel?= Date: Tue, 25 Aug 2020 19:27:35 +0200 Subject: [PATCH 257/296] ixgbe: avoid premature Rx buffer reuse MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The page recycle code, incorrectly, relied on that a page fragment could not be freed inside xdp_do_redirect(). This assumption leads to that page fragments that are used by the stack/XDP redirect can be reused and overwritten. To avoid this, store the page count prior invoking xdp_do_redirect(). Fixes: 6453073987ba ("ixgbe: add initial support for xdp redirect") Reported-and-analyzed-by: Li RongQing Signed-off-by: Björn Töpel Tested-by: Sandeep Penigalapati Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 24 +++++++++++++------ 1 file changed, 17 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 45ae33e15303..f3f449f53920 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -1945,7 +1945,8 @@ static inline bool ixgbe_page_is_reserved(struct page *page) return (page_to_nid(page) != numa_mem_id()) || page_is_pfmemalloc(page); } -static bool ixgbe_can_reuse_rx_page(struct ixgbe_rx_buffer *rx_buffer) +static bool ixgbe_can_reuse_rx_page(struct ixgbe_rx_buffer *rx_buffer, + int rx_buffer_pgcnt) { unsigned int pagecnt_bias = rx_buffer->pagecnt_bias; struct page *page = rx_buffer->page; @@ -1956,7 +1957,7 @@ static bool ixgbe_can_reuse_rx_page(struct ixgbe_rx_buffer *rx_buffer) #if (PAGE_SIZE < 8192) /* if we are only owner of page we can reuse it */ - if (unlikely((page_ref_count(page) - pagecnt_bias) > 1)) + if (unlikely((rx_buffer_pgcnt - pagecnt_bias) > 1)) return false; 
#else /* The last offset is a bit aggressive in that we assume the @@ -2021,11 +2022,18 @@ static void ixgbe_add_rx_frag(struct ixgbe_ring *rx_ring, static struct ixgbe_rx_buffer *ixgbe_get_rx_buffer(struct ixgbe_ring *rx_ring, union ixgbe_adv_rx_desc *rx_desc, struct sk_buff **skb, - const unsigned int size) + const unsigned int size, + int *rx_buffer_pgcnt) { struct ixgbe_rx_buffer *rx_buffer; rx_buffer = &rx_ring->rx_buffer_info[rx_ring->next_to_clean]; + *rx_buffer_pgcnt = +#if (PAGE_SIZE < 8192) + page_count(rx_buffer->page); +#else + 0; +#endif prefetchw(rx_buffer->page); *skb = rx_buffer->skb; @@ -2055,9 +2063,10 @@ skip_sync: static void ixgbe_put_rx_buffer(struct ixgbe_ring *rx_ring, struct ixgbe_rx_buffer *rx_buffer, - struct sk_buff *skb) + struct sk_buff *skb, + int rx_buffer_pgcnt) { - if (ixgbe_can_reuse_rx_page(rx_buffer)) { + if (ixgbe_can_reuse_rx_page(rx_buffer, rx_buffer_pgcnt)) { /* hand second half of page back to the ring */ ixgbe_reuse_rx_page(rx_ring, rx_buffer); } else { @@ -2303,6 +2312,7 @@ static int ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector, union ixgbe_adv_rx_desc *rx_desc; struct ixgbe_rx_buffer *rx_buffer; struct sk_buff *skb; + int rx_buffer_pgcnt; unsigned int size; /* return some buffers to hardware, one at a time is too slow */ @@ -2322,7 +2332,7 @@ static int ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector, */ dma_rmb(); - rx_buffer = ixgbe_get_rx_buffer(rx_ring, rx_desc, &skb, size); + rx_buffer = ixgbe_get_rx_buffer(rx_ring, rx_desc, &skb, size, &rx_buffer_pgcnt); /* retrieve a buffer from the ring */ if (!skb) { @@ -2367,7 +2377,7 @@ static int ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector, break; } - ixgbe_put_rx_buffer(rx_ring, rx_buffer, skb); + ixgbe_put_rx_buffer(rx_ring, rx_buffer, skb, rx_buffer_pgcnt); cleaned_count++; /* place incomplete frames back on ring for completion */ From 1beb7830d3b285b28f7cde3644d59d2590a47e51 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20T=C3=B6pel?= Date: Tue, 25 
Aug 2020 19:27:36 +0200 Subject: [PATCH 258/296] ice: avoid premature Rx buffer reuse MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The page recycle code, incorrectly, relied on that a page fragment could not be freed inside xdp_do_redirect(). This assumption leads to that page fragments that are used by the stack/XDP redirect can be reused and overwritten. To avoid this, store the page count prior invoking xdp_do_redirect(). Fixes: efc2214b6047 ("ice: Add support for XDP") Reported-and-analyzed-by: Li RongQing Signed-off-by: Björn Töpel Tested-by: George Kuruvinakunnel Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_txrx.c | 31 ++++++++++++++++------- 1 file changed, 22 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c index eae75260fe20..23eca2f0a03b 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.c +++ b/drivers/net/ethernet/intel/ice/ice_txrx.c @@ -762,13 +762,15 @@ ice_rx_buf_adjust_pg_offset(struct ice_rx_buf *rx_buf, unsigned int size) /** * ice_can_reuse_rx_page - Determine if page can be reused for another Rx * @rx_buf: buffer containing the page + * @rx_buf_pgcnt: rx_buf page refcount pre xdp_do_redirect() call * * If page is reusable, we have a green light for calling ice_reuse_rx_page, * which will assign the current buffer to the buffer that next_to_alloc is * pointing to; otherwise, the DMA mapping needs to be destroyed and * page freed */ -static bool ice_can_reuse_rx_page(struct ice_rx_buf *rx_buf) +static bool +ice_can_reuse_rx_page(struct ice_rx_buf *rx_buf, int rx_buf_pgcnt) { unsigned int pagecnt_bias = rx_buf->pagecnt_bias; struct page *page = rx_buf->page; @@ -779,7 +781,7 @@ static bool ice_can_reuse_rx_page(struct ice_rx_buf *rx_buf) #if (PAGE_SIZE < 8192) /* if we are only owner of page we can reuse it */ - if (unlikely((page_count(page) - pagecnt_bias) > 1)) + if (unlikely((rx_buf_pgcnt - 
pagecnt_bias) > 1)) return false; #else #define ICE_LAST_OFFSET \ @@ -864,17 +866,24 @@ ice_reuse_rx_page(struct ice_ring *rx_ring, struct ice_rx_buf *old_buf) * @rx_ring: Rx descriptor ring to transact packets on * @skb: skb to be used * @size: size of buffer to add to skb + * @rx_buf_pgcnt: rx_buf page refcount * * This function will pull an Rx buffer from the ring and synchronize it * for use by the CPU. */ static struct ice_rx_buf * ice_get_rx_buf(struct ice_ring *rx_ring, struct sk_buff **skb, - const unsigned int size) + const unsigned int size, int *rx_buf_pgcnt) { struct ice_rx_buf *rx_buf; rx_buf = &rx_ring->rx_buf[rx_ring->next_to_clean]; + *rx_buf_pgcnt = +#if (PAGE_SIZE < 8192) + page_count(rx_buf->page); +#else + 0; +#endif prefetchw(rx_buf->page); *skb = rx_buf->skb; @@ -1006,12 +1015,15 @@ ice_construct_skb(struct ice_ring *rx_ring, struct ice_rx_buf *rx_buf, * ice_put_rx_buf - Clean up used buffer and either recycle or free * @rx_ring: Rx descriptor ring to transact packets on * @rx_buf: Rx buffer to pull data from + * @rx_buf_pgcnt: Rx buffer page count pre xdp_do_redirect() * * This function will update next_to_clean and then clean up the contents * of the rx_buf. It will either recycle the buffer or unmap it and free * the associated resources. 
*/ -static void ice_put_rx_buf(struct ice_ring *rx_ring, struct ice_rx_buf *rx_buf) +static void +ice_put_rx_buf(struct ice_ring *rx_ring, struct ice_rx_buf *rx_buf, + int rx_buf_pgcnt) { u16 ntc = rx_ring->next_to_clean + 1; @@ -1022,7 +1034,7 @@ static void ice_put_rx_buf(struct ice_ring *rx_ring, struct ice_rx_buf *rx_buf) if (!rx_buf) return; - if (ice_can_reuse_rx_page(rx_buf)) { + if (ice_can_reuse_rx_page(rx_buf, rx_buf_pgcnt)) { /* hand second half of page back to the ring */ ice_reuse_rx_page(rx_ring, rx_buf); } else { @@ -1097,6 +1109,7 @@ int ice_clean_rx_irq(struct ice_ring *rx_ring, int budget) struct sk_buff *skb; unsigned int size; u16 stat_err_bits; + int rx_buf_pgcnt; u16 vlan_tag = 0; u8 rx_ptype; @@ -1119,7 +1132,7 @@ int ice_clean_rx_irq(struct ice_ring *rx_ring, int budget) dma_rmb(); if (rx_desc->wb.rxdid == FDIR_DESC_RXDID || !rx_ring->netdev) { - ice_put_rx_buf(rx_ring, NULL); + ice_put_rx_buf(rx_ring, NULL, 0); cleaned_count++; continue; } @@ -1128,7 +1141,7 @@ int ice_clean_rx_irq(struct ice_ring *rx_ring, int budget) ICE_RX_FLX_DESC_PKT_LEN_M; /* retrieve a buffer from the ring */ - rx_buf = ice_get_rx_buf(rx_ring, &skb, size); + rx_buf = ice_get_rx_buf(rx_ring, &skb, size, &rx_buf_pgcnt); if (!size) { xdp.data = NULL; @@ -1168,7 +1181,7 @@ int ice_clean_rx_irq(struct ice_ring *rx_ring, int budget) total_rx_pkts++; cleaned_count++; - ice_put_rx_buf(rx_ring, rx_buf); + ice_put_rx_buf(rx_ring, rx_buf, rx_buf_pgcnt); continue; construct_skb: if (skb) { @@ -1187,7 +1200,7 @@ construct_skb: break; } - ice_put_rx_buf(rx_ring, rx_buf); + ice_put_rx_buf(rx_ring, rx_buf, rx_buf_pgcnt); cleaned_count++; /* skip if it is NOP desc */ From a379b01cd4b2aa3f12786b281a714871574e5ccb Mon Sep 17 00:00:00 2001 From: Vitaly Lifshits Date: Tue, 8 Dec 2020 12:56:32 -0600 Subject: [PATCH 259/296] e1000e: fix S0ix flow to allow S0i3.2 subset entry Changed a configuration in the flows to align with architecture requirements to achieve S0i3.2 substate. 
This helps both i219V and i219LM configurations. Also fixed a typo in the previous commit 632fbd5eb5b0 ("e1000e: fix S0ix flows for cable connected case"). Fixes: 632fbd5eb5b0 ("e1000e: fix S0ix flows for cable connected case"). Signed-off-by: Vitaly Lifshits Tested-by: Aaron Brown Signed-off-by: Tony Nguyen Reviewed-by: Alexander Duyck Signed-off-by: Mario Limonciello Link: https://lore.kernel.org/r/20201208185632.151052-1-mario.limonciello@dell.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/intel/e1000e/netdev.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c index b30f00891c03..128ab6898070 100644 --- a/drivers/net/ethernet/intel/e1000e/netdev.c +++ b/drivers/net/ethernet/intel/e1000e/netdev.c @@ -6475,13 +6475,13 @@ static void e1000e_s0ix_entry_flow(struct e1000_adapter *adapter) /* Ungate PGCB clock */ mac_data = er32(FEXTNVM9); - mac_data |= BIT(28); + mac_data &= ~BIT(28); ew32(FEXTNVM9, mac_data); /* Enable K1 off to enable mPHY Power Gating */ mac_data = er32(FEXTNVM6); mac_data |= BIT(31); - ew32(FEXTNVM12, mac_data); + ew32(FEXTNVM6, mac_data); /* Enable mPHY power gating for any link and speed */ mac_data = er32(FEXTNVM8); @@ -6525,11 +6525,11 @@ static void e1000e_s0ix_exit_flow(struct e1000_adapter *adapter) /* Disable K1 off */ mac_data = er32(FEXTNVM6); mac_data &= ~BIT(31); - ew32(FEXTNVM12, mac_data); + ew32(FEXTNVM6, mac_data); /* Disable Ungate PGCB clock */ mac_data = er32(FEXTNVM9); - mac_data &= ~BIT(28); + mac_data |= BIT(28); ew32(FEXTNVM9, mac_data); /* Cancel not waking from dynamic From a770bf515613c6e12ae904c3593e26016de99448 Mon Sep 17 00:00:00 2001 From: Michal Kubecek Date: Tue, 8 Dec 2020 23:13:51 +0100 Subject: [PATCH 260/296] ethtool: fix stack overflow in ethnl_parse_bitset() Syzbot reported a stack overflow in bitmap_from_arr32() called from ethnl_parse_bitset() when bitset from netlink message is longer 
than target bitmap length. While ethnl_compact_sanity_checks() makes sure that trailing part is all zeros (i.e. the request does not try to touch bits kernel does not recognize), we also need to cap change_bits to nbits so that we don't try to write past the prepared bitmaps. Fixes: 88db6d1e4f62 ("ethtool: add ethnl_parse_bitset() helper") Reported-by: syzbot+9d39fa49d4df294aab93@syzkaller.appspotmail.com Signed-off-by: Michal Kubecek Link: https://lore.kernel.org/r/3487ee3a98e14cd526f55b6caaa959d2dcbcad9f.1607465316.git.mkubecek@suse.cz Signed-off-by: Jakub Kicinski --- net/ethtool/bitset.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/ethtool/bitset.c b/net/ethtool/bitset.c index 1fb3603d92ad..0515d6604b3b 100644 --- a/net/ethtool/bitset.c +++ b/net/ethtool/bitset.c @@ -628,6 +628,8 @@ int ethnl_parse_bitset(unsigned long *val, unsigned long *mask, return ret; change_bits = nla_get_u32(tb[ETHTOOL_A_BITSET_SIZE]); + if (change_bits > nbits) + change_bits = nbits; bitmap_from_arr32(val, nla_data(tb[ETHTOOL_A_BITSET_VALUE]), change_bits); if (change_bits < nbits) From 8ef44b6fe49d2b8d03ba9aa69063612b474f963b Mon Sep 17 00:00:00 2001 From: Wei Wang Date: Tue, 8 Dec 2020 09:55:08 -0800 Subject: [PATCH 261/296] tcp: Retain ECT bits for tos reflection For DCTCP, we have to retain the ECT bits set by the congestion control algorithm on the socket when reflecting syn TOS in syn-ack, in order to make ECN work properly. Fixes: ac8f1710c12b ("tcp: reflect tos value received in SYN to the socket") Reported-by: Alexander Duyck Signed-off-by: Wei Wang Reviewed-by: Eric Dumazet Signed-off-by: David S. 
Miller --- net/ipv4/tcp_ipv4.c | 7 +++++-- net/ipv6/tcp_ipv6.c | 7 +++++-- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 8391aa29e7a4..595dcc3afac5 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -984,7 +984,8 @@ static int tcp_v4_send_synack(const struct sock *sk, struct dst_entry *dst, __tcp_v4_send_check(skb, ireq->ir_loc_addr, ireq->ir_rmt_addr); tos = sock_net(sk)->ipv4.sysctl_tcp_reflect_tos ? - tcp_rsk(req)->syn_tos & ~INET_ECN_MASK : + (tcp_rsk(req)->syn_tos & ~INET_ECN_MASK) | + (inet_sk(sk)->tos & INET_ECN_MASK) : inet_sk(sk)->tos; if (!INET_ECN_is_capable(tos) && @@ -1541,7 +1542,9 @@ struct sock *tcp_v4_syn_recv_sock(const struct sock *sk, struct sk_buff *skb, inet_csk(newsk)->icsk_ext_hdr_len = inet_opt->opt.optlen; newinet->inet_id = prandom_u32(); - /* Set ToS of the new socket based upon the value of incoming SYN. */ + /* Set ToS of the new socket based upon the value of incoming SYN. + * ECT bits are set later in tcp_init_transfer(). + */ if (sock_net(sk)->ipv4.sysctl_tcp_reflect_tos) newinet->tos = tcp_rsk(req)->syn_tos & ~INET_ECN_MASK; diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 992cbf3eb9e3..991dc36f95ff 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -528,7 +528,8 @@ static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst, fl6->flowlabel = ip6_flowlabel(ipv6_hdr(ireq->pktopts)); tclass = sock_net(sk)->ipv4.sysctl_tcp_reflect_tos ? - tcp_rsk(req)->syn_tos & ~INET_ECN_MASK : + (tcp_rsk(req)->syn_tos & ~INET_ECN_MASK) | + (np->tclass & INET_ECN_MASK) : np->tclass; if (!INET_ECN_is_capable(tclass) && @@ -1320,7 +1321,9 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff * if (np->repflow) newnp->flow_label = ip6_flowlabel(ipv6_hdr(skb)); - /* Set ToS of the new socket based upon the value of incoming SYN. */ + /* Set ToS of the new socket based upon the value of incoming SYN. 
+ * ECT bits are set later in tcp_init_transfer(). + */ if (sock_net(sk)->ipv4.sysctl_tcp_reflect_tos) newnp->tclass = tcp_rsk(req)->syn_tos & ~INET_ECN_MASK; From 5137d303659d8c324e67814b1cc2e1bc0c0d9836 Mon Sep 17 00:00:00 2001 From: Chris Mi Date: Tue, 8 Dec 2020 10:48:35 +0800 Subject: [PATCH 262/296] net: flow_offload: Fix memory leak for indirect flow block The offending commit introduces a cleanup callback that is invoked when the driver module is removed to clean up the tunnel device flow block. But it returns on the first iteration of the for loop. The remaining indirect flow blocks will never be freed. Fixes: 1fac52da5942 ("net: flow_offload: consolidate indirect flow_block infrastructure") CC: Pablo Neira Ayuso Signed-off-by: Chris Mi Reviewed-by: Roi Dayan --- net/core/flow_offload.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/net/core/flow_offload.c b/net/core/flow_offload.c index d4474c812b64..715b67f6c62f 100644 --- a/net/core/flow_offload.c +++ b/net/core/flow_offload.c @@ -381,10 +381,8 @@ static void __flow_block_indr_cleanup(void (*release)(void *cb_priv), list_for_each_entry_safe(this, next, &flow_block_indr_list, indr.list) { if (this->release == release && - this->indr.cb_priv == cb_priv) { + this->indr.cb_priv == cb_priv) list_move(&this->indr.list, cleanup_list); - return; - } } } From 299bcb55ecd1412f6df606e9dc0912d55610029e Mon Sep 17 00:00:00 2001 From: Neal Cardwell Date: Tue, 8 Dec 2020 22:57:59 -0500 Subject: [PATCH 263/296] tcp: fix cwnd-limited bug for TSO deferral where we send nothing When cwnd is not a multiple of the TSO skb size of N*MSS, we can get into persistent scenarios where we have the following sequence: (1) ACK for full-sized skb of N*MSS arrives -> tcp_write_xmit() transmit full-sized skb with N*MSS -> move pacing release time forward -> exit tcp_write_xmit() because pacing time is in the future (2) TSQ callback or TCP internal pacing timer fires -> try to transmit next skb, but TSO deferral 
finds remainder of available cwnd is not big enough to trigger an immediate send now, so we defer sending until the next ACK. (3) repeat... So we can get into a case where we never mark ourselves as cwnd-limited for many seconds at a time, even with bulk/infinite-backlog senders, because: o In case (1) above, every time in tcp_write_xmit() we have enough cwnd to send a full-sized skb, we are not fully using the cwnd (because cwnd is not a multiple of the TSO skb size). So every time we send data, we are not cwnd limited, and so in the cwnd-limited tracking code in tcp_cwnd_validate() we mark ourselves as not cwnd-limited. o In case (2) above, every time in tcp_write_xmit() that we try to transmit the "remainder" of the cwnd but defer, we set the local variable is_cwnd_limited to true, but we do not send any packets, so sent_pkts is zero, so we don't call the cwnd-limited logic to update tp->is_cwnd_limited. Fixes: ca8a22634381 ("tcp: make cwnd-limited checks measurement-based, and gentler") Reported-by: Ingemar Johansson Signed-off-by: Neal Cardwell Signed-off-by: Yuchung Cheng Acked-by: Soheil Hassas Yeganeh Signed-off-by: Eric Dumazet Link: https://lore.kernel.org/r/20201209035759.1225145-1-ncardwell.kernel@gmail.com Signed-off-by: Jakub Kicinski --- net/ipv4/tcp_output.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index bf48cd73e967..99011768c264 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -1880,7 +1880,8 @@ static void tcp_cwnd_validate(struct sock *sk, bool is_cwnd_limited) * window, and remember whether we were cwnd-limited then. 
*/ if (!before(tp->snd_una, tp->max_packets_seq) || - tp->packets_out > tp->max_packets_out) { + tp->packets_out > tp->max_packets_out || + is_cwnd_limited) { tp->max_packets_out = tp->packets_out; tp->max_packets_seq = tp->snd_nxt; tp->is_cwnd_limited = is_cwnd_limited; @@ -2702,6 +2703,10 @@ repair: else tcp_chrono_stop(sk, TCP_CHRONO_RWND_LIMITED); + is_cwnd_limited |= (tcp_packets_in_flight(tp) >= tp->snd_cwnd); + if (likely(sent_pkts || is_cwnd_limited)) + tcp_cwnd_validate(sk, is_cwnd_limited); + if (likely(sent_pkts)) { if (tcp_in_cwnd_reduction(sk)) tp->prr_out += sent_pkts; @@ -2709,8 +2714,6 @@ repair: /* Send one loss probe per tail loss episode. */ if (push_one != 2) tcp_schedule_loss_probe(sk, false); - is_cwnd_limited |= (tcp_packets_in_flight(tp) >= tp->snd_cwnd); - tcp_cwnd_validate(sk, is_cwnd_limited); return false; } return !tp->packets_out && !tcp_write_queue_empty(sk); From fed91613c9dd455dd154b22fa8e11b8526466082 Mon Sep 17 00:00:00 2001 From: Moshe Shemesh Date: Wed, 9 Dec 2020 15:03:38 +0200 Subject: [PATCH 264/296] net/mlx4_en: Avoid scheduling restart task if it is already running Add restarting state flag to avoid scheduling another restart task while such task is already running. Change task name from watchdog_task to restart_task to better fit the task role. Fixes: 1e338db56e5a ("mlx4_en: Fix a race at restart task") Signed-off-by: Moshe Shemesh Signed-off-by: Tariq Toukan Signed-off-by: David S. 
Miller --- .../net/ethernet/mellanox/mlx4/en_netdev.c | 20 ++++++++++++------- drivers/net/ethernet/mellanox/mlx4/mlx4_en.h | 7 ++++++- 2 files changed, 19 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index 106513f772c3..1a2b0bd64aa9 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -1378,8 +1378,10 @@ static void mlx4_en_tx_timeout(struct net_device *dev, unsigned int txqueue) tx_ring->cons, tx_ring->prod); priv->port_stats.tx_timeout++; - en_dbg(DRV, priv, "Scheduling watchdog\n"); - queue_work(mdev->workqueue, &priv->watchdog_task); + if (!test_and_set_bit(MLX4_EN_STATE_FLAG_RESTARTING, &priv->state)) { + en_dbg(DRV, priv, "Scheduling port restart\n"); + queue_work(mdev->workqueue, &priv->restart_task); + } } @@ -1829,6 +1831,7 @@ int mlx4_en_start_port(struct net_device *dev) local_bh_enable(); } + clear_bit(MLX4_EN_STATE_FLAG_RESTARTING, &priv->state); netif_tx_start_all_queues(dev); netif_device_attach(dev); @@ -1999,7 +2002,7 @@ void mlx4_en_stop_port(struct net_device *dev, int detach) static void mlx4_en_restart(struct work_struct *work) { struct mlx4_en_priv *priv = container_of(work, struct mlx4_en_priv, - watchdog_task); + restart_task); struct mlx4_en_dev *mdev = priv->mdev; struct net_device *dev = priv->dev; @@ -2377,7 +2380,7 @@ static int mlx4_en_change_mtu(struct net_device *dev, int new_mtu) if (netif_running(dev)) { mutex_lock(&mdev->state_lock); if (!mdev->device_up) { - /* NIC is probably restarting - let watchdog task reset + /* NIC is probably restarting - let restart task reset * the port */ en_dbg(DRV, priv, "Change MTU called with card down!?\n"); } else { @@ -2386,7 +2389,9 @@ static int mlx4_en_change_mtu(struct net_device *dev, int new_mtu) if (err) { en_err(priv, "Failed restarting port:%d\n", priv->port); - queue_work(mdev->workqueue, &priv->watchdog_task); + if 
(!test_and_set_bit(MLX4_EN_STATE_FLAG_RESTARTING, + &priv->state)) + queue_work(mdev->workqueue, &priv->restart_task); } } mutex_unlock(&mdev->state_lock); @@ -2792,7 +2797,8 @@ static int mlx4_xdp_set(struct net_device *dev, struct bpf_prog *prog) if (err) { en_err(priv, "Failed starting port %d for XDP change\n", priv->port); - queue_work(mdev->workqueue, &priv->watchdog_task); + if (!test_and_set_bit(MLX4_EN_STATE_FLAG_RESTARTING, &priv->state)) + queue_work(mdev->workqueue, &priv->restart_task); } } @@ -3165,7 +3171,7 @@ int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port, priv->counter_index = MLX4_SINK_COUNTER_INDEX(mdev->dev); spin_lock_init(&priv->stats_lock); INIT_WORK(&priv->rx_mode_task, mlx4_en_do_set_rx_mode); - INIT_WORK(&priv->watchdog_task, mlx4_en_restart); + INIT_WORK(&priv->restart_task, mlx4_en_restart); INIT_WORK(&priv->linkstate_task, mlx4_en_linkstate); INIT_DELAYED_WORK(&priv->stats_task, mlx4_en_do_get_stats); INIT_DELAYED_WORK(&priv->service_task, mlx4_en_service_task); diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h index a46efe37cfa9..fd9535bde1b8 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h @@ -530,6 +530,10 @@ struct mlx4_en_stats_bitmap { struct mutex mutex; /* for mutual access to stats bitmap */ }; +enum { + MLX4_EN_STATE_FLAG_RESTARTING, +}; + struct mlx4_en_priv { struct mlx4_en_dev *mdev; struct mlx4_en_port_profile *prof; @@ -595,7 +599,7 @@ struct mlx4_en_priv { struct mlx4_en_cq *rx_cq[MAX_RX_RINGS]; struct mlx4_qp drop_qp; struct work_struct rx_mode_task; - struct work_struct watchdog_task; + struct work_struct restart_task; struct work_struct linkstate_task; struct delayed_work stats_task; struct delayed_work service_task; @@ -641,6 +645,7 @@ struct mlx4_en_priv { u32 pflags; u8 rss_key[MLX4_EN_RSS_KEY_SIZE]; u8 rss_hash_fn; + unsigned long state; }; enum mlx4_en_wol { From 
ba603d9d7b1215c72513d7c7aa02b6775fd4891b Mon Sep 17 00:00:00 2001 From: Moshe Shemesh Date: Wed, 9 Dec 2020 15:03:39 +0200 Subject: [PATCH 265/296] net/mlx4_en: Handle TX error CQE In case error CQE was found while polling TX CQ, the QP is in error state and all posted WQEs will generate error CQEs without any data transmitted. Fix it by reopening the channels, via same method used for TX timeout handling. In addition add some more info on error CQE and WQE for debug. Fixes: bd2f631d7c60 ("net/mlx4_en: Notify user when TX ring in error state") Signed-off-by: Moshe Shemesh Signed-off-by: Tariq Toukan Signed-off-by: David S. Miller --- .../net/ethernet/mellanox/mlx4/en_netdev.c | 1 + drivers/net/ethernet/mellanox/mlx4/en_tx.c | 40 +++++++++++++++---- drivers/net/ethernet/mellanox/mlx4/mlx4_en.h | 5 +++ 3 files changed, 39 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index 1a2b0bd64aa9..6f290319b617 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -1735,6 +1735,7 @@ int mlx4_en_start_port(struct net_device *dev) mlx4_en_deactivate_cq(priv, cq); goto tx_err; } + clear_bit(MLX4_EN_TX_RING_STATE_RECOVERING, &tx_ring->state); if (t != TX_XDP) { tx_ring->tx_queue = netdev_get_tx_queue(dev, i); tx_ring->recycle_ring = NULL; diff --git a/drivers/net/ethernet/mellanox/mlx4/en_tx.c b/drivers/net/ethernet/mellanox/mlx4/en_tx.c index 3ddb7268e415..59b097cda327 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_tx.c @@ -392,6 +392,35 @@ int mlx4_en_free_tx_buf(struct net_device *dev, struct mlx4_en_tx_ring *ring) return cnt; } +static void mlx4_en_handle_err_cqe(struct mlx4_en_priv *priv, struct mlx4_err_cqe *err_cqe, + u16 cqe_index, struct mlx4_en_tx_ring *ring) +{ + struct mlx4_en_dev *mdev = priv->mdev; + struct mlx4_en_tx_info *tx_info; + struct mlx4_en_tx_desc 
*tx_desc; + u16 wqe_index; + int desc_size; + + en_err(priv, "CQE error - cqn 0x%x, ci 0x%x, vendor syndrome: 0x%x syndrome: 0x%x\n", + ring->sp_cqn, cqe_index, err_cqe->vendor_err_syndrome, err_cqe->syndrome); + print_hex_dump(KERN_WARNING, "", DUMP_PREFIX_OFFSET, 16, 1, err_cqe, sizeof(*err_cqe), + false); + + wqe_index = be16_to_cpu(err_cqe->wqe_index) & ring->size_mask; + tx_info = &ring->tx_info[wqe_index]; + desc_size = tx_info->nr_txbb << LOG_TXBB_SIZE; + en_err(priv, "Related WQE - qpn 0x%x, wqe index 0x%x, wqe size 0x%x\n", ring->qpn, + wqe_index, desc_size); + tx_desc = ring->buf + (wqe_index << LOG_TXBB_SIZE); + print_hex_dump(KERN_WARNING, "", DUMP_PREFIX_OFFSET, 16, 1, tx_desc, desc_size, false); + + if (test_and_set_bit(MLX4_EN_STATE_FLAG_RESTARTING, &priv->state)) + return; + + en_err(priv, "Scheduling port restart\n"); + queue_work(mdev->workqueue, &priv->restart_task); +} + int mlx4_en_process_tx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int napi_budget) { @@ -438,13 +467,10 @@ int mlx4_en_process_tx_cq(struct net_device *dev, dma_rmb(); if (unlikely((cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) == - MLX4_CQE_OPCODE_ERROR)) { - struct mlx4_err_cqe *cqe_err = (struct mlx4_err_cqe *)cqe; - - en_err(priv, "CQE error - vendor syndrome: 0x%x syndrome: 0x%x\n", - cqe_err->vendor_err_syndrome, - cqe_err->syndrome); - } + MLX4_CQE_OPCODE_ERROR)) + if (!test_and_set_bit(MLX4_EN_TX_RING_STATE_RECOVERING, &ring->state)) + mlx4_en_handle_err_cqe(priv, (struct mlx4_err_cqe *)cqe, index, + ring); /* Skip over last polled CQE */ new_index = be16_to_cpu(cqe->wqe_index) & size_mask; diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h index fd9535bde1b8..30378e4c90b5 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h @@ -271,6 +271,10 @@ struct mlx4_en_page_cache { } buf[MLX4_EN_CACHE_SIZE]; }; +enum { + MLX4_EN_TX_RING_STATE_RECOVERING, +}; + 
struct mlx4_en_priv; struct mlx4_en_tx_ring { @@ -317,6 +321,7 @@ struct mlx4_en_tx_ring { * Only queue_stopped might be used if BQL is not properly working. */ unsigned long queue_stopped; + unsigned long state; struct mlx4_hwq_resources sp_wqres; struct mlx4_qp sp_qp; struct mlx4_qp_context sp_context; From c5b58c8c860db330c0b8b891b69014ee9d470dab Mon Sep 17 00:00:00 2001 From: Evan Quan Date: Wed, 9 Dec 2020 16:34:22 +0800 Subject: [PATCH 266/296] drm/amd/pm: typo fix (CUSTOM -> COMPUTE) The "COMPUTE" was wrongly spelled as "CUSTOM". Signed-off-by: Evan Quan Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org # 5.9.x --- drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c index 895d89bea7fa..cf7c4f0e0a0b 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c @@ -217,7 +217,7 @@ static struct cmn2asic_mapping sienna_cichlid_workload_map[PP_SMC_POWER_PROFILE_ WORKLOAD_MAP(PP_SMC_POWER_PROFILE_POWERSAVING, WORKLOAD_PPLIB_POWER_SAVING_BIT), WORKLOAD_MAP(PP_SMC_POWER_PROFILE_VIDEO, WORKLOAD_PPLIB_VIDEO_BIT), WORKLOAD_MAP(PP_SMC_POWER_PROFILE_VR, WORKLOAD_PPLIB_VR_BIT), - WORKLOAD_MAP(PP_SMC_POWER_PROFILE_COMPUTE, WORKLOAD_PPLIB_CUSTOM_BIT), + WORKLOAD_MAP(PP_SMC_POWER_PROFILE_COMPUTE, WORKLOAD_PPLIB_COMPUTE_BIT), WORKLOAD_MAP(PP_SMC_POWER_PROFILE_CUSTOM, WORKLOAD_PPLIB_CUSTOM_BIT), }; From 7fdd375e383097a785bb65c66802e468f398bf82 Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Wed, 9 Dec 2020 16:48:41 +0100 Subject: [PATCH 267/296] net: sched: Fix dump of MPLS_OPT_LSE_LABEL attribute in cls_flower TCA_FLOWER_KEY_MPLS_OPT_LSE_LABEL is a u32 attribute (MPLS label is 20 bits long). 
Fixes the following bug: $ tc filter add dev ethX ingress protocol mpls_uc \ flower mpls lse depth 2 label 256 \ action drop $ tc filter show dev ethX ingress filter protocol mpls_uc pref 49152 flower chain 0 filter protocol mpls_uc pref 49152 flower chain 0 handle 0x1 eth_type 8847 mpls lse depth 2 label 0 <-- invalid label 0, should be 256 ... Fixes: 61aec25a6db5 ("cls_flower: Support filtering on multiple MPLS Label Stack Entries") Signed-off-by: Guillaume Nault Signed-off-by: David S. Miller --- net/sched/cls_flower.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index fed18fd2c50b..1319986693fc 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -2424,8 +2424,8 @@ static int fl_dump_key_mpls_opt_lse(struct sk_buff *skb, return err; } if (lse_mask->mpls_label) { - err = nla_put_u8(skb, TCA_FLOWER_KEY_MPLS_OPT_LSE_LABEL, - lse_key->mpls_label); + err = nla_put_u32(skb, TCA_FLOWER_KEY_MPLS_OPT_LSE_LABEL, + lse_key->mpls_label); if (err) return err; } From 177745beebe39773004921d6bffd6c94c77dca32 Mon Sep 17 00:00:00 2001 From: Mickey Rachamim Date: Wed, 9 Dec 2020 15:47:39 +0200 Subject: [PATCH 268/296] MAINTAINERS: Add entry for Marvell Prestera Ethernet Switch driver Add maintainers info for new Marvell Prestera Ethernet switch driver. Signed-off-by: Mickey Rachamim Signed-off-by: David S. 
Miller --- MAINTAINERS | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index ebe4829cdd4d..0381eb273944 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -10555,6 +10555,13 @@ S: Supported F: Documentation/networking/device_drivers/ethernet/marvell/octeontx2.rst F: drivers/net/ethernet/marvell/octeontx2/af/ +MARVELL PRESTERA ETHERNET SWITCH DRIVER +M: Vadym Kochan +M: Taras Chornyi +S: Supported +W: https://github.com/Marvell-switching/switchdev-prestera +F: drivers/net/ethernet/marvell/prestera/ + MARVELL SOC MMC/SD/SDIO CONTROLLER DRIVER M: Nicolas Pitre S: Odd Fixes From 6bea0225a4bf14a58af71cb9677a756921469e46 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Wed, 9 Dec 2020 20:16:10 +0900 Subject: [PATCH 269/296] zonefs: fix page reference and BIO leak In zonefs_file_dio_append(), the pages obtained using bio_iov_iter_get_pages() are not released on completion of the REQ_OP_APPEND BIO, nor when bio_iov_iter_get_pages() fails. Furthermore, a call to bio_put() is missing when bio_iov_iter_get_pages() fails. Fix these resource leaks by adding BIO resource release code (bio_put() and bio_release_pages()) at the end of the function after the BIO execution and add a jump to this resource cleanup code in case of bio_iov_iter_get_pages() failure. While at it, also fix the call to task_io_account_write() to be passed the correct BIO size instead of bio_iov_iter_get_pages() return value.
Reported-by: Christoph Hellwig Fixes: 02ef12a663c7 ("zonefs: use REQ_OP_ZONE_APPEND for sync DIO") Cc: stable@vger.kernel.org Signed-off-by: Damien Le Moal Reviewed-by: Chaitanya Kulkarni Reviewed-by: Christoph Hellwig --- fs/zonefs/super.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/fs/zonefs/super.c b/fs/zonefs/super.c index ff5930be096c..bec47f2d074b 100644 --- a/fs/zonefs/super.c +++ b/fs/zonefs/super.c @@ -691,21 +691,23 @@ static ssize_t zonefs_file_dio_append(struct kiocb *iocb, struct iov_iter *from) bio->bi_opf |= REQ_FUA; ret = bio_iov_iter_get_pages(bio, from); - if (unlikely(ret)) { - bio_io_error(bio); - return ret; - } + if (unlikely(ret)) + goto out_release; + size = bio->bi_iter.bi_size; - task_io_account_write(ret); + task_io_account_write(size); if (iocb->ki_flags & IOCB_HIPRI) bio_set_polled(bio, iocb); ret = submit_bio_wait(bio); + zonefs_file_write_dio_end_io(iocb, size, ret, 0); + +out_release: + bio_release_pages(bio, false); bio_put(bio); - zonefs_file_write_dio_end_io(iocb, size, ret, 0); if (ret >= 0) { iocb->ki_pos += size; return size; From b02709587ea3d699a608568ee8157d8db4fd8cae Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Tue, 8 Dec 2020 19:01:51 +0100 Subject: [PATCH 270/296] bpf: Fix propagation of 32-bit signed bounds from 64-bit bounds. The 64-bit signed bounds should not affect 32-bit signed bounds unless the verifier knows that upper 32-bits are either all 1s or all 0s. For example the register with smin_value==1 doesn't mean that s32_min_value is also equal to 1, since smax_value could be larger than 32-bit subregister can hold. The verifier refines the smax/s32_max return value from certain helpers in do_refine_retval_range(). Teach the verifier to recognize that smin/s32_min value is also bounded. When both smin and smax bounds fit into 32-bit subregister the verifier can propagate those bounds. 
Fixes: 3f50f132d840 ("bpf: Verifier, do explicit ALU32 bounds tracking") Reported-by: Jean-Philippe Brucker Acked-by: John Fastabend Signed-off-by: Alexei Starovoitov --- kernel/bpf/verifier.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 1388bf733071..53fe6ef6d931 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -1298,9 +1298,7 @@ static void __reg_combine_32_into_64(struct bpf_reg_state *reg) static bool __reg64_bound_s32(s64 a) { - if (a > S32_MIN && a < S32_MAX) - return true; - return false; + return a > S32_MIN && a < S32_MAX; } static bool __reg64_bound_u32(u64 a) @@ -1314,10 +1312,10 @@ static void __reg_combine_64_into_32(struct bpf_reg_state *reg) { __mark_reg32_unbounded(reg); - if (__reg64_bound_s32(reg->smin_value)) + if (__reg64_bound_s32(reg->smin_value) && __reg64_bound_s32(reg->smax_value)) { reg->s32_min_value = (s32)reg->smin_value; - if (__reg64_bound_s32(reg->smax_value)) reg->s32_max_value = (s32)reg->smax_value; + } if (__reg64_bound_u32(reg->umin_value)) reg->u32_min_value = (u32)reg->umin_value; if (__reg64_bound_u32(reg->umax_value)) @@ -4895,6 +4893,8 @@ static void do_refine_retval_range(struct bpf_reg_state *regs, int ret_type, ret_reg->smax_value = meta->msize_max_value; ret_reg->s32_max_value = meta->msize_max_value; + ret_reg->smin_value = -MAX_ERRNO; + ret_reg->s32_min_value = -MAX_ERRNO; __reg_deduce_bounds(ret_reg); __reg_bound_offset(ret_reg); __update_reg_bounds(ret_reg); From 511a76bcb0ce242a19153658b25437906cc6070e Mon Sep 17 00:00:00 2001 From: Jean-Philippe Brucker Date: Tue, 8 Dec 2020 19:01:52 +0100 Subject: [PATCH 271/296] selftests/bpf: Add test for signed 32-bit bound check bug After a 32-bit load followed by a branch, the verifier would reduce the maximum bound of the register to 0x7fffffff, allowing a user to bypass bound checks. Ensure such a program is rejected. 
In the second test, the 64-bit compare should not be sufficient to determine whether the signed 32-bit lower bound is 0, so the verifier should reject the second branch. Signed-off-by: Jean-Philippe Brucker Acked-by: John Fastabend Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/verifier/bounds.c | 41 +++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/tools/testing/selftests/bpf/verifier/bounds.c b/tools/testing/selftests/bpf/verifier/bounds.c index dac40de3f868..57ed67b86074 100644 --- a/tools/testing/selftests/bpf/verifier/bounds.c +++ b/tools/testing/selftests/bpf/verifier/bounds.c @@ -703,3 +703,44 @@ .fixup_map_hash_8b = { 3 }, .result = ACCEPT, }, +{ + "bounds checks after 32-bit truncation. test 1", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4), + BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0), + /* This used to reduce the max bound to 0x7fffffff */ + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 1), + BPF_JMP_IMM(BPF_JGT, BPF_REG_1, 0x7fffffff, 1), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_8b = { 3 }, + .errstr_unpriv = "R0 leaks addr", + .result_unpriv = REJECT, + .result = ACCEPT, +}, +{ + "bounds checks after 32-bit truncation.
test 2", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4), + BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0), + BPF_JMP_IMM(BPF_JSLT, BPF_REG_1, 1, 1), + BPF_JMP32_IMM(BPF_JSLT, BPF_REG_1, 0, 1), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_8b = { 3 }, + .errstr_unpriv = "R0 leaks addr", + .result_unpriv = REJECT, + .result = ACCEPT, +}, From 77ce220c0549dcc3db8226c61c60e83fc59dfafc Mon Sep 17 00:00:00 2001 From: Jean-Philippe Brucker Date: Tue, 8 Dec 2020 19:01:53 +0100 Subject: [PATCH 272/296] selftests/bpf: Fix array access with signed variable test The test fails because of a recent fix to the verifier, even though this program is valid. In details what happens is: 7: (61) r1 = *(u32 *)(r0 +0) Load a 32-bit value, with signed bounds [S32_MIN, S32_MAX]. The bounds of the 64-bit value are [0, U32_MAX]... 8: (65) if r1 s> 0xffffffff goto pc+1 ... therefore this is always true (the operand is sign-extended). 10: (b4) w2 = 11 11: (6d) if r2 s> r1 goto pc+1 When true, the 64-bit bounds become [0, 10]. The 32-bit bounds are still [S32_MIN, 10]. 13: (64) w1 <<= 2 Because this is a 32-bit operation, the verifier propagates the new 32-bit bounds to the 64-bit ones, and the knowledge gained from insn 11 is lost. 14: (0f) r0 += r1 15: (7a) *(u64 *)(r0 +0) = 4 Then the verifier considers r0 unbounded here, rejecting the test. To make the test work, change insn 8 to check the sign of the 32-bit value. 
Signed-off-by: Jean-Philippe Brucker Acked-by: John Fastabend Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/verifier/array_access.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/bpf/verifier/array_access.c b/tools/testing/selftests/bpf/verifier/array_access.c index 1c4b1939f5a8..bed53b561e04 100644 --- a/tools/testing/selftests/bpf/verifier/array_access.c +++ b/tools/testing/selftests/bpf/verifier/array_access.c @@ -68,7 +68,7 @@ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 9), BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0), - BPF_JMP_IMM(BPF_JSGT, BPF_REG_1, 0xffffffff, 1), + BPF_JMP32_IMM(BPF_JSGT, BPF_REG_1, 0xffffffff, 1), BPF_MOV32_IMM(BPF_REG_1, 0), BPF_MOV32_IMM(BPF_REG_2, MAX_ENTRIES), BPF_JMP_REG(BPF_JSGT, BPF_REG_2, BPF_REG_1, 1), From 3615bdf6d9b19db12b1589861609b4f1c6a8d303 Mon Sep 17 00:00:00 2001 From: Jean-Philippe Brucker Date: Tue, 8 Dec 2020 19:01:54 +0100 Subject: [PATCH 273/296] selftests/bpf: Fix "dubious pointer arithmetic" test The verifier trace changed following a bugfix. After checking the 64-bit sign, only the upper bit mask is known, not bit 31. Update the test accordingly. 
Signed-off-by: Jean-Philippe Brucker Acked-by: John Fastabend Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/prog_tests/align.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tools/testing/selftests/bpf/prog_tests/align.c b/tools/testing/selftests/bpf/prog_tests/align.c index 52414058a627..5861446d0777 100644 --- a/tools/testing/selftests/bpf/prog_tests/align.c +++ b/tools/testing/selftests/bpf/prog_tests/align.c @@ -456,10 +456,10 @@ static struct bpf_align_test tests[] = { */ {7, "R5_w=inv(id=0,smin_value=-9223372036854775806,smax_value=9223372036854775806,umin_value=2,umax_value=18446744073709551614,var_off=(0x2; 0xfffffffffffffffc)"}, /* Checked s>=0 */ - {9, "R5=inv(id=0,umin_value=2,umax_value=9223372034707292158,var_off=(0x2; 0x7fffffff7ffffffc)"}, + {9, "R5=inv(id=0,umin_value=2,umax_value=9223372036854775806,var_off=(0x2; 0x7ffffffffffffffc)"}, /* packet pointer + nonnegative (4n+2) */ - {11, "R6_w=pkt(id=1,off=0,r=0,umin_value=2,umax_value=9223372034707292158,var_off=(0x2; 0x7fffffff7ffffffc)"}, - {13, "R4_w=pkt(id=1,off=4,r=0,umin_value=2,umax_value=9223372034707292158,var_off=(0x2; 0x7fffffff7ffffffc)"}, + {11, "R6_w=pkt(id=1,off=0,r=0,umin_value=2,umax_value=9223372036854775806,var_off=(0x2; 0x7ffffffffffffffc)"}, + {13, "R4_w=pkt(id=1,off=4,r=0,umin_value=2,umax_value=9223372036854775806,var_off=(0x2; 0x7ffffffffffffffc)"}, /* NET_IP_ALIGN + (4n+2) == (4n), alignment is fine. * We checked the bounds, but it might have been able * to overflow if the packet pointer started in the @@ -467,7 +467,7 @@ static struct bpf_align_test tests[] = { * So we did not get a 'range' on R6, and the access * attempt will fail. 
*/ - {15, "R6_w=pkt(id=1,off=0,r=0,umin_value=2,umax_value=9223372034707292158,var_off=(0x2; 0x7fffffff7ffffffc)"}, + {15, "R6_w=pkt(id=1,off=0,r=0,umin_value=2,umax_value=9223372036854775806,var_off=(0x2; 0x7ffffffffffffffc)"}, } }, { From 38bf8cd821be292e7d8e6f6283d67c5d9708f887 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Wed, 9 Dec 2020 12:21:13 +0100 Subject: [PATCH 274/296] selftests: fix poll error in udpgro.sh The test program udpgso_bench_rx always invokes the poll() syscall with a timeout of 10ms. If a larger timeout is specified via the command line, udpgso_bench_rx is supposed to do multiple poll() calls till the timeout is expired or an event is received. Currently the poll() loop errors out after the first invocation with no events, and may cause self-test failures like: failed GRO with custom segment size ./udpgso_bench_rx: poll: 0x0 expected 0x1 This change addresses the issue allowing the poll() loop to consume all the configured timeout. Fixes: ada641ff6ed3 ("selftests: fixes for UDP GRO") Signed-off-by: Paolo Abeni Signed-off-by: David S.
Miller --- tools/testing/selftests/net/udpgso_bench_rx.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tools/testing/selftests/net/udpgso_bench_rx.c b/tools/testing/selftests/net/udpgso_bench_rx.c index db3d4a8b5a4c..76a24052f4b4 100644 --- a/tools/testing/selftests/net/udpgso_bench_rx.c +++ b/tools/testing/selftests/net/udpgso_bench_rx.c @@ -113,6 +113,9 @@ static void do_poll(int fd, int timeout_ms) interrupted = true; break; } + + /* no events and more time to wait, do poll again */ + continue; } if (pfd.revents != POLLIN) error(1, errno, "poll: 0x%x expected 0x%x\n", From 1c87b85162975627d684a234d7347ef630f0e3aa Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 24 Nov 2020 19:15:18 -0500 Subject: [PATCH 275/296] NFS: Fix rpcrdma_inline_fixup() crash with new LISTXATTRS operation By switching to an XFS-backed export, I am able to reproduce the ibcomp worker crash on my client with xfstests generic/013. For the failing LISTXATTRS operation, xdr_inline_pages() is called with page_len=12 and buflen=128. - When ->send_request() is called, rpcrdma_marshal_req() does not set up a Reply chunk because buflen is smaller than the inline threshold. Thus rpcrdma_convert_iovs() does not get invoked at all and the transport's XDRBUF_SPARSE_PAGES logic is not invoked on the receive buffer. - During reply processing, rpcrdma_inline_fixup() tries to copy received data into rq_rcv_buf->pages because page_len is positive. But there are no receive pages because rpcrdma_marshal_req() never allocated them. The result is that the ibcomp worker faults and dies. Sometimes that causes a visible crash, and sometimes it results in a transport hang without other symptoms. RPC/RDMA's XDRBUF_SPARSE_PAGES support is not entirely correct, and should eventually be fixed or replaced. However, my preference is that upper-layer operations should explicitly allocate their receive buffers (using GFP_KERNEL) when possible, rather than relying on XDRBUF_SPARSE_PAGES. 
Reported-by: Olga kornievskaia Suggested-by: Olga kornievskaia Fixes: c10a75145feb ("NFSv4.2: add the extended attribute proc functions.") Signed-off-by: Chuck Lever Reviewed-by: Olga kornievskaia Reviewed-by: Frank van der Linden Tested-by: Olga kornievskaia Signed-off-by: Anna Schumaker --- fs/nfs/nfs42proc.c | 21 +++++++++++++-------- fs/nfs/nfs42xdr.c | 1 - 2 files changed, 13 insertions(+), 9 deletions(-) diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c index 2b2211d1234e..4fc61e3d098d 100644 --- a/fs/nfs/nfs42proc.c +++ b/fs/nfs/nfs42proc.c @@ -1241,12 +1241,13 @@ static ssize_t _nfs42_proc_listxattrs(struct inode *inode, void *buf, .rpc_resp = &res, }; u32 xdrlen; - int ret, np; + int ret, np, i; + ret = -ENOMEM; res.scratch = alloc_page(GFP_KERNEL); if (!res.scratch) - return -ENOMEM; + goto out; xdrlen = nfs42_listxattr_xdrsize(buflen); if (xdrlen > server->lxasize) @@ -1254,9 +1255,12 @@ static ssize_t _nfs42_proc_listxattrs(struct inode *inode, void *buf, np = xdrlen / PAGE_SIZE + 1; pages = kcalloc(np, sizeof(struct page *), GFP_KERNEL); - if (pages == NULL) { - __free_page(res.scratch); - return -ENOMEM; + if (!pages) + goto out_free_scratch; + for (i = 0; i < np; i++) { + pages[i] = alloc_page(GFP_KERNEL); + if (!pages[i]) + goto out_free_pages; } arg.xattr_pages = pages; @@ -1271,14 +1275,15 @@ static ssize_t _nfs42_proc_listxattrs(struct inode *inode, void *buf, *eofp = res.eof; } +out_free_pages: while (--np >= 0) { if (pages[np]) __free_page(pages[np]); } - - __free_page(res.scratch); kfree(pages); - +out_free_scratch: + __free_page(res.scratch); +out: return ret; } diff --git a/fs/nfs/nfs42xdr.c b/fs/nfs/nfs42xdr.c index 6e060a88f98c..8432bd6b95f0 100644 --- a/fs/nfs/nfs42xdr.c +++ b/fs/nfs/nfs42xdr.c @@ -1528,7 +1528,6 @@ static void nfs4_xdr_enc_listxattrs(struct rpc_rqst *req, rpc_prepare_reply_pages(req, args->xattr_pages, 0, args->count, hdr.replen); - req->rq_rcv_buf.flags |= XDRBUF_SPARSE_PAGES; encode_nops(&hdr); } From 
fe8eb820e388ca81643443b6208001ab516d1cf6 Mon Sep 17 00:00:00 2001 From: Dai Ngo Date: Mon, 23 Nov 2020 22:15:17 -0500 Subject: [PATCH 276/296] NFSv4.2: Fix 5 seconds delay when doing inter server copy Since commit b4868b44c5628 ("NFSv4: Wait for stateid updates after CLOSE/OPEN_DOWNGRADE"), every inter server copy operation suffers 5 seconds delay regardless of the size of the copy. The delay is from nfs_set_open_stateid_locked when the check by nfs_stateid_is_sequential fails because the seqid in both nfs4_state and nfs4_stateid are 0. Fix __nfs42_ssc_open to delay setting of NFS_OPEN_STATE in nfs4_state, until after the call to update_open_stateid, to indicate this is the 1st open. This fix is one of 2 patches; the other patch is the fix in the source server to return the stateid for COPY_NOTIFY request with seqid 1 instead of 0. Fixes: ce0887ac96d3 ("NFSD add nfs4 inter ssc to nfsd4_copy") Signed-off-by: Dai Ngo Signed-off-by: Anna Schumaker --- fs/nfs/nfs4file.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c index 9d354de613da..57b3821d975a 100644 --- a/fs/nfs/nfs4file.c +++ b/fs/nfs/nfs4file.c @@ -377,10 +377,10 @@ static struct file *__nfs42_ssc_open(struct vfsmount *ss_mnt, goto out_stateowner; set_bit(NFS_SRV_SSC_COPY_STATE, &ctx->state->flags); - set_bit(NFS_OPEN_STATE, &ctx->state->flags); memcpy(&ctx->state->open_stateid.other, &stateid->other, NFS4_STATEID_OTHER_SIZE); update_open_stateid(ctx->state, stateid, NULL, filep->f_mode); + set_bit(NFS_OPEN_STATE, &ctx->state->flags); nfs_file_set_open_context(filep, ctx); put_nfs_open_context(ctx); From 21e31401fc4595aeefa224cd36ab8175ec867b87 Mon Sep 17 00:00:00 2001 From: Anna Schumaker Date: Thu, 3 Dec 2020 15:18:39 -0500 Subject: [PATCH 277/296] NFS: Disable READ_PLUS by default We've been seeing failures with xfstests generic/091 and generic/263 when using READ_PLUS.
I've made some progress on these issues, and the tests fail later on but still don't pass. Let's disable READ_PLUS by default until we can work out what is going on. Signed-off-by: Anna Schumaker --- fs/nfs/Kconfig | 9 +++++++++ fs/nfs/nfs4proc.c | 2 +- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/fs/nfs/Kconfig b/fs/nfs/Kconfig index 88e1763e02f3..e2a488d403a6 100644 --- a/fs/nfs/Kconfig +++ b/fs/nfs/Kconfig @@ -205,3 +205,12 @@ config NFS_DISABLE_UDP_SUPPORT Choose Y here to disable the use of NFS over UDP. NFS over UDP on modern networks (1Gb+) can lead to data corruption caused by fragmentation during high loads. + +config NFS_V4_2_READ_PLUS + bool "NFS: Enable support for the NFSv4.2 READ_PLUS operation" + depends on NFS_V4_2 + default n + help + This is intended for developers only. The READ_PLUS operation has + been shown to have issues under specific conditions and should not + be used in production. diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 9e0ca9b2b210..e89468678ae1 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -5309,7 +5309,7 @@ static int nfs4_read_done(struct rpc_task *task, struct nfs_pgio_header *hdr) nfs4_read_done_cb(task, hdr); } -#ifdef CONFIG_NFS_V4_2 +#if defined CONFIG_NFS_V4_2 && defined CONFIG_NFS_V4_2_READ_PLUS static void nfs42_read_plus_support(struct nfs_server *server, struct rpc_message *msg) { if (server->caps & NFS_CAP_READ_PLUS) From b1f195fc49812359296a901e26cc7c0b761d8a70 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 10 Dec 2020 23:07:41 +0000 Subject: [PATCH 278/296] drm/i915/display: Go softly softly on initial modeset failure MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reduce the module/device probe error into a mere debug to hide issues where the initial modeset is failing (after lies told by hw probe) and the system hangs with a livelock in cleaning up the failed commit. Reported-by: H.J. 
Lu Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=210619 Fixes: b3bf99daaee9 ("drm/i915/display: Defer initial modeset until after GGTT is initialised") Fixes: ccc9e67ab26f ("drm/i915/display: Defer initial modeset until after GGTT is initialised") Signed-off-by: Chris Wilson Cc: "Ville Syrjälä" Cc: Rodrigo Vivi Cc: H.J. Lu Cc: Dave Airlie Reviewed-by: Rodrigo Vivi Signed-off-by: Dave Airlie Link: https://patchwork.freedesktop.org/patch/msgid/20201210230741.17140-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/display/intel_display.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index 3bfe6ed67da1..aabf09f89cad 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -18040,7 +18040,7 @@ int intel_modeset_init(struct drm_i915_private *i915) */ ret = intel_initial_commit(&i915->drm); if (ret) - return ret; + drm_dbg_kms(&i915->drm, "Initial modeset failed, %d\n", ret); intel_overlay_setup(i915); From 249a9599c9123a4af655dd5800c2506602ffa055 Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Thu, 3 Dec 2020 20:03:32 +0100 Subject: [PATCH 279/296] mtd: rawnand: ams-delta: Do not force a particular software ECC engine Originally, commit d7157ff49a5b ("mtd: rawnand: Use the ECC framework user input parsing bits") kind of broke the logic around the initialization of several ECC engines. Unfortunately, the fix (which indeed moved the ECC initialization to the right place) did not take into account the fact that a different ECC algorithm could have been used thanks to a DT property, considering the "Hamming" algorithm entry a configuration while it was only a default. Add the necessary logic to be sure Hamming keeps being only a default. 
Fixes: 59d93473323a ("mtd: rawnand: ams-delta: Move the ECC initialization to ->attach_chip()") Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/20201203190340.15522-2-miquel.raynal@bootlin.com --- drivers/mtd/nand/raw/ams-delta.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/mtd/nand/raw/ams-delta.c b/drivers/mtd/nand/raw/ams-delta.c index 0c352b39ad4b..ff1697f899ba 100644 --- a/drivers/mtd/nand/raw/ams-delta.c +++ b/drivers/mtd/nand/raw/ams-delta.c @@ -218,7 +218,9 @@ static int gpio_nand_setup_interface(struct nand_chip *this, int csline, static int gpio_nand_attach_chip(struct nand_chip *chip) { chip->ecc.engine_type = NAND_ECC_ENGINE_TYPE_SOFT; - chip->ecc.algo = NAND_ECC_ALGO_HAMMING; + + if (chip->ecc.algo == NAND_ECC_ALGO_UNKNOWN) + chip->ecc.algo = NAND_ECC_ALGO_HAMMING; return 0; } From 52277269c4060ced7e7d1a83c77377590d106f44 Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Thu, 3 Dec 2020 20:03:33 +0100 Subject: [PATCH 280/296] mtd: rawnand: au1550: Do not force a particular software ECC engine Originally, commit d7157ff49a5b ("mtd: rawnand: Use the ECC framework user input parsing bits") kind of broke the logic around the initialization of several ECC engines. Unfortunately, the fix (which indeed moved the ECC initialization to the right place) did not take into account the fact that a different ECC algorithm could have been used thanks to a DT property, considering the "Hamming" algorithm entry a configuration while it was only a default. Add the necessary logic to be sure Hamming keeps being only a default. 
Fixes: dbffc8ccdf3a ("mtd: rawnand: au1550: Move the ECC initialization to ->attach_chip()") Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/20201203190340.15522-3-miquel.raynal@bootlin.com --- drivers/mtd/nand/raw/au1550nd.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/mtd/nand/raw/au1550nd.c b/drivers/mtd/nand/raw/au1550nd.c index 7892022bd6dd..7b6b354f2d39 100644 --- a/drivers/mtd/nand/raw/au1550nd.c +++ b/drivers/mtd/nand/raw/au1550nd.c @@ -239,7 +239,9 @@ static int au1550nd_exec_op(struct nand_chip *this, static int au1550nd_attach_chip(struct nand_chip *chip) { chip->ecc.engine_type = NAND_ECC_ENGINE_TYPE_SOFT; - chip->ecc.algo = NAND_ECC_ALGO_HAMMING; + + if (chip->ecc.algo == NAND_ECC_ALGO_UNKNOWN) + chip->ecc.algo = NAND_ECC_ALGO_HAMMING; return 0; } From bd47fa451315684424d2b39aa95f2ed235fee37a Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Thu, 3 Dec 2020 20:03:34 +0100 Subject: [PATCH 281/296] mtd: rawnand: gpio: Do not force a particular software ECC engine Originally, commit d7157ff49a5b ("mtd: rawnand: Use the ECC framework user input parsing bits") kind of broke the logic around the initialization of several ECC engines. Unfortunately, the fix (which indeed moved the ECC initialization to the right place) did not take into account the fact that a different ECC algorithm could have been used thanks to a DT property, considering the "Hamming" algorithm entry a configuration while it was only a default. Add the necessary logic to be sure Hamming keeps being only a default. 
Fixes: f6341f6448e0 ("mtd: rawnand: gpio: Move the ECC initialization to ->attach_chip()") Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/20201203190340.15522-4-miquel.raynal@bootlin.com --- drivers/mtd/nand/raw/gpio.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/mtd/nand/raw/gpio.c b/drivers/mtd/nand/raw/gpio.c index eb03b8cea1cb..fb7a086de35e 100644 --- a/drivers/mtd/nand/raw/gpio.c +++ b/drivers/mtd/nand/raw/gpio.c @@ -164,7 +164,9 @@ static int gpio_nand_exec_op(struct nand_chip *chip, static int gpio_nand_attach_chip(struct nand_chip *chip) { chip->ecc.engine_type = NAND_ECC_ENGINE_TYPE_SOFT; - chip->ecc.algo = NAND_ECC_ALGO_HAMMING; + + if (chip->ecc.algo == NAND_ECC_ALGO_UNKNOWN) + chip->ecc.algo = NAND_ECC_ALGO_HAMMING; return 0; } From f49bde9fe25cf4cd02440a7ec68f278957ddea37 Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Thu, 3 Dec 2020 20:03:35 +0100 Subject: [PATCH 282/296] mtd: rawnand: mpc5121: Do not force a particular software ECC engine Originally, commit d7157ff49a5b ("mtd: rawnand: Use the ECC framework user input parsing bits") kind of broke the logic around the initialization of several ECC engines. Unfortunately, the fix (which indeed moved the ECC initialization to the right place) did not take into account the fact that a different ECC algorithm could have been used thanks to a DT property, considering the "Hamming" algorithm entry a configuration while it was only a default. Add the necessary logic to be sure Hamming keeps being only a default. 
Fixes: 6dd09f775b72 ("mtd: rawnand: mpc5121: Move the ECC initialization to ->attach_chip()") Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/20201203190340.15522-5-miquel.raynal@bootlin.com --- drivers/mtd/nand/raw/mpc5121_nfc.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/mtd/nand/raw/mpc5121_nfc.c b/drivers/mtd/nand/raw/mpc5121_nfc.c index fb4c0b11689f..bcd4a556c959 100644 --- a/drivers/mtd/nand/raw/mpc5121_nfc.c +++ b/drivers/mtd/nand/raw/mpc5121_nfc.c @@ -606,7 +606,9 @@ static void mpc5121_nfc_free(struct device *dev, struct mtd_info *mtd) static int mpc5121_nfc_attach_chip(struct nand_chip *chip) { chip->ecc.engine_type = NAND_ECC_ENGINE_TYPE_SOFT; - chip->ecc.algo = NAND_ECC_ALGO_HAMMING; + + if (chip->ecc.algo == NAND_ECC_ALGO_UNKNOWN) + chip->ecc.algo = NAND_ECC_ALGO_HAMMING; return 0; } From f87b720a21641d5c5ccd4f5ccc06d96251b7f282 Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Thu, 3 Dec 2020 20:03:36 +0100 Subject: [PATCH 283/296] mtd: rawnand: orion: Do not force a particular software ECC engine Originally, commit d7157ff49a5b ("mtd: rawnand: Use the ECC framework user input parsing bits") kind of broke the logic around the initialization of several ECC engines. Unfortunately, the fix (which indeed moved the ECC initialization to the right place) did not take into account the fact that a different ECC algorithm could have been used thanks to a DT property, considering the "Hamming" algorithm entry a configuration while it was only a default. Add the necessary logic to be sure Hamming keeps being only a default. 
Reported-by: Chris Packham Fixes: 553508cec2e8 ("mtd: rawnand: orion: Move the ECC initialization to ->attach_chip()") Signed-off-by: Miquel Raynal Tested-by: Chris Packham Link: https://lore.kernel.org/linux-mtd/20201203190340.15522-6-miquel.raynal@bootlin.com --- drivers/mtd/nand/raw/orion_nand.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/mtd/nand/raw/orion_nand.c b/drivers/mtd/nand/raw/orion_nand.c index e3bb65fd3ab2..66211c9311d2 100644 --- a/drivers/mtd/nand/raw/orion_nand.c +++ b/drivers/mtd/nand/raw/orion_nand.c @@ -86,7 +86,9 @@ static void orion_nand_read_buf(struct nand_chip *chip, uint8_t *buf, int len) static int orion_nand_attach_chip(struct nand_chip *chip) { chip->ecc.engine_type = NAND_ECC_ENGINE_TYPE_SOFT; - chip->ecc.algo = NAND_ECC_ALGO_HAMMING; + + if (chip->ecc.algo == NAND_ECC_ALGO_UNKNOWN) + chip->ecc.algo = NAND_ECC_ALGO_HAMMING; return 0; } From 9506ab943db438d316386d45eb8a8b60bfeee27c Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Thu, 3 Dec 2020 20:03:37 +0100 Subject: [PATCH 284/296] mtd: rawnand: pasemi: Do not force a particular software ECC engine Originally, commit d7157ff49a5b ("mtd: rawnand: Use the ECC framework user input parsing bits") kind of broke the logic around the initialization of several ECC engines. Unfortunately, the fix (which indeed moved the ECC initialization to the right place) did not take into account the fact that a different ECC algorithm could have been used thanks to a DT property, considering the "Hamming" algorithm entry a configuration while it was only a default. Add the necessary logic to be sure Hamming keeps being only a default. 
Fixes: 8fc6f1f042b2 ("mtd: rawnand: pasemi: Move the ECC initialization to ->attach_chip()") Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/20201203190340.15522-7-miquel.raynal@bootlin.com --- drivers/mtd/nand/raw/pasemi_nand.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/mtd/nand/raw/pasemi_nand.c b/drivers/mtd/nand/raw/pasemi_nand.c index 4dfff34800f4..68c08772d7c2 100644 --- a/drivers/mtd/nand/raw/pasemi_nand.c +++ b/drivers/mtd/nand/raw/pasemi_nand.c @@ -77,7 +77,9 @@ static int pasemi_device_ready(struct nand_chip *chip) static int pasemi_attach_chip(struct nand_chip *chip) { chip->ecc.engine_type = NAND_ECC_ENGINE_TYPE_SOFT; - chip->ecc.algo = NAND_ECC_ALGO_HAMMING; + + if (chip->ecc.algo == NAND_ECC_ALGO_UNKNOWN) + chip->ecc.algo = NAND_ECC_ALGO_HAMMING; return 0; } From 148b4f16159f49c6d05da8189e0941880ad10a46 Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Thu, 3 Dec 2020 20:03:38 +0100 Subject: [PATCH 285/296] mtd: rawnand: plat_nand: Do not force a particular software ECC engine Originally, commit d7157ff49a5b ("mtd: rawnand: Use the ECC framework user input parsing bits") kind of broke the logic around the initialization of several ECC engines. Unfortunately, the fix (which indeed moved the ECC initialization to the right place) did not take into account the fact that a different ECC algorithm could have been used thanks to a DT property, considering the "Hamming" algorithm entry a configuration while it was only a default. Add the necessary logic to be sure Hamming keeps being only a default. 
Fixes: 612e048e6aab ("mtd: rawnand: plat_nand: Move the ECC initialization to ->attach_chip()") Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/20201203190340.15522-8-miquel.raynal@bootlin.com --- drivers/mtd/nand/raw/plat_nand.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/mtd/nand/raw/plat_nand.c b/drivers/mtd/nand/raw/plat_nand.c index 93d9f1694dc1..7711e1020c21 100644 --- a/drivers/mtd/nand/raw/plat_nand.c +++ b/drivers/mtd/nand/raw/plat_nand.c @@ -22,7 +22,9 @@ struct plat_nand_data { static int plat_nand_attach_chip(struct nand_chip *chip) { chip->ecc.engine_type = NAND_ECC_ENGINE_TYPE_SOFT; - chip->ecc.algo = NAND_ECC_ALGO_HAMMING; + + if (chip->ecc.algo == NAND_ECC_ALGO_UNKNOWN) + chip->ecc.algo = NAND_ECC_ALGO_HAMMING; return 0; } From cf75f00fc8026040f6ff8f054cfec99e820b62e7 Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Thu, 3 Dec 2020 20:03:39 +0100 Subject: [PATCH 286/296] mtd: rawnand: socrates: Do not force a particular software ECC engine Originally, commit d7157ff49a5b ("mtd: rawnand: Use the ECC framework user input parsing bits") kind of broke the logic around the initialization of several ECC engines. Unfortunately, the fix (which indeed moved the ECC initialization to the right place) did not take into account the fact that a different ECC algorithm could have been used thanks to a DT property, considering the "Hamming" algorithm entry a configuration while it was only a default. Add the necessary logic to be sure Hamming keeps being only a default. 
Fixes: b36bf0a0fe5d ("mtd: rawnand: socrates: Move the ECC initialization to ->attach_chip()") Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/20201203190340.15522-9-miquel.raynal@bootlin.com --- drivers/mtd/nand/raw/socrates_nand.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/mtd/nand/raw/socrates_nand.c b/drivers/mtd/nand/raw/socrates_nand.c index 107208311987..70f8305c9b6e 100644 --- a/drivers/mtd/nand/raw/socrates_nand.c +++ b/drivers/mtd/nand/raw/socrates_nand.c @@ -120,7 +120,9 @@ static int socrates_nand_device_ready(struct nand_chip *nand_chip) static int socrates_attach_chip(struct nand_chip *chip) { chip->ecc.engine_type = NAND_ECC_ENGINE_TYPE_SOFT; - chip->ecc.algo = NAND_ECC_ALGO_HAMMING; + + if (chip->ecc.algo == NAND_ECC_ALGO_UNKNOWN) + chip->ecc.algo = NAND_ECC_ALGO_HAMMING; return 0; } From 33d974e76e21e9da8a36b14d2dce6394c36c3e30 Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Thu, 3 Dec 2020 20:03:40 +0100 Subject: [PATCH 287/296] mtd: rawnand: xway: Do not force a particular software ECC engine Originally, commit d7157ff49a5b ("mtd: rawnand: Use the ECC framework user input parsing bits") kind of broke the logic around the initialization of several ECC engines. Unfortunately, the fix (which indeed moved the ECC initialization to the right place) did not take into account the fact that a different ECC algorithm could have been used thanks to a DT property, considering the "Hamming" algorithm entry a configuration while it was only a default. Add the necessary logic to be sure Hamming keeps being only a default. 
Fixes: d525914b5bd8 ("mtd: rawnand: xway: Move the ECC initialization to ->attach_chip()") Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/20201203190340.15522-10-miquel.raynal@bootlin.com --- drivers/mtd/nand/raw/xway_nand.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/mtd/nand/raw/xway_nand.c b/drivers/mtd/nand/raw/xway_nand.c index efc5bf5434e0..26751976e502 100644 --- a/drivers/mtd/nand/raw/xway_nand.c +++ b/drivers/mtd/nand/raw/xway_nand.c @@ -149,7 +149,9 @@ static void xway_write_buf(struct nand_chip *chip, const u_char *buf, int len) static int xway_attach_chip(struct nand_chip *chip) { chip->ecc.engine_type = NAND_ECC_ENGINE_TYPE_SOFT; - chip->ecc.algo = NAND_ECC_ALGO_HAMMING; + + if (chip->ecc.algo == NAND_ECC_ALGO_UNKNOWN) + chip->ecc.algo = NAND_ECC_ALGO_HAMMING; return 0; } From 16c0cc0ce3059e315a0aab6538061d95a6612589 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Fri, 11 Dec 2020 13:36:27 -0800 Subject: [PATCH 288/296] revert "mm/filemap: add static for function __add_to_page_cache_locked" Revert commit 3351b16af494 ("mm/filemap: add static for function __add_to_page_cache_locked") due to incompatibility with ALLOW_ERROR_INJECTION which results in build errors.
Link: https://lkml.kernel.org/r/CAADnVQJ6tmzBXvtroBuEH6QA0H+q7yaSKxrVvVxhqr3KBZdEXg@mail.gmail.com Tested-by: Justin Forbes Tested-by: Greg Thelen Acked-by: Alexei Starovoitov Cc: Michal Kubecek Cc: Alex Shi Cc: Souptick Joarder Cc: Daniel Borkmann Cc: Josef Bacik Cc: Tony Luck Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/filemap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/filemap.c b/mm/filemap.c index 331f4261d723..0b2067b3c328 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -827,7 +827,7 @@ int replace_page_cache_page(struct page *old, struct page *new, gfp_t gfp_mask) } EXPORT_SYMBOL_GPL(replace_page_cache_page); -static noinline int __add_to_page_cache_locked(struct page *page, +noinline int __add_to_page_cache_locked(struct page *page, struct address_space *mapping, pgoff_t offset, gfp_t gfp, void **shadowp) From 40d6366e9d86d9a67b5642040e76082fdb5bdcf9 Mon Sep 17 00:00:00 2001 From: Miles Chen Date: Fri, 11 Dec 2020 13:36:31 -0800 Subject: [PATCH 289/296] proc: use untagged_addr() for pagemap_read addresses When we try to visit the pagemap of a tagged userspace pointer, we find that the start_vaddr is not correct because of the tag. To fix it, we should untag the userspace pointers in pagemap_read(). I tested with 5.10-rc4 and the issue remains. Explanation from Catalin in [1]: "Arguably, that's a user-space bug since tagged file offsets were never supported. In this case it's not even a tag at bit 56 as per the arm64 tagged address ABI but rather down to bit 47. You could say that the problem is caused by the C library (malloc()) or whoever created the tagged vaddr and passed it to this function. It's not a kernel regression as we've never supported it. Now, pagemap is a special case where the offset is usually not generated as a classic file offset but rather derived by shifting a user virtual address. 
I guess we can make a concession for pagemap (only) and allow such offset with the tag at bit (56 - PAGE_SHIFT + 3)" My test code is based on [2]: A userspace pointer which has been tagged by 0xb4: 0xb400007662f541c8 userspace program: uint64 OsLayer::VirtualToPhysical(void *vaddr) { uint64 frame, paddr, pfnmask, pagemask; int pagesize = sysconf(_SC_PAGESIZE); off64_t off = ((uintptr_t)vaddr) / pagesize * 8; // off = 0xb400007662f541c8 / pagesize * 8 = 0x5a00003b317aa0 int fd = open(kPagemapPath, O_RDONLY); ... if (lseek64(fd, off, SEEK_SET) != off || read(fd, &frame, 8) != 8) { int err = errno; string errtxt = ErrorString(err); if (fd >= 0) close(fd); return 0; } ... } kernel fs/proc/task_mmu.c: static ssize_t pagemap_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) { ... src = *ppos; svpfn = src / PM_ENTRY_BYTES; // svpfn == 0xb400007662f54 start_vaddr = svpfn << PAGE_SHIFT; // start_vaddr == 0xb400007662f54000 end_vaddr = mm->task_size; /* watch out for wraparound */ // svpfn == 0xb400007662f54 // (mm->task_size >> PAGE) == 0x8000000 if (svpfn > mm->task_size >> PAGE_SHIFT) // the condition is true because of the tag 0xb4 start_vaddr = end_vaddr; ret = 0; while (count && (start_vaddr < end_vaddr)) { // we cannot visit correct entry because start_vaddr is set to end_vaddr int len; unsigned long end; ... } ... } [1] https://lore.kernel.org/patchwork/patch/1343258/ [2] https://github.com/stressapptest/stressapptest/blob/master/src/os.cc#L158 Link: https://lkml.kernel.org/r/20201204024347.8295-1-miles.chen@mediatek.com Signed-off-by: Miles Chen Reviewed-by: Vincenzo Frascino Reviewed-by: Catalin Marinas Cc: Alexey Dobriyan Cc: Andrey Konovalov Cc: Alexander Potapenko Cc: Vincenzo Frascino Cc: Andrey Ryabinin Cc: Catalin Marinas Cc: Dmitry Vyukov Cc: Marco Elver Cc: Will Deacon Cc: Eric W. 
Biederman Cc: Song Bao Hua (Barry Song) Cc: [5.4-] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/task_mmu.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 217aa2705d5d..ee5a235b3056 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -1599,11 +1599,15 @@ static ssize_t pagemap_read(struct file *file, char __user *buf, src = *ppos; svpfn = src / PM_ENTRY_BYTES; - start_vaddr = svpfn << PAGE_SHIFT; end_vaddr = mm->task_size; /* watch out for wraparound */ - if (svpfn > mm->task_size >> PAGE_SHIFT) + start_vaddr = end_vaddr; + if (svpfn <= (ULONG_MAX >> PAGE_SHIFT)) + start_vaddr = untagged_addr(svpfn << PAGE_SHIFT); + + /* Ensure the address is inside the task */ + if (start_vaddr > mm->task_size) start_vaddr = end_vaddr; /* From 84edc2eff82730d45ab513ecec49cb63beb973c9 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 11 Dec 2020 13:36:35 -0800 Subject: [PATCH 290/296] selftest/fpu: avoid clang warning With extra warnings enabled, clang complains about the redundant -mhard-float argument: clang: error: argument unused during compilation: '-mhard-float' [-Werror,-Wunused-command-line-argument] Move this into the gcc-only part of the Makefile. 
Link: https://lkml.kernel.org/r/20201203223652.1320700-1-arnd@kernel.org Fixes: 4185b3b92792 ("selftests/fpu: Add an FPU selftest") Signed-off-by: Arnd Bergmann Cc: Nathan Chancellor Cc: Nick Desaulniers Cc: Petteri Aimonen Cc: Borislav Petkov Cc: Arnd Bergmann Cc: Andy Shevchenko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/Makefile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lib/Makefile b/lib/Makefile index ce45af50983a..d415fc7067c5 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -107,7 +107,7 @@ obj-$(CONFIG_TEST_FREE_PAGES) += test_free_pages.o # off the generation of FPU/SSE* instructions for kernel proper but FPU_FLAGS # get appended last to CFLAGS and thus override those previous compiler options. # -FPU_CFLAGS := -mhard-float -msse -msse2 +FPU_CFLAGS := -msse -msse2 ifdef CONFIG_CC_IS_GCC # Stack alignment mismatch, proceed with caution. # GCC < 7.1 cannot compile code using `double` and -mpreferred-stack-boundary=3 @@ -120,6 +120,7 @@ ifdef CONFIG_CC_IS_GCC # -mpreferred-stack-boundary=3 is not between 4 and 12 # # can be triggered. Otherwise gcc doesn't complain. +FPU_CFLAGS += -mhard-float FPU_CFLAGS += $(call cc-option,-msse -mpreferred-stack-boundary=3,-mpreferred-stack-boundary=4) endif From 14dc3983b5dff513a90bd5a8cc90acaf7867c3d0 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 11 Dec 2020 13:36:38 -0800 Subject: [PATCH 291/296] kbuild: avoid static_assert for genksyms genksyms does not know or care about the _Static_assert() built-in, and sometimes falls back to ignoring the later symbols, which causes undefined behavior such as WARNING: modpost: EXPORT symbol "ethtool_set_ethtool_phy_ops" [vmlinux] version generation failed, symbol will not be versioned. 
ld: net/ethtool/common.o: relocation R_AARCH64_ABS32 against `__crc_ethtool_set_ethtool_phy_ops' can not be used when making a shared object net/ethtool/common.o:(_ftrace_annotated_branch+0x0): dangerous relocation: unsupported relocation Redefine static_assert for genksyms to avoid that. Link: https://lkml.kernel.org/r/20201203230955.1482058-1-arnd@kernel.org Signed-off-by: Arnd Bergmann Suggested-by: Ard Biesheuvel Cc: Masahiro Yamada Cc: Michal Marek Cc: Kees Cook Cc: Rikard Falkeborn Cc: Marco Elver Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/build_bug.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/include/linux/build_bug.h b/include/linux/build_bug.h index e3a0be2c90ad..7bb66e15b481 100644 --- a/include/linux/build_bug.h +++ b/include/linux/build_bug.h @@ -77,4 +77,9 @@ #define static_assert(expr, ...) __static_assert(expr, ##__VA_ARGS__, #expr) #define __static_assert(expr, msg, ...) _Static_assert(expr, msg) +#ifdef __GENKSYMS__ +/* genksyms gets confused by _Static_assert */ +#define _Static_assert(expr, ...) +#endif + #endif /* _LINUX_BUILD_BUG_H */ From 55d5b7dd6451b58489ce384282ca5a4a289eb8d5 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 11 Dec 2020 13:36:42 -0800 Subject: [PATCH 292/296] initramfs: fix clang build failure There is only one function in init/initramfs.c that is in the .text section, and it is marked __weak. When building with clang-12 and the integrated assembler, this leads to a bug with recordmcount: ./scripts/recordmcount "init/initramfs.o" Cannot find symbol for section 2: .text. init/initramfs.o: failed I'm not quite sure what exactly goes wrong, but I notice that this function is only ever called from an __init function, and normally inlined. Marking it __init as well is clearly correct and it leads to recordmcount no longer complaining. 
Link: https://lkml.kernel.org/r/20201204165742.3815221-1-arnd@kernel.org Signed-off-by: Arnd Bergmann Cc: Nathan Chancellor Cc: Nick Desaulniers Cc: Barret Rhoden Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- init/initramfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/init/initramfs.c b/init/initramfs.c index 1f97c0328a7a..55b74d7e5260 100644 --- a/init/initramfs.c +++ b/init/initramfs.c @@ -535,7 +535,7 @@ extern unsigned long __initramfs_size; #include #include -void __weak free_initrd_mem(unsigned long start, unsigned long end) +void __weak __init free_initrd_mem(unsigned long start, unsigned long end) { #ifdef CONFIG_ARCH_KEEP_MEMBLOCK unsigned long aligned_start = ALIGN_DOWN(start, PAGE_SIZE); From 6e7b64b9dd6d96537d816ea07ec26b7dedd397b9 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 11 Dec 2020 13:36:46 -0800 Subject: [PATCH 293/296] elfcore: fix building with clang kernel/elfcore.c only contains weak symbols, which triggers a bug with clang in combination with recordmcount: Cannot find symbol for section 2: .text. kernel/elfcore.o: failed Move the empty stubs into linux/elfcore.h as inline functions. As only two architectures use these, just use the architecture specific Kconfig symbols to key off the declaration. 
Link: https://lkml.kernel.org/r/20201204165742.3815221-2-arnd@kernel.org Signed-off-by: Arnd Bergmann Cc: Nathan Chancellor Cc: Nick Desaulniers Cc: Barret Rhoden Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/elfcore.h | 22 ++++++++++++++++++++++ kernel/Makefile | 1 - kernel/elfcore.c | 26 -------------------------- 3 files changed, 22 insertions(+), 27 deletions(-) delete mode 100644 kernel/elfcore.c diff --git a/include/linux/elfcore.h b/include/linux/elfcore.h index 46c3d691f677..de51c1bef27d 100644 --- a/include/linux/elfcore.h +++ b/include/linux/elfcore.h @@ -104,6 +104,7 @@ static inline int elf_core_copy_task_fpregs(struct task_struct *t, struct pt_reg #endif } +#if defined(CONFIG_UM) || defined(CONFIG_IA64) /* * These functions parameterize elf_core_dump in fs/binfmt_elf.c to write out * extra segments containing the gate DSO contents. Dumping its @@ -118,5 +119,26 @@ elf_core_write_extra_phdrs(struct coredump_params *cprm, loff_t offset); extern int elf_core_write_extra_data(struct coredump_params *cprm); extern size_t elf_core_extra_data_size(void); +#else +static inline Elf_Half elf_core_extra_phdrs(void) +{ + return 0; +} + +static inline int elf_core_write_extra_phdrs(struct coredump_params *cprm, loff_t offset) +{ + return 1; +} + +static inline int elf_core_write_extra_data(struct coredump_params *cprm) +{ + return 1; +} + +static inline size_t elf_core_extra_data_size(void) +{ + return 0; +} +#endif #endif /* _LINUX_ELFCORE_H */ diff --git a/kernel/Makefile b/kernel/Makefile index af601b9bda0e..6c9f19911be0 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -97,7 +97,6 @@ obj-$(CONFIG_TASK_DELAY_ACCT) += delayacct.o obj-$(CONFIG_TASKSTATS) += taskstats.o tsacct.o obj-$(CONFIG_TRACEPOINTS) += tracepoint.o obj-$(CONFIG_LATENCYTOP) += latencytop.o -obj-$(CONFIG_ELFCORE) += elfcore.o obj-$(CONFIG_FUNCTION_TRACER) += trace/ obj-$(CONFIG_TRACING) += trace/ obj-$(CONFIG_TRACE_CLOCK) += trace/ diff --git a/kernel/elfcore.c 
b/kernel/elfcore.c deleted file mode 100644 index 57fb4dcff434..000000000000 --- a/kernel/elfcore.c +++ /dev/null @@ -1,26 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#include -#include -#include -#include -#include - -Elf_Half __weak elf_core_extra_phdrs(void) -{ - return 0; -} - -int __weak elf_core_write_extra_phdrs(struct coredump_params *cprm, loff_t offset) -{ - return 1; -} - -int __weak elf_core_write_extra_data(struct coredump_params *cprm) -{ - return 1; -} - -size_t __weak elf_core_extra_data_size(void) -{ - return 0; -} From 6c82d45c7f0348b44e00bd7dcccfc47dec7577d1 Mon Sep 17 00:00:00 2001 From: Kuan-Ying Lee Date: Fri, 11 Dec 2020 13:36:49 -0800 Subject: [PATCH 294/296] kasan: fix object remaining in offline per-cpu quarantine We hit this issue in our internal test. When enabling generic kasan, a kfree()'d object is put into per-cpu quarantine first. If the cpu goes offline, object still remains in the per-cpu quarantine. If we call kmem_cache_destroy() now, slub will report "Objects remaining" error. 
============================================================================= BUG test_module_slab (Not tainted): Objects remaining in test_module_slab on __kmem_cache_shutdown() ----------------------------------------------------------------------------- Disabling lock debugging due to kernel taint INFO: Slab 0x(____ptrval____) objects=34 used=1 fp=0x(____ptrval____) flags=0x2ffff00000010200 CPU: 3 PID: 176 Comm: cat Tainted: G B 5.10.0-rc1-00007-g4525c8781ec0-dirty #10 Hardware name: linux,dummy-virt (DT) Call trace: dump_backtrace+0x0/0x2b0 show_stack+0x18/0x68 dump_stack+0xfc/0x168 slab_err+0xac/0xd4 __kmem_cache_shutdown+0x1e4/0x3c8 kmem_cache_destroy+0x68/0x130 test_version_show+0x84/0xf0 module_attr_show+0x40/0x60 sysfs_kf_seq_show+0x128/0x1c0 kernfs_seq_show+0xa0/0xb8 seq_read+0x1f0/0x7e8 kernfs_fop_read+0x70/0x338 vfs_read+0xe4/0x250 ksys_read+0xc8/0x180 __arm64_sys_read+0x44/0x58 el0_svc_common.constprop.0+0xac/0x228 do_el0_svc+0x38/0xa0 el0_sync_handler+0x170/0x178 el0_sync+0x174/0x180 INFO: Object 0x(____ptrval____) @offset=15848 INFO: Allocated in test_version_show+0x98/0xf0 age=8188 cpu=6 pid=172 stack_trace_save+0x9c/0xd0 set_track+0x64/0xf0 alloc_debug_processing+0x104/0x1a0 ___slab_alloc+0x628/0x648 __slab_alloc.isra.0+0x2c/0x58 kmem_cache_alloc+0x560/0x588 test_version_show+0x98/0xf0 module_attr_show+0x40/0x60 sysfs_kf_seq_show+0x128/0x1c0 kernfs_seq_show+0xa0/0xb8 seq_read+0x1f0/0x7e8 kernfs_fop_read+0x70/0x338 vfs_read+0xe4/0x250 ksys_read+0xc8/0x180 __arm64_sys_read+0x44/0x58 el0_svc_common.constprop.0+0xac/0x228 kmem_cache_destroy test_module_slab: Slab cache still has objects Register a cpu hotplug function to remove all objects in the offline per-cpu quarantine when cpu is going offline. Set a per-cpu variable to indicate this cpu is offline. 
[qiang.zhang@windriver.com: fix slab double free when cpu-hotplug] Link: https://lkml.kernel.org/r/20201204102206.20237-1-qiang.zhang@windriver.com Link: https://lkml.kernel.org/r/1606895585-17382-2-git-send-email-Kuan-Ying.Lee@mediatek.com Signed-off-by: Kuan-Ying Lee Signed-off-by: Zqiang Suggested-by: Dmitry Vyukov Reported-by: Guangye Yang Reviewed-by: Dmitry Vyukov Cc: Andrey Ryabinin Cc: Alexander Potapenko Cc: Matthias Brugger Cc: Nicholas Tang Cc: Miles Chen Cc: Qian Cai Cc: Stephen Rothwell Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/kasan/quarantine.c | 39 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/mm/kasan/quarantine.c b/mm/kasan/quarantine.c index 4c5375810449..0e3f8494628f 100644 --- a/mm/kasan/quarantine.c +++ b/mm/kasan/quarantine.c @@ -29,6 +29,7 @@ #include #include #include +#include #include "../slab.h" #include "kasan.h" @@ -43,6 +44,7 @@ struct qlist_head { struct qlist_node *head; struct qlist_node *tail; size_t bytes; + bool offline; }; #define QLIST_INIT { NULL, NULL, 0 } @@ -188,6 +190,10 @@ void quarantine_put(struct kasan_free_meta *info, struct kmem_cache *cache) local_irq_save(flags); q = this_cpu_ptr(&cpu_quarantine); + if (q->offline) { + local_irq_restore(flags); + return; + } qlist_put(q, &info->quarantine_link, cache->size); if (unlikely(q->bytes > QUARANTINE_PERCPU_SIZE)) { qlist_move_all(q, &temp); @@ -328,3 +334,36 @@ void quarantine_remove_cache(struct kmem_cache *cache) synchronize_srcu(&remove_cache_srcu); } + +static int kasan_cpu_online(unsigned int cpu) +{ + this_cpu_ptr(&cpu_quarantine)->offline = false; + return 0; +} + +static int kasan_cpu_offline(unsigned int cpu) +{ + struct qlist_head *q; + + q = this_cpu_ptr(&cpu_quarantine); + /* Ensure the ordering between the writing to q->offline and + * qlist_free_all. Otherwise, cpu_quarantine may be corrupted + * by interrupt. 
+ */ + WRITE_ONCE(q->offline, true); + barrier(); + qlist_free_all(q, NULL); + return 0; +} + +static int __init kasan_cpu_quarantine_init(void) +{ + int ret = 0; + + ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "mm/kasan:online", + kasan_cpu_online, kasan_cpu_offline); + if (ret < 0) + pr_err("kasan cpu quarantine register failed [%d]\n", ret); + return ret; +} +late_initcall(kasan_cpu_quarantine_init); From ba9c1201beaa86a773e83be5654602a0667e4a4d Mon Sep 17 00:00:00 2001 From: Gerald Schaefer Date: Fri, 11 Dec 2020 13:36:53 -0800 Subject: [PATCH 295/296] mm/hugetlb: clear compound_nr before freeing gigantic pages Commit 1378a5ee451a ("mm: store compound_nr as well as compound_order") added compound_nr counter to first tail struct page, overlaying with page->mapping. The overlay itself is fine, but while freeing gigantic hugepages via free_contig_range(), a "bad page" check will trigger for non-NULL page->mapping on the first tail page: BUG: Bad page state in process bash pfn:380001 page:00000000c35f0856 refcount:0 mapcount:0 mapping:00000000126b68aa index:0x0 pfn:0x380001 aops:0x0 flags: 0x3ffff00000000000() raw: 3ffff00000000000 0000000000000100 0000000000000122 0000000100000000 raw: 0000000000000000 0000000000000000 ffffffff00000000 0000000000000000 page dumped because: non-NULL mapping Modules linked in: CPU: 6 PID: 616 Comm: bash Not tainted 5.10.0-rc7-next-20201208 #1 Hardware name: IBM 3906 M03 703 (LPAR) Call Trace: show_stack+0x6e/0xe8 dump_stack+0x90/0xc8 bad_page+0xd6/0x130 free_pcppages_bulk+0x26a/0x800 free_unref_page+0x6e/0x90 free_contig_range+0x94/0xe8 update_and_free_page+0x1c4/0x2c8 free_pool_huge_page+0x11e/0x138 set_max_huge_pages+0x228/0x300 nr_hugepages_store_common+0xb8/0x130 kernfs_fop_write+0xd2/0x218 vfs_write+0xb0/0x2b8 ksys_write+0xac/0xe0 system_call+0xe6/0x288 Disabling lock debugging due to kernel taint This is because only the compound_order is cleared in destroy_compound_gigantic_page(), and compound_nr is set to 1U << order == 1 
for order 0 in set_compound_order(page, 0). Fix this by explicitly clearing compound_nr for the first tail page after calling set_compound_order(page, 0). Link: https://lkml.kernel.org/r/20201208182813.66391-2-gerald.schaefer@linux.ibm.com Fixes: 1378a5ee451a ("mm: store compound_nr as well as compound_order") Signed-off-by: Gerald Schaefer Reviewed-by: Matthew Wilcox (Oracle) Cc: Heiko Carstens Cc: Mike Kravetz Cc: Christian Borntraeger Cc: [5.9+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/hugetlb.c | 1 + 1 file changed, 1 insertion(+) diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 37f15c3c24dc..d029d938d26d 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -1216,6 +1216,7 @@ static void destroy_compound_gigantic_page(struct page *page, } set_compound_order(page, 0); + page[1].compound_nr = 0; __ClearPageHead(page); } From b7906b70a2337e445b8dca3ce7ba8976b6ebd07d Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Fri, 11 Dec 2020 22:36:25 +0100 Subject: [PATCH 296/296] bpf: Fix enum names for bpf_this_cpu_ptr() and bpf_per_cpu_ptr() helpers Remove bpf_ prefix, which causes these helpers to be reported in verifier dump as bpf_bpf_this_cpu_ptr() and bpf_bpf_per_cpu_ptr(), respectively. Let's fix it as long as it is still possible before UAPI freezes on these helpers. 
Fixes: eaa6bcb71ef6 ("bpf: Introduce bpf_per_cpu_ptr()") Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann Signed-off-by: Linus Torvalds --- include/uapi/linux/bpf.h | 4 ++-- kernel/bpf/helpers.c | 4 ++-- kernel/trace/bpf_trace.c | 4 ++-- tools/include/uapi/linux/bpf.h | 4 ++-- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index e6ceac3f7d62..556216dc9703 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -3897,8 +3897,8 @@ union bpf_attr { FN(seq_printf_btf), \ FN(skb_cgroup_classid), \ FN(redirect_neigh), \ - FN(bpf_per_cpu_ptr), \ - FN(bpf_this_cpu_ptr), \ + FN(per_cpu_ptr), \ + FN(this_cpu_ptr), \ FN(redirect_peer), \ /* */ diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index 25520f5eeaf6..deda1185237b 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -717,9 +717,9 @@ bpf_base_func_proto(enum bpf_func_id func_id) return &bpf_snprintf_btf_proto; case BPF_FUNC_jiffies64: return &bpf_jiffies64_proto; - case BPF_FUNC_bpf_per_cpu_ptr: + case BPF_FUNC_per_cpu_ptr: return &bpf_per_cpu_ptr_proto; - case BPF_FUNC_bpf_this_cpu_ptr: + case BPF_FUNC_this_cpu_ptr: return &bpf_this_cpu_ptr_proto; default: break; diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 048c655315f1..a125ea5e04cd 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -1337,9 +1337,9 @@ bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return prog->aux->sleepable ? 
&bpf_copy_from_user_proto : NULL; case BPF_FUNC_snprintf_btf: return &bpf_snprintf_btf_proto; - case BPF_FUNC_bpf_per_cpu_ptr: + case BPF_FUNC_per_cpu_ptr: return &bpf_per_cpu_ptr_proto; - case BPF_FUNC_bpf_this_cpu_ptr: + case BPF_FUNC_this_cpu_ptr: return &bpf_this_cpu_ptr_proto; default: return NULL; diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index e6ceac3f7d62..556216dc9703 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -3897,8 +3897,8 @@ union bpf_attr { FN(seq_printf_btf), \ FN(skb_cgroup_classid), \ FN(redirect_neigh), \ - FN(bpf_per_cpu_ptr), \ - FN(bpf_this_cpu_ptr), \ + FN(per_cpu_ptr), \ + FN(this_cpu_ptr), \ FN(redirect_peer), \ /* */